In [7]:
#to load documents from a folder 
from llama_index.core import SimpleDirectoryReader
# Folder that holds the sample files to ingest.
data_dir = "./sample_files/"

# Read every file found in the folder; the reader returns a list of Document objects.
reader = SimpleDirectoryReader(input_dir=data_dir)
docs = reader.load_data()

doc_total = len(docs)
print('Number of pages:', doc_total)
print(docs)

print(f"Loaded {doc_total} chunks from '{data_dir}'")  # change

Number of pages: 3
[Document(id_='37bd3c49-466d-4628-a612-5a9ead4f1562', embedding=None, metadata={'file_path': 'c:\\Users\\Lateb\\OneDrive\\Desktop\\CODING\\PharmacyProblemAnalyzer-Gemini-main\\PharmacyProblemAnalyzer-Gemini-main\\sample_files\\sample1.txt', 'file_name': 'sample1.txt', 'file_type': 'text/plain', 'file_size': 229, 'creation_date': '2024-05-19', 'last_modified_date': '2024-05-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text="The issues with the pharmacy are as follows: 1. The cashier doesn't have an electronic billing machine so customer service is extremely slow. 2. The medicine is not labeled or arranged in order so searching for it is very tiring.", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}'

In [2]:
# Connection settings.
#
# The MongoDB connection string is read from the MONGODB_URI environment
# variable so that credentials are never stored in the notebook (or in its
# saved outputs / version control). Set it before starting the kernel, e.g.
#   export MONGODB_URI="mongodb+srv://<user>:<password>@<cluster-host>/?retryWrites=true&w=majority"
# When the variable is not set, a credential-free local MongoDB instance is assumed.
# NOTE(review): a username/password pair was previously committed in this cell;
# that database user's password should be rotated.
import os

uri = os.environ.get("MONGODB_URI", "mongodb://localhost:27017/")



In [38]:
# API to embed uploaded files and store them in MongoDB
from flask import Flask, request, jsonify
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, ServiceContext, StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from transformers import AutoTokenizer, AutoModelForCausalLM
import pymongo
import os
import shutil

# Flask application that hosts the /upload endpoint defined below.
app = Flask(__name__)


# Names of the MongoDB database, collection and vector index used by this cell.
# NOTE(review): the $vectorSearch queries later in this notebook use the index
# name "RAGIndexing", not 'Indexx' — confirm which index name is intended.
DB_NAME = "langchain_demo"
COLLECTION_NAME = 'collection_of_text_blobs'
INDEX_NAME = 'Indexx'
# `uri` comes from the "setting up variables" cell, which must be run first.
mongodb_client = pymongo.MongoClient(uri)
db = mongodb_client[DB_NAME]
collection = db[COLLECTION_NAME]

print("Atlas client initialized")


# Embedding model loaded from "./sentence-transformers"
# (presumably a local model directory — TODO confirm).
embed_model = HuggingFaceEmbedding(model_name="./sentence-transformers")

# Smoke test: embed one sentence and print the vector length
# (the recorded output of this cell shows 768).
vector = embed_model.get_text_embedding("Vector Search with MongoDB")
print(len(vector))


# llm=None: only the embedding model is configured here; the recorded output
# shows llama_index reporting "LLM is explicitly disabled. Using MockLLM."
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=None)

# These two names are already imported at the top of this cell; the repeat is redundant.
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core import StorageContext

# Vector store bound to the same database/collection as `collection` above.
vector_store = MongoDBAtlasVectorSearch(mongodb_client = mongodb_client,
                                 db_name = DB_NAME, collection_name = COLLECTION_NAME,
                                 index_name  = INDEX_NAME)

# Storage context that makes indexes built with it use `vector_store`.
storage_context = StorageContext.from_defaults(vector_store=vector_store)


@app.route('/upload', methods=['POST'])
def upload_documents():
    """Embed the uploaded files and store them in MongoDB.

    Expects a multipart POST whose ``files`` field carries one or more files.
    The files are written to a private per-request folder under
    ``./temp_files``, parsed with ``SimpleDirectoryReader``, indexed through
    ``VectorStoreIndex`` and additionally inserted into ``collection`` as
    ``{"text", "embedding"}`` documents.

    Returns:
        A ``(json, status)`` pair: a success message with 200, or an error
        message with 400 when the request contains no usable file.
    """
    import uuid  # local import keeps this block self-contained

    files = request.files.getlist("files")

    # One folder per request: concurrent uploads cannot see each other's files,
    # and leftovers from a previously failed request are never re-ingested.
    temp_dir = os.path.join("./temp_files", uuid.uuid4().hex)
    os.makedirs(temp_dir, exist_ok=True)

    try:
        saved_count = 0
        for file in files:
            # The filename is client-controlled: keep only its final path
            # component so values such as "../../x" cannot escape temp_dir.
            raw_name = (file.filename or "").replace("\\", "/")
            safe_name = os.path.basename(raw_name)
            if safe_name in ("", ".", ".."):
                continue
            file.save(os.path.join(temp_dir, safe_name))
            saved_count += 1

        if saved_count == 0:
            # Avoids handing an empty directory to the reader.
            return jsonify({"error": "No files were uploaded under the 'files' field."}), 400

        docs = SimpleDirectoryReader(input_dir=temp_dir).load_data()
        print(f"Loaded {len(docs)} chunks from uploaded files.")

        # Called for its side effect of writing to the vector store; the
        # returned index object is not needed here.
        VectorStoreIndex.from_documents(docs, storage_context=storage_context, service_context=service_context)

        # NOTE(review): `storage_context` is backed by the same collection, so
        # this loop looks like it stores every document a second time — confirm
        # whether both writes are intended.
        for doc in docs:
            embedding = embed_model.get_text_embedding(doc.text)
            collection.insert_one({"text": doc.text, "embedding": embedding})
    finally:
        # Always remove the uploaded files, including when parsing/embedding fails.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return jsonify({"message": f"Successfully loaded {len(docs)} documents into MongoDB."}), 200

def generate_embedding(text):
    """Return the embedding that the module-level ``embed_model`` produces for ``text``."""
    vector = embed_model.get_text_embedding(text)
    return vector

if __name__ == "__main__":
    # Start Flask's built-in server on port 5000, bound to all addresses (0.0.0.0).
    # The call blocks this cell until the server is interrupted.
    app.run(port=5000, host="0.0.0.0")


Atlas client initialized
768
LLM is explicitly disabled. Using MockLLM.
 * Serving Flask app '__main__'
 * Debug mode: off


  service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=None)
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.29.235:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [24/May/2024 02:27:25] "[33mGET / HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [24/May/2024 02:27:26] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:192.168.29.235 - - [24/May/2024 02:27:28] "[33mGET / HTTP/1.1[0m" 404 -
INFO:werkzeug:192.168.29.235 - - [24/May/2024 02:27:29] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


Loaded 1 chunks from uploaded files.


INFO:werkzeug:127.0.0.1 - - [24/May/2024 02:31:10] "POST /upload HTTP/1.1" 200 -


In [13]:
# Remove every stored document so that fresh context can be loaded into the DB.
database = mongodb_client[DB_NAME]
collection = database[COLLECTION_NAME]

# An empty filter matches every document in the collection.
match_all = {}

doc_count = collection.count_documents(match_all)
print(f"Document count before delete : {doc_count:,}")

result = collection.delete_many(match_all)
print(f"Deleted docs : {result.deleted_count}")

Document count before delete : 6
Deleted docs : 6


In [None]:
# Configure the default LLM (model + tokenizer) used through llama_index `Settings`.
import torch  # needed for torch.float16 below; it was only imported in a later cell
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import Settings

# "tinyllama-tokenizer" / "tinyllama-model" are presumably local directories
# holding the downloaded tokenizer and weights — TODO confirm.
Settings.llm = HuggingFaceLLM(
    context_window=2048,
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1, "do_sample": False},
    tokenizer_name="tinyllama-tokenizer",
    model_name="tinyllama-model",
    tokenizer_kwargs={"max_length": 2048},
    model_kwargs={"torch_dtype": torch.float16}
)

In [39]:
# API to answer queries using the stored embeddings
from flask import Flask, request, jsonify
import torch
from transformers import pipeline

import pymongo
from IPython.display import Markdown, clear_output, display

# Fresh Flask application that hosts the /query endpoint defined below.
# NOTE(review): this rebinds `app`, so the /upload route registered on the
# earlier `app` object is not part of this application.
app = Flask(__name__)

# Text-generation pipeline shared by all requests; loaded on first use.
_TEXT_GENERATION_PIPELINE = None


def _get_text_generation_pipeline():
    """Return the TinyLlama text-generation pipeline, loading it only once."""
    global _TEXT_GENERATION_PIPELINE
    if _TEXT_GENERATION_PIPELINE is None:
        _TEXT_GENERATION_PIPELINE = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
    return _TEXT_GENERATION_PIPELINE


def _extract_assistant_reply(chat_prompt, generated_text, marker="<|assistant|>"):
    """Return the assistant's reply contained in ``generated_text``.

    When the output echoes ``chat_prompt``, the reply starts after the LAST
    marker of the prompt, so a marker that appears inside the user text or the
    retrieved context cannot be mistaken for the start of the reply. Otherwise
    the text after the first marker is used; if there is no marker at all the
    whole text is returned instead of raising IndexError.
    """
    cut = chat_prompt.rfind(marker)
    if cut != -1 and generated_text.startswith(chat_prompt):
        reply = generated_text[cut + len(marker):]
    else:
        _, found, reply = generated_text.partition(marker)
        if not found:
            return generated_text
    # Stop at any further marker produced by the model.
    return reply.split(marker)[0]


@app.route('/query', methods=['POST'])
def query_model():
    """Answer a question using the best-matching stored text as context.

    Expects a JSON body ``{"query": "<question>"}``. The question is embedded
    with ``embed_model``, the closest stored document is fetched with a
    ``$vectorSearch`` aggregation, and TinyLlama is prompted with that text as
    context.

    Returns:
        ``{"response": <generated text>}`` on success, or a JSON error with
        status 400 when the body is not JSON or has no non-empty ``query``.
    """
    query_data = request.get_json(silent=True)
    query = query_data.get("query") if isinstance(query_data, dict) else None
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty 'query' string."}), 400

    client = pymongo.MongoClient(uri)
    try:
        collection = client.langchain_demo.collection_of_text_blobs

        results = collection.aggregate([
            {
                "$vectorSearch": {
                    "queryVector": embed_model.get_text_embedding(query),
                    "path": "embedding",
                    "numCandidates": 50,
                    "limit": 1,
                    "index": "RAGIndexing",
                }
            }
        ])

        # The cursor is consumed while the client is still open.
        context = ""
        for document in results:
            print(type(document))
            print(f'Text present: {document["text"]}\n')
            context += document["text"]
    finally:
        # Release the per-request connection even if the search fails.
        client.close()

    print("Query worked")
    print(context)

    # Reuse the already-loaded model instead of reloading it on every request.
    pipe = _get_text_generation_pipeline()

    print("Pipeline code worked")

    def prompt_tinyllama(prompt, system_prompt=""):
        """Run one system+user chat turn through the pipeline and return the reply."""
        messages = [
            {
                "role": "system",
                "content": system_prompt,
            },
            {"role": "user", "content": prompt},
        ]
        chat_prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        outputs = pipe(chat_prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
        return _extract_assistant_reply(chat_prompt, outputs[0]["generated_text"])

    prompt = f"With the following context- {context}\nAnswer the following query {query}"
    print("Querying: "+ prompt)
    system_prompt = "You are an expert in this field and always provide detailed and accurate answers"
    response = prompt_tinyllama(prompt, system_prompt)
    return jsonify({"response": response})

if __name__ == "__main__":
    # Start Flask's built-in server on port 5000, bound to all addresses (0.0.0.0).
    # The call blocks this cell until the server is interrupted.
    app.run(port=5000, host="0.0.0.0")


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.29.235:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Loading checkpoint shards: 100%|██████████| 3/3 [00:20<00:00,  6.68s/it]


Model and tokenizer loaded successfully.
<class 'dict'>
Text present: Long Wait Times: Understaffing: Insufficient pharmacy staff can lead to long waiting times for patients seeking medication or consultation. Repetitive Tasks: Manual processes and excessive paperwork can slow down pharmacists and technicians. Inefficient Workflow: Poorly designed workflow can create bottlenecks and hinder timely service.

Long Wait Times: Understaffing: Insufficient pharmacy staff can lead to long waiting times for patients seeking medication or consultation. Repetitive Tasks: Manual processes and excessive paperwork can slow down pharmacists and technicians. Inefficient Workflow: Poorly designed workflow can create bottlenecks and hinder timely service.


INFO:werkzeug:127.0.0.1 - - [24/May/2024 02:44:16] "POST /query HTTP/1.1" 200 -


In [8]:
#code to process queries and return answers


import torch
from transformers import pipeline

# Connect to MongoDB and select the collection that stores text + embeddings.
client = pymongo.MongoClient(uri)
db = client["langchain_demo"]
collection = db["collection_of_text_blobs"]

# Question answered by the rest of this cell.
query = "what are some problems with the pharmacy? Give me solutions to deal with it"

def generate_embedding(quer):
    """Embed the query text ``quer`` with the module-level ``embed_model``."""
    return embed_model.get_text_embedding(quer)

# Vector-search stage: compare the query embedding against the "embedding"
# field and keep the single best match out of 50 candidates.
vector_search_stage = {
    "$vectorSearch": {
        "queryVector": generate_embedding(query),
        "path": "embedding",
        "numCandidates": 50,
        "limit": 1,
        "index": "RAGIndexing",
    }
}
results = collection.aggregate([vector_search_stage])

# Concatenate the text of every returned document into one context string.
context = ""
for document in results:
    matched_text = document["text"]
    print(type(document))
    print(f'Text present: {matched_text}\n')
    context = context + matched_text

print("Query worked")
print(context)

# Load the TinyLlama chat model as a text-generation pipeline.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

print("Pipeline code worked")


def prompt_tinyllama(prompt, system_prompt=""):
    """Send one system+user chat turn through ``pipe`` and return the reply text.

    Args:
        prompt: The user message.
        system_prompt: Optional system message placed before the user message.

    Returns:
        The text that follows the assistant marker of the rendered chat prompt,
        cut at any further ``<|assistant|>`` marker. If the generated text
        contains no marker at all, the whole generated text is returned
        (previously this raised IndexError).
    """
    marker = "<|assistant|>"
    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {"role": "user", "content": prompt},
    ]
    chat_prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    outputs = pipe(chat_prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
    generated = outputs[0]["generated_text"]

    # When the output echoes the prompt, start after the prompt's LAST marker:
    # a marker occurring inside the user/context text must not be mistaken for
    # the start of the reply.
    cut = chat_prompt.rfind(marker)
    if cut != -1 and generated.startswith(chat_prompt):
        reply = generated[cut + len(marker):]
    else:
        _, found, reply = generated.partition(marker)
        if not found:
            return generated
    # Stop at any further marker produced by the model.
    return reply.split(marker)[0]


# Build the user prompt from the retrieved context and the question, then ask the model.
system_prompt = "You are an expert in this field and always provide detailed and accurate answers"
prompt = f"With the following context- {context}\nAnswer the following query {query}"
print(f"Querying: {prompt}")

response = prompt_tinyllama(prompt, system_prompt)
print(response)





<class 'dict'>
Text present: Long Wait Times: Understaffing: Insufficient pharmacy staff can lead to long waiting times for patients seeking medication or consultation. Repetitive Tasks: Manual processes and excessive paperwork can slow down pharmacists and technicians. Inefficient Workflow: Poorly designed workflow can create bottlenecks and hinder timely service.

Query worked
Long Wait Times: Understaffing: Insufficient pharmacy staff can lead to long waiting times for patients seeking medication or consultation. Repetitive Tasks: Manual processes and excessive paperwork can slow down pharmacists and technicians. Inefficient Workflow: Poorly designed workflow can create bottlenecks and hinder timely service.
Pipeline code worked
Querying: With the following context- Long Wait Times: Understaffing: Insufficient pharmacy staff can lead to long waiting times for patients seeking medication or consultation. Repetitive Tasks: Manual processes and excessive paperwork can slow down pharmac