<a href="https://colab.research.google.com/github/Sourabh92133/medicine_rag_system/blob/main/Medical_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount("/content/drive")
!ls"/content/drive/MyDrive/Medicine_database.csv"

In [None]:
!pip install gradio
!pip install langchain langchain_chroma chromadb scikit-learn plotly
!pip install langchain-groq
!pip install -U langchain_community

In [None]:
!pip install langchain_text_splitters

In [None]:
from langchain_community.document_loaders import CSVLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.documents import Document
from langchain_chroma import Chroma
import numpy as np
import plotly.graph_objects as go
from sklearn.manifold import TSNE

In [None]:
import os
import glob
import gradio as gr

In [None]:
# Read the medicine CSV into documents, using the "product_manufactured"
# column as each document's source value.
loader=CSVLoader(
    file_path="/content/drive/MyDrive/Medicine_database.csv",
    source_column="product_manufactured",
    encoding="ISO-8859-1",
)
document=[]
lst_doc=loader.load()
print(lst_doc[0])
for row_doc in lst_doc:
  # Move the value stored under "source" to a "manufacture" key (None when "source" is absent).
  row_doc.metadata["manufacture"]=row_doc.metadata.pop("source",None)
  document.append(row_doc)

In [None]:
# Sanity check: number of loaded documents, the type of one entry, and the first entry itself.
print(len(document))
first_document=document[0]
print(type(first_document))
first_document

In [None]:
# chunking
# CharacterTextSplitter only cuts where its `separator` occurs, and the default separator
# is a blank line ("\n\n"). The CSV rows loaded above are single-newline "column: value"
# lines, so with the default nothing is ever split and chunk_size/chunk_overlap have no
# effect. Splitting on "\n" lets the splitter actually keep chunks within chunk_size.
textsplitter=CharacterTextSplitter(separator="\n",chunk_size=1000,chunk_overlap=200)
chunk=textsplitter.split_documents(document)

In [None]:
# Report how many chunks were produced, then show the container type as the cell output.
chunk_count=len(chunk)
print(chunk_count)
type(chunk)


In [None]:
# Display the first chunk as the cell output to eyeball its content and metadata.
chunk[0]

In [None]:
# Create the sentence-embedding model used by both vector stores below.
# NOTE(review): recent langchain_community releases appear to deprecate this class in favour of
# the separate `langchain-huggingface` package; confirm against the installed version.
from langchain_community.embeddings import HuggingFaceEmbeddings
embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Directory where the Chroma vector store is persisted.
db_name="vector_database"
# If that directory already exists, open the store in it and delete its collection
# before the store is built again in the next cell.
if os.path.exists(db_name):
  Chroma(persist_directory=db_name,embedding_function=embedding).delete_collection()  # drop the previously persisted collection

In [None]:
# Embed every chunk with `embedding` and store the vectors in a Chroma store persisted under `db_name`.
vector_store=Chroma.from_documents(embedding=embedding,documents=chunk,persist_directory=db_name)    # creates the Chroma vector store

In [None]:
# Peek at two stored entries (embedding, text, metadata) and report the embedding length
peek_fields=["embeddings","documents","metadatas"]
vector=vector_store._collection
vec=vector.get(limit=2,include=peek_fields)
print(vec)
first_embedding=vec["embeddings"][0]
print(len(first_embedding))

In [None]:
# Pull every stored entry out of the Chroma collection for the t-SNE plots below.
vectors=vector_store._collection
vector_all=vectors.get(include=["embeddings","documents","metadatas"])  # dict holding all embeddings, texts and metadata
print(type(vector_all))
# t-SNE works on a numeric array, so stack the embeddings into one NumPy array.
vector_np=np.array(vector_all["embeddings"])
# Manufacturer label per vector; .get() keeps entries without that key (label None) instead of raising KeyError.
manufacture=[(metadata or {}).get("manufacture") for metadata in vector_all["metadatas"]]
# Print a short preview instead of dumping the whole list into the cell output.
print(f"{len(manufacture)} vectors; first manufacturers: {manufacture[:10]}")

In [None]:
# let us create a FAISS database
!pip install faiss-cpu

In [None]:
# Build a second, in-memory vector store with FAISS from the same chunks and embedding model.
from langchain_community.vectorstores import FAISS      # vectors are addressed by integer FAISS ids, which the wrapper maps to stored documents
vector_store_FAISS=FAISS.from_documents(documents=chunk,embedding=embedding)

In [None]:
# Inspect the FAISS index and collect every vector together with its document.
number=vector_store_FAISS.index.ntotal     # total number of vectors
dimension=vector_store_FAISS.index.d       # dimension of vectors
print(vector_store_FAISS.index.reconstruct(0))     # vector at index 0
print(number,dimension)
vector_of_faiss=[]
documents=[]
for i in range(number):
  doc_id=vector_store_FAISS.index_to_docstore_id[i]      # docstore id registered for FAISS id i
  # Use a dedicated name here: the original reused `document`, which overwrote the
  # list of loaded documents that the chunking cell reads on a re-run.
  faiss_doc=vector_store_FAISS.docstore.search(doc_id)
  documents.append(faiss_doc)
  vector_of_faiss.append(vector_store_FAISS.index.reconstruct(i))
print(type(vector_store_FAISS))
# Stack the vectors into one NumPy array for t-SNE.
vectors_Faiss_np=np.array(vector_of_faiss)
print(type(vectors_Faiss_np))

In [None]:
# 2-D t-SNE projection of the Chroma embeddings, one red marker per stored vector
tsne=TSNE(n_components=2,random_state=42)
reduced_vec_dimension=tsne.fit_transform(vector_np)

# Hover label: manufacturer plus the first 100 characters of the stored text
hover_labels_2d=[
    f"manufacture_by: {t}, Info:{d[:100]}..."
    for t,d in zip(manufacture,vector_all["documents"])
]

fig=go.Figure(
    data=go.Scatter(
        x=reduced_vec_dimension[:,0],
        y=reduced_vec_dimension[:,1],
        mode="markers",
        marker={"size":10,"opacity":1,"color":"red"},
        text=hover_labels_2d,
        hoverinfo="text",
    )
)

# Square canvas; the margins are the pixel padding between the figure edge and the plot area
layout_2d={
    "title":"Chroma_db_vectors2d",
    "xaxis_title":"X",
    "yaxis_title":"Y",
    "width":800,
    "height":800,
    "margin":{"l":0,"r":0,"b":0,"t":50},
}
fig.update_layout(**layout_2d)
fig.show()

In [None]:
# Same Chroma embeddings, projected to three dimensions with t-SNE
tsne_3d=TSNE(n_components=3,random_state=42)
vec_dimension3d=tsne_3d.fit_transform(vector_np)

# Hover label: manufacturer plus the first 100 characters of the stored text
hover_labels_3d=[
    f"manufactured_by: {t} , info:{d[:100]}..."
    for t,d in zip(manufacture,vector_all["documents"])
]

fig_new=go.Figure(
    data=go.Scatter3d(
        x=vec_dimension3d[:,0],
        y=vec_dimension3d[:,1],
        z=vec_dimension3d[:,2],
        mode="markers",
        marker={"size":10,"opacity":1,"color":"red"},
        text=hover_labels_3d,
        hoverinfo="text",
    )
)

layout_3d={
    "title":"chroma_db_vectors in 3d",
    "scene":{"xaxis_title":"X","yaxis_title":"Y","zaxis_title":"Z"},
    "width":800,
    "height":800,
    "margin":{"l":0,"r":0,"b":0,"t":50},
}
fig_new.update_layout(**layout_3d)
fig_new.show()

In [None]:
# 2-D t-SNE visualization of the vectors stored in FAISS
tsne=TSNE(n_components=2,random_state=42)
vec_dimension2d=tsne.fit_transform(vectors_Faiss_np)

# Hover labels come from the documents collected from the FAISS docstore, which were
# appended in the same order as the rows of vectors_Faiss_np. The original reused the
# Chroma labels, which are not guaranteed to line up with the FAISS ordering.
faiss_hover_text=[
    f"manufactured_by: {d.metadata.get('manufacture')} , info:{d.page_content[:100]}..."
    for d in documents
]

# Plot the FAISS projection computed above. The original plotted reduced_vec_dimension,
# i.e. the Chroma projection, so this figure was just a copy of the Chroma 2-D plot.
fig=go.Figure(data=go.Scatter(
    x=vec_dimension2d[:,0],
    y=vec_dimension2d[:,1],
    mode="markers",
    marker=dict(size=10,opacity=1,color="red"),
    text=faiss_hover_text,
    hoverinfo="text"
))
fig.update_layout(
    title="FAISS_vectors_visualization in 2d",
    xaxis_title="X",
    yaxis_title="Y",
    width=800,
    height=800,
    margin=dict(l=0,r=0,b=0,t=50)     # padding (in pixels) between the figure edges and the plot area
)
fig.show()


In [None]:
# Report which langchain release is installed in this runtime.
import langchain
langchain_version=langchain.__version__
print(langchain_version)


In [None]:
!pip install langchain-classic

In [None]:
from langchain_classic.memory import ConversationBufferMemory
from langchain_classic.chains import ConversationalRetrievalChain
from langchain_groq import ChatGroq


In [None]:
# Read the Groq API key from Colab's user secrets and publish it as the GROQ_API_KEY
# environment variable, so the key itself is never written into the notebook.
# (Presumably ChatGroq picks the key up from this variable; verify against its docs.)
from google.colab import userdata
os.environ["GROQ_API_KEY"]=userdata.get("GROQ_API_KEY")


In [None]:
# Chat model served through Groq
llm=ChatGroq(
    model_name="openai/gpt-oss-120b",
    temperature=0.4,
)
# Conversation memory: the running history is kept under the "chat_history" key
# and handed back as message objects
memory=ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)
# Retriever over the Chroma store; it fetches the context passed to the LLM
retriever=vector_store.as_retriever()
# Tie the LLM, retriever and memory together into one conversational RAG chain
conversation_chain=ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)


In [None]:
# BATCHING SUPPORT FOR MULTIPLE QUERIES
from concurrent.futures import ThreadPoolExecutor, as_completed
import time, random

# Second chain over the same LLM and retriever, built without a memory object,
# for use by the parallel batch queries.
conversation_chain_no_mem = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=None)

def call_chain_single_with_retry(chain, question, retries=3, base_delay=1.0):
    """Run one question through ``chain``, retrying failures with exponential backoff.

    Args:
        chain: Chain object. Its ``invoke`` method is used when present; otherwise
            the object itself is called with the inputs dict.
        question: The question text, sent under the ``"question"`` key.
        retries: Maximum number of attempts. Values below 1 are treated as 1 so
            the chain is always tried at least once.
        base_delay: Base wait in seconds; the wait doubles after every failed
            attempt and gets up to 0.2 s of random jitter added.

    Returns:
        Whatever the chain returns for the question.

    Raises:
        Exception: The error raised by the final attempt is re-raised unchanged.
    """
    # A chain without memory has nothing to fill in ``chat_history``, and
    # ConversationalRetrievalChain treats that key as a required input, so an
    # empty history must be supplied explicitly or every call fails.
    needs_history = getattr(chain, "memory", None) is None
    # Prefer the Runnable-style ``invoke``; fall back to calling the object directly.
    run = getattr(chain, "invoke", None) or chain
    attempts = max(1, retries)

    for attempt in range(1, attempts + 1):
        # Build a fresh inputs dict per attempt so a failed call cannot leak state.
        inputs = {"question": question}
        if needs_history:
            inputs["chat_history"] = []
        try:
            return run(inputs)
        except Exception:
            if attempt == attempts:
                raise
            delay = base_delay * (2 ** (attempt - 1)) + random.random() * 0.2
            time.sleep(delay)

def batch_queries(queries, chain, max_workers=3):
    """Answer several questions concurrently and return the answers in input order.

    Each question is submitted to a thread pool through
    ``call_chain_single_with_retry``. A question whose call ultimately fails
    is reported as an ``"ERROR: ..."`` answer instead of raising.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts, one per query,
        in the same order as ``queries``.
    """
    answers = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Submit everything up front so the calls run in parallel.
        pending = [pool.submit(call_chain_single_with_retry, chain, q) for q in queries]

        # Collect in submission order; each result() blocks until that call is done.
        for query, future in zip(queries, pending):
            try:
                payload = future.result()
                text = payload.get("answer") or payload.get("output_text") or str(payload)
            except Exception as err:
                text = f"ERROR: {err}"
            answers.append({"question": query, "answer": text})
    return answers


In [None]:
# Smoke test: send one question through the memory-backed chain and print its answer.
question="medicine for cold?"
result=conversation_chain.invoke(dict(question=question))
print(result["answer"])

In [None]:
def gradio_function(message, history):
    """Chat callback for the Gradio interface.

    Every non-empty line of ``message`` is treated as one question. Several
    lines are answered in parallel through the memory-less chain; a single
    line goes through ``conversation_chain``, whose memory keeps the chat
    history.

    Args:
        message: Text typed by the user; may hold several questions, one per line.
        history: Chat history supplied by Gradio. It is not used here because
            the conversational chain tracks history in its own memory.

    Returns:
        The answer text, or a ``Q:``/``A:`` transcript with one entry per
        question in batch mode.
    """
    # One question per non-empty line.
    queries = [q.strip() for q in (message or "").split("\n") if q.strip()]

    # Nothing to answer: an empty or whitespace-only message used to raise
    # IndexError on queries[0] below.
    if not queries:
        return "Please enter a question."

    # More than one question: batch mode.
    if len(queries) > 1:
        results = batch_queries(queries, conversation_chain_no_mem, max_workers=3)
        return "".join(
            f"Q: {item['question']}\nA: {item['answer']}\n" + "-"*40 + "\n"
            for item in results
        )

    # Single question: memory-backed chain, called via invoke() like the sample query cell.
    out = conversation_chain.invoke({"question": queries[0]})
    return out.get("answer") or str(out)

In [None]:
# Start the chat UI with gradio_function as the message handler.
# NOTE(review): share=True requests a public share link from Gradio, so anyone with the link
# can query the model; confirm that is intended.
gr.ChatInterface(gradio_function).launch(share=True)