In [2]:
# Show the kernel's working directory before it is changed (recorded run: the research/ folder).
%pwd

'd:\\Projects\\Medical-Assistant-Chatbot-app\\research'

In [3]:
import os

# The notebook runs from research/, while later cells use paths relative to
# the project root (e.g. "Data"). Step up only while still inside research/,
# so re-running this cell does not climb one directory higher each time.
if os.path.basename(os.getcwd()).lower() == "research":
    os.chdir("../")

In [4]:
# Confirm the working directory after the chdir (recorded run: the project root).
%pwd

'd:\\Projects\\Medical-Assistant-Chatbot-app'

In [8]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
def load_pdf(data):
    """
    Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files are
            selected with the ``*.pdf`` glob and read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [12]:
# Load every PDF from the "Data" folder (relative to the current working directory).
data=load_pdf("Data")

In [15]:
from typing import List
from langchain.schema import Document

def filter_doc(docs: List[Document]) -> List[Document]:
    """
    Build slimmed-down copies of LangChain documents.

    Each returned document keeps the original ``page_content`` and a metadata
    dict holding only the ``'source'`` key; every other metadata field is
    dropped. The input documents are not modified.

    Args:
        docs: Documents to copy.

    Returns:
        A new list with one ``Document`` per input document, in the same order.
    """
    return [
        Document(
            page_content=doc.page_content,
            # Fall back to "Unknown" when the source is missing or empty, so
            # the metadata never carries None. Pinecone's metadata docs say
            # null values are not supported.
            metadata={"source": doc.metadata.get("source") or "Unknown"},
        )
        for doc in docs
    ]
    

In [16]:
# Keep only page_content and the 'source' metadata field of each loaded document.
filtered_documents = filter_doc(data)

In [20]:
# Sanity check: number of documents after filtering (recorded run: 4505).
len(filtered_documents)

4505

In [21]:
def split_documents(documents, chunk_size=500, chunk_overlap=50):
    """
    Split documents into smaller chunks with RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to split.
        chunk_size: Value passed as the splitter's ``chunk_size``. Defaults
            to 500, the value this function previously hard-coded.
        chunk_overlap: Value passed as the splitter's ``chunk_overlap``.
            Defaults to 50, the value this function previously hard-coded.

    Returns:
        The chunks returned by ``split_documents`` on the configured splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # Separator candidates, listed from coarsest (blank line) to finest.
        separators=["\n\n", "\n", " ", ""],
    )
    return text_splitter.split_documents(documents)

In [23]:
# Chunk the filtered documents, then display how many chunks were produced
# (recorded run: 41267).
text_chunks=split_documents(filtered_documents)
len(text_chunks)

41267

In [29]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_embedding(model_name: str):
    """
    Create a HuggingFaceEmbeddings object for the given model.

    Args:
        model_name: Model identifier forwarded as ``model_name``.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

    

In [30]:
# Embedding model shared by ingestion and retrieval.
embeddings=download_embedding("sentence-transformers/all-MiniLM-L6-v2")

In [32]:
# Check the embedding vector length (recorded run: 384); the Pinecone index
# created later in this notebook uses dimension=384.
len(embeddings.embed_query("Hi I am Ankit"))

384

In [39]:
# Load environment variables (the API keys read in the next cell) via python-dotenv.
from dotenv import load_dotenv
load_dotenv()

True

In [40]:
# Read the API keys from the process environment.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail fast with a readable message. Without this check a missing key reaches
# `os.environ[...] = None` below and surfaces as an opaque TypeError.
_missing_keys = [
    name
    for name, value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    )
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Define them in the environment or in a .env file."
    )

# Write the validated keys back into the process environment, as the original cell did.
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [41]:
# Create the Pinecone client with the key read from the environment.
from pinecone import Pinecone
pinecone_api_key=PINECONE_API_KEY
pc= Pinecone(api_key=pinecone_api_key)

In [45]:
from pinecone import ServerlessSpec

# Name of the Pinecone index that stores the document chunks.
index_name="medical-assistant-chatbot"

# Create the index only when it does not exist yet, so re-running the cell
# does not attempt a second create.
if not pc.has_index(index_name):
    pc.create_index(
        name= index_name,
        dimension=384,  # length of the vectors produced by the embedding model (see the embed_query check)
        metric="cosine",
        spec=ServerlessSpec(cloud='aws', region='us-east-1')
    )

# Handle to the index for direct Pinecone operations.
index=pc.Index(index_name)

In [47]:
from langchain_pinecone import PineconeVectorStore

# from_documents embeds and upserts every chunk each time it runs, so simply
# re-running this cell would add the whole corpus to the index again. Ingest
# only when the index is empty; otherwise attach to the vectors already there.
# NOTE(review): a partially completed earlier ingestion also counts as
# "populated", and index stats may lag briefly after writes - verify the count
# if an upload was interrupted.
_index_stats = pc.Index(index_name).describe_index_stats()

if _index_stats.total_vector_count == 0:
    vector_store = PineconeVectorStore.from_documents(
        text_chunks,
        embeddings,
        index_name=index_name
    )
else:
    vector_store = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embeddings
    )

In [48]:
# Similarity retriever with k=5; the system prompt defined later tells the
# model it receives the "top 5 results", so keep the two in sync.
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={"k":5})

In [49]:
# Smoke test: display the documents retrieved for a sample question.
retriever.invoke("How to treat Acne?")

[Document(id='74ad7284-5383-42be-ab90-7426c4e9afdc', metadata={'source': 'Data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition-staibabussalamsula.ac_.id_.pdf'}, page_content='Alternative treatment\nAlternative treatments for acne focus on proper\ncleansing to keep the skin oil-free; eating a well-\nbalanced diet high in fiber, zinc, and raw foods; and\navoiding alcohol, dairy products, smoking,caffeine,\nsugar, processed foods, and foods high in iodine,\nsuch as salt. Supplementation with herbs such as\nburdock root (Arctium lappa ), red clover (Trifolium\npratense) ,a n dm i l kt h i s t l e(Silybum marianum ), and\nwith nutrients such as essential fatty acids, vitamin B'),
 Document(id='025b6af5-4a0f-4aac-a347-8204fb1966dc', metadata={'source': 'Data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition-staibabussalamsula.ac_.id_.pdf'}, page_content='complex, zinc, vitamin A, and chromium is also\nrecommended. Chinese herbal remedies used for\nacne include cnidium seed (Cnidium monnieri )a n

In [54]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used to generate the final answers.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=1.0,  # explicit sampling temperature; NOTE(review): consider a lower value for context-only medical answers
    max_tokens=None,
    
)

In [56]:
# Smoke test: call the chat model directly (no retrieval) and show the reply text.
result=model.invoke("Explain the theory of relativity in 2 lines")
result.content

"1. Space and time are not absolute but relative, intertwining and changing based on an observer's motion and speed.\n2. Gravity is not a force, but the curvature of this four-dimensional spacetime caused by mass and energy."

In [77]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the medical RAG assistant: answer only from the
# retrieved context, follow the safety rules, and keep the output short.
# The sentence-count range uses an en dash; it had been stored as mis-decoded
# bytes, which put garbage characters into the prompt sent to the model.
system_prompt = (
"""
You are a medical information assistant.

You will receive:
- Retrieved medical context (top 5 results from a vector database)
- A user medical question

Your task:
- Answer the question using ONLY the provided context.
- Be concise, factual, and easy to understand.
- Do NOT add information that is not present in the context.
- If the context is insufficient, say so clearly.

Safety rules:
- Do NOT provide diagnosis, prescriptions, or personalized medical advice.
- If diagnosis or treatment is implied, include:
  "For diagnosis or treatment, consult a qualified healthcare professional."
- If the question suggests a medical emergency, advise immediate professional help.

Output rules:
- 3–5 sentences maximum
- Plain medical language
- Single paragraph
- No mention of sources, vector databases, or retrieval process
""")

In [78]:
# Two-message chat template: the system instructions, followed by a human
# turn that carries the {context} and {input} placeholders.
medical_rag_chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "\nContext:\n{context}\n\nQuestion:\n{input}\n"),
    ]
)

In [79]:
# Build the answering chain from the chat model and the prompt, then put the
# retriever in front of it to form the full question-answering pipeline.
question_answering_chain = create_stuff_documents_chain(model, medical_rag_chat_prompt)
main_chain= create_retrieval_chain(retriever, question_answering_chain)

In [80]:
# End-to-end check with a question that asks for a prescription, which the
# system prompt's safety rules forbid; print only the generated answer.
response= main_chain.invoke({"input": "Give me prescription for common cold?"})
print(response['answer'])

The provided context does not offer prescriptions for the common cold, as there is no cure and treatment focuses on symptom relief. It mentions over-the-counter medications such as aspirin or NSAIDs for pain, decongestants for stuffiness, and combination remedies containing antihistamines, decongestants, ache relievers like acetaminophen or ibuprofen, and cough suppressants. Aspirin should not be given to children with a cold due to the risk of Reye's syndrome, and infants should not be given over-the-counter remedies without consulting a doctor. For diagnosis or treatment, consult a qualified healthcare professional.
