# Import Libraries


In [50]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore  
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from dotenv import load_dotenv
import os
load_dotenv()

True

# Extract text from PDF files

In [4]:

def load_pdf_files(data):
    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )
    
    documents = loader.load()
    return documents

In [5]:
extracted_data=load_pdf_files("../data")

In [6]:
extracted_data[10]

Document(metadata={'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'creator': 'PyPDF', 'creationdate': '2004-12-18T17:00:02-05:00', 'moddate': '2004-12-18T16:15:31-06:00', 'source': '..\\data\\Medical_book.pdf', 'total_pages': 637, 'page': 10, 'page_label': '11'}, page_content='Rhonda Cloos, R.N.\nMedical Writer\nAustin, TX\nGloria Cooksey, C.N.E\nMedical Writer\nSacramento, CA\nAmy Cooper, M.A., M.S.I.\nMedical Writer\nVermillion, SD\nDavid A. Cramer, M.D.\nMedical Writer\nChicago, IL\nEsther Csapo Rastega, R.N., B.S.N.\nMedical Writer\nHolbrook, MA\nArnold Cua, M.D.\nPhysician\nBrooklyn, NY\nTish Davidson, A.M.\nMedical Writer\nFremont, California\nDominic De Bellis, Ph.D.\nMedical Writer/Editor\nMahopac, NY\nLori De Milto\nMedical Writer\nSicklerville, NJ\nRobert S. Dinsmoor\nMedical Writer\nSouth Hamilton, MA\nStephanie Dionne, B.S.\nMedical Writer\nAnn Arbor, MI\nMartin W. Dodge, Ph.D.\nTechnical Writer/Editor\nCentinela Hospital and Medical\nCenter\nInglewood, CA\nDavid Doermann\nMedical

In [7]:
len(extracted_data)

637

# filter_to_minimal_documents

In [8]:
from typing import List
from langchain.schema import Document
def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """
    Given a list of Document objects, return a new list of Document objects
    containing only 'source' in metadata and the original page_content.
    """
    minimal_docs: list[Document] = []
    for doc in docs:
        src = doc.metadata.get("source")
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source": src},
            )
        )
    
    return minimal_docs

In [9]:
minimal_docs=filter_to_minimal_docs(extracted_data)


# Split the documents into smaller chunks

In [10]:

def text_split(minimal_docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20
    )
    texts_chunks= text_splitter.split_documents(minimal_docs)
    return texts_chunks

In [11]:
texts_chunks = text_split(minimal_docs)
print(f"Number of chunks:{len(texts_chunks)}")

Number of chunks:5859


# Vector Embeddings

In [12]:
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings


In [13]:
embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [14]:
query_text = "What is a common allergy?"
embedding_vector = embeddings.embed_query(query_text)

In [15]:
print("Sample of the embedding vector (first 20 numbers):")
print(embedding_vector[:20])

Sample of the embedding vector (first 20 numbers):
[0.04702260345220566, -0.006634933408349752, -0.01139666885137558, -0.031360842287540436, 0.033007990568876266, 0.03826579824090004, 0.07354530692100525, 0.08192341774702072, -0.08451011031866074, 0.027469314634799957, 0.0592004731297493, -0.06414079666137695, -0.026036029681563377, 0.05833462253212929, -0.04474414139986038, 0.06623373180627823, -0.011161021888256073, -0.023217888548970222, 0.014017648063600063, -0.05721267685294151]


# Storing Embeddings in Pinecone

In [51]:

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [27]:
index_name = "medical-chatbot"

In [None]:
vector_store = PineconeVectorStore.from_documents(
    texts_chunks,
    embeddings,
    index_name=index_name
)

In [28]:
print("Connecting to existing index in Pinecone...")
vector_store = PineconeVectorStore.from_existing_index(
    index_name,
    embeddings
)
print("Connection successful!")

Connecting to existing index in Pinecone...
Connection successful!


# Similarity Search & Retrieval

In [20]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [21]:
retrieved_docs = retriever.invoke("What is Acne?")
retrieved_docs

[Document(id='1811b710-fbe5-43b2-8394-d83e0536b4c5', metadata={'source': '..\\data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='d4a1434f-3129-432c-a390-1e9117f54591', metadata={'source': '..\\data\\Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25\nAcne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceous\nglands become inflamed. (Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(id='ea02179f-578f-49a5-b56d-a4bcaa38bdc4', metadata={'source': '..\\data\\Medical_book.pdf'}, page_content='Acidosis see Respiratory acidosis; Renal\ntubular acidosis; Metabolic acidosis\nAcne\nDefinition\nAcne is a common skin disease characterized by\npimples on the face, chest, and back. It occurs when the\npores of the skin becom

In [22]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [52]:
from langchain_groq import ChatGroq


# Initialize the ChatGroq LLM with the correct model name and your key
llm = ChatGroq(
    model_name="gemma2-9b-it", 
    api_key=GROQ_API_KEY,
    temperature=0  
)

In [62]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
system_prompt = (
    "You are an Medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)



prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [64]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableWithMessageHistory
from langchain_core.output_parsers import StrOutputParser


# 2. Helper function to format the retrieved documents
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# 3. Build the RAG Chain
# This chain combines the retriever, document formatter, prompt, and LLM
rag_chain = (
    RunnablePassthrough.assign(
        context=lambda x: format_docs(retriever.get_relevant_documents(x["input"]))
    )
    | prompt
    | llm
    | StrOutputParser()
)

print("✅ RAG chain built successfully!")

✅ RAG chain built successfully!


In [65]:
from langchain.memory import ChatMessageHistory
session_history = ChatMessageHistory()

# This is the new, memory-aware chain
chain_with_history = RunnableWithMessageHistory(
    rag_chain,
    lambda session_id: session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)



In [70]:
user_query = "What are the symptoms of Acne?"

config = {"configurable": {"session_id": "user123"}}
final_answer = chain_with_history.invoke(
    {"input": user_query},
    config=config
)
print(final_answer)

Acne symptoms include blemishes, which may be inflamed or not. These blemishes can be deep or superficial and may cause scarring or skin discoloration.  



In [71]:
query = "What are the symptoms of an allergy?"
final_answer = chain_with_history.invoke(
    {"input": user_query},
    config=config
)
print(final_answer)

Acne symptoms include blemishes, which may be inflamed or not. These blemishes can be deep or superficial and may cause scarring or skin discoloration.  



In [73]:

query ="what is Acromegaly and gigantism?"
final_answer = chain_with_history.invoke(
    {"input": user_query},
    config=config
)
print(final_answer)

Acne symptoms include blemishes, which may be inflamed or not. They can be deep or superficial and may cause scarring or skin discoloration.  



In [56]:
print("\n✅ RAG chain built successfully! The chatbot is ready.")


✅ RAG chain built successfully! The chatbot is ready.


In [75]:
while True:
    user_query = input("\nAsk a medical question: ")
    print('User_Question:',user_query)
    if user_query.lower() in ["exit", "quit"]:
        print("Exiting chatbot. Goodbye!")
        break

    if not user_query.strip():
        continue

    # Invoke the new chain. It requires a 'config' to specify the session.
    config = {"configurable": {"session_id": "user123"}}
    final_answer = chain_with_history.invoke(
        {"input": user_query},
        config=config
    )
    
    print("\n--- Answer ---")
    print(final_answer)
    print("----------------")


User_Question: why do we get fever

--- Answer ---
Fever is your body's natural defense mechanism against infection or illness. 

When you get sick, your immune system releases chemicals that raise your body temperature. This helps to kill off invading germs and speed up your body's healing process. 

----------------
User_Question: what are the symptoms of it

--- Answer ---
Common symptoms of fever include:

*  Feeling hot to the touch
*  Chills
*  Sweating
*  Headache
*  Fatigue
*  Loss of appetite 




----------------
User_Question: how to cure

--- Answer ---
It's important to understand that fever itself isn't a disease, but a symptom.  

To "cure" a fever, you need to treat the underlying cause. This could involve:

* **Rest:** Give your body time to heal.
* **Fluids:** Stay hydrated by drinking plenty of water, broth, or electrolyte drinks.
* **Over-the-counter medications:** Acetaminophen (Tylenol) or ibuprofen (Advil, Motrin) can help reduce fever and discomfort.
* **Treatin