In [10]:
from langchain_classic.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter

In [8]:
def load_pdf_files(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [9]:
%pwd

'e:\\chatbot\\medical-assistance-chatbot\\research'

In [10]:
extracted_data = load_pdf_files("../data")

In [11]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    minimal_docs: List[Document] = []
    for doc in docs:
        src = doc.metadata.get("source")
        minimal_docs.append(
            Document(
                page_content=doc.page_content,
                metadata={"source": src},
            )
        )
    return minimal_docs

In [12]:
minimal_docs = filter_to_minimal_docs(extracted_data)
len(minimal_docs)

637

## Chunking 

In [13]:
def text_split(minimal_docs):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
        length_function=len,
    )
    texts_chunks = text_splitter.split_documents(minimal_docs)
    return texts_chunks

In [14]:
texts_chunks = text_split(minimal_docs)
len(texts_chunks)

5859

## Embeddings

In [12]:
from langchain_classic.embeddings import HuggingFaceEmbeddings

def download_embeddings():
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
    )
    return embeddings

In [13]:
embedding = download_embeddings()

  embeddings = HuggingFaceEmbeddings(


In [5]:
# vector = embedding.embed_query("This is a test")
# len(vector)

## Pinecone

In [14]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [15]:
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")


In [16]:
from pinecone import Pinecone
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)

In [17]:
pinecone_client

<pinecone.pinecone.Pinecone at 0x1d5011ec980>

In [18]:
from pinecone import ServerlessSpec

index_name = "medical-assistance-chatbot"

if not pinecone_client.has_index(index_name):
    pinecone_client.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud = "aws",
            region = "us-east-1",
        ),
    )

index = pinecone_client.Index(index_name)

<pinecone.db_data.index.Index at 0x1ccd58042f0>

In [None]:
# from langchain_pinecone import PineconeVectorStore

# docsearch = PineconeVectorStore.from_documents(
#     documents = texts_chunks,
#     embedding=embedding,
#     index_name=index_name,
# )


In [19]:
## Load from existing index

from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_existing_index(
    embedding=embedding,
    index_name=index_name,
)

In [20]:
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [13]:
retriever_docs = retriever.invoke("What are the symptoms of diabetes?")

In [14]:
retriever_docs

[Document(id='b741ffff-5a44-4b39-8e54-5c743d8ec2fc', metadata={'source': '..\\data\\Medical_book.pdf'}, page_content='• Type I diabetes mellitus. Characterized by fatigue and\nan abnormally high level of glucose in the blood\n(hyperglycemia).\n• Amyotrophic lateral schlerosis. First signs are stum-\nbling and difficulty climbing stairs. Later, muscle\ncramps and twitching may be observed as well as\nweakness in the hands making fastening buttons or\nturning a key difficult. Speech may become slowed or\nslurred. There may also be difficluty swallowing. As\nrespiratory muscles atrophy, there is increased danger'),
 Document(id='74d234d5-d350-4bad-98d5-71a7ea0e99d2', metadata={'source': '..\\data\\Medical_book.pdf'}, page_content='begin to fall. A person with diabetes mellitus either does\nnot make enough insulin, or makes insulin that does not\nwork properly. The result is blood sugar that remains\nhigh, a condition called hyperglycemia.\nDiabetes must be diagnosed as early as possible. 

## LLM

In [21]:
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

In [22]:
# !pip install -q -U google-genai

# from google import genai

# # The client gets the API key from the environment variable `GEMINI_API_KEY`.
# client = genai.Client()

# response = client.models.generate_content(
#     model="gemini-2.5-flash", contents="Explain how AI works in a few words"
# )
# print(response.text)

In [23]:
# !pip install -q -U langchain-google-genai

from langchain_google_genai import ChatGoogleGenerativeAI


In [24]:
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [None]:
# messages = [
#     (
#         "system",
#         "You are a helpful assistant that translates English to French. Translate the user sentence.",
#     ),
#     ("human", "I love programming."),
# ]
# ai_msg = model.invoke(messages)
# ai_msg

AIMessage(content="J'adore la programmation.", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': [], 'model_provider': 'google_genai'}, id='lc_run--019af891-0d90-7881-86d2-1d8aad662749-0', usage_metadata={'input_tokens': 21, 'output_tokens': 7, 'total_tokens': 28, 'input_token_details': {'cache_read': 0}})

In [27]:
# from langchain.chains import create_retrieval_chain
from langchain_classic.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [53]:
system_prompt = (
    "You are a medical assistant for question-answering with the patient."
    "\n\n"
    "Below is the RAG obtained context that might help:"
    "{context}"
    
)

In [54]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [55]:
# from langchain.chains.combine_documents import StuffDocumentsChain
# from langchain_community.document_combiners import StuffDocumentsChain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain


# question_answer_chain = StuffDocumentsChain(
#     llm=model,
#     prompt=prompt,
#     document_variable_name="context"
# )

question_answer_chain = create_stuff_documents_chain(model, prompt)

rag_chain = create_retrieval_chain(
    retriever, question_answer_chain
)

In [56]:
response = rag_chain.invoke({"input": "what is are the symptoms of diabetes? explain in detail"})
print(response["answer"])

Based on the provided information, the symptoms of diabetes include:

*   **Fatigue:** This is a characteristic symptom, particularly noted for Type I diabetes mellitus.
*   **Hyperglycemia:** This refers to an abnormally high level of glucose (sugar) in the blood. It's a core condition of diabetes, resulting from the body not making enough insulin or the insulin not working properly, causing blood sugar to remain high.


In [57]:
response = rag_chain.invoke({"input": "what is are the different types of diabetes? explain in detail"})
print(response["answer"])

Based on the information provided, the resources define **diabetes mellitus** as a disorder of carbohydrate metabolism caused by a combination of hereditary and environmental factors.

The text explains that a person with diabetes either:
*   Does not make enough insulin, OR
*   Makes insulin that does not work properly.

This leads to high blood sugar, a condition called **hyperglycemia**.

The provided context emphasizes the importance of early diagnosis, as untreated diabetes can damage or cause failure of the eyes, kidneys, nerves, heart, blood vessels, and other body organs. It also mentions that low blood sugar (**hypoglycemia**) can be discovered through blood sugar testing.

However, the provided text **does not detail the different types of diabetes** (such as Type 1, Type 2, gestational diabetes, etc.). It only provides a general definition and consequences of the condition.


In [58]:
response = rag_chain.invoke({"input": "hy my name is acne"})
print(response["answer"])

Hello Acne, it's nice to meet you. How can I help you today?


In [59]:
response = rag_chain.invoke({"input": "have you seen this movie 3 idiots which have child delivery scene, can you find out what was the problem mother faced while delivery?"})
print(response["answer"])

Based on the information provided, the problem the mother faced during delivery was a **breech delivery with a trapped head**.

In this situation:
*   The baby's head gets stuck, making delivery difficult.
*   Once the baby's body is born, the umbilical cord usually stops pulsating, which cuts off the oxygen supply from the mother to the baby.
*   Since the baby cannot yet breathe on its own, it is critical to deliver the head quickly.
*   If the head remains trapped, there is a high possibility of injury to the baby's neck or head, which could result in permanent handicaps or, in extreme cases, death if the baby cannot be delivered within a few minutes.
