In [1]:
import os

# Move the working directory to the parent of the directory the kernel started in.
# A bare os.chdir("../") climbs one more level every time this cell is re-run,
# which silently breaks the relative 'data/' and './chroma' paths used below.
# Remembering the starting directory makes the cell safe to execute repeatedly.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, ".."))

In [1]:
from langchain_ollama import OllamaLLM
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

In [2]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

In [5]:
# Extract data from the PDF files
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan; only entries matching ``*.pdf``
            are picked up.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory, with
        each file read through ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf")
    return pdf_loader.load()

In [6]:
# Load every PDF under data/ (path is relative to the current working directory).
extracted_data=load_pdf_file(data='data/')

In [6]:
# Preview only the first few documents; displaying the whole list floods the notebook output.
extracted_data[:3]

[Document(metadata={'source': 'data\\11. Cancer Prevention and Control in India Author Cherian Varghese.pdf', 'page': 0}, page_content='50 Years of Cancer Control in India\n48\nCANCER PREVENTION AND CONTROL IN INDIA\nCherian Varghese\nNon-communicable diseases including cancer are emerging as\nmajor public health problems in India. These diseases are lifestyle\nrelated, have a long latent period and needs specialised infrastructure\nand human resource for treatment. India still has a high burden of\npreventable communicable diseases and will offer competition for the\nresource allocation.\nThe risk factors of the major non-communicable diseases\n(Diabetes Mellitus, Cardiovascular Diseases, Diabetes, and many types\nof Cancer) are tobacco, dietary habits, inadequate physical activity\nand alcohol consumption. This offers the prospect for integrated\nprimary prevention strategies.\nMagnitude of the problem\nPopulation based cancer registries within the National Cancer Registry\nProgramme

In [8]:
# Split the data into text chunks
def text_split(extracted_data, chunk_size=1000, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        extracted_data: Documents to split (e.g. the output of ``load_pdf_file``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as its
            ``chunk_size``. Defaults to 1000, the value previously hard-coded.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as its
            ``chunk_overlap``. Defaults to 20, the value previously hard-coded.

    Returns:
        The chunks produced by ``split_documents`` for the given documents.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [9]:
# Chunk every loaded document and report how many chunks were produced.
text_chunks=text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 2219


In [10]:
# Preview only the first few chunks; the full list has thousands of entries.
text_chunks[:3]

[Document(metadata={'source': 'data\\11. Cancer Prevention and Control in India Author Cherian Varghese.pdf', 'page': 0}, page_content='50 Years of Cancer Control in India\n48\nCANCER PREVENTION AND CONTROL IN INDIA\nCherian Varghese\nNon-communicable diseases including cancer are emerging as\nmajor public health problems in India. These diseases are lifestyle\nrelated, have a long latent period and needs specialised infrastructure\nand human resource for treatment. India still has a high burden of\npreventable communicable diseases and will offer competition for the\nresource allocation.\nThe risk factors of the major non-communicable diseases\n(Diabetes Mellitus, Cardiovascular Diseases, Diabetes, and many types\nof Cancer) are tobacco, dietary habits, inadequate physical activity\nand alcohol consumption. This offers the prospect for integrated\nprimary prevention strategies.\nMagnitude of the problem\nPopulation based cancer registries within the National Cancer Registry\nProgramme

In [8]:
# Embedding model shared by the index build and the retriever below.
embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


NOTE: `all-MiniLM-L6-v2` produces embeddings of dimension 384 (checked in the next cell).

In [11]:
# Sanity check: embed a sample string and print the length of the resulting vector.
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [12]:
# Build the Chroma index only when no persisted index exists yet.
# Chroma.from_documents adds to whatever is already stored in persist_directory,
# so re-running this cell unguarded embeds and stores every chunk again and
# leaves duplicate entries that crowd out distinct results at retrieval time.
# Delete ./chroma to force a rebuild after the PDFs or the chunking change.
if os.path.isdir('./chroma') and os.listdir('./chroma'):
    vectordb = Chroma(persist_directory='./chroma', embedding_function=embeddings)
else:
    vectordb = Chroma.from_documents(text_chunks, embedding=embeddings, persist_directory='./chroma')

In [9]:
# Load the vector database from the persisted directory and expose it as a retriever.
# The number of results has to be passed inside search_kwargs: a bare k=3 keyword
# is not a retriever field, so it is dropped and the default result count is used.
retriever = Chroma(
    persist_directory="./chroma",
    embedding_function=embeddings
).as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [19]:
# Model served through Ollama; used below both to rewrite questions and to answer them.
llm = OllamaLLM(model="hf.co/sathvik123/llama3-ChatDoc")

In [40]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory

We make two calls to the LLM:
- First, to rewrite the latest question into a standalone question using the chat history, via `create_history_aware_retriever`
- Second, to answer that question from the retrieved RAG context, via `create_stuff_documents_chain` and `create_retrieval_chain`

In [27]:
### Contextualize question ###
# System instruction for the question-rewriting step. The adjacent literals are
# concatenated into one single-line string (no newlines in the final text).
contextualize_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt for the rewriting step: system instruction, then the prior turns
# (filled in from "chat_history"), then the latest user question ("input").
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [28]:
# Wrap the plain retriever so the question can be reformulated with the chat history first.
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

In [30]:
# System instruction for the answering step. The sentences are joined into one
# line, followed by a newline and the {context} slot for the retrieved documents.
sys_prompt = (
    "You are a medical professional. "
    "Use the following pieces of retrieved context to answer the question. "
    "It should consist of paragraph and conversational aspect rather than just a summary. "
    "If you don't know the answer, just say that you don't know. "
    "\n"
    "{context}"
)

In [31]:
# Prompt for the answering step: system instruction (with the retrieved context),
# then the prior turns from "chat_history", then the user question ("input").
my_prompt = ChatPromptTemplate.from_messages([
    ("system", sys_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

In [33]:
# Answering chain: feeds the retrieved documents into my_prompt's {context} and calls the LLM.
qa_chain=create_stuff_documents_chain(llm, my_prompt)

In [63]:
# Full RAG pipeline: history-aware retrieval followed by the answering chain.
rag_chain=create_retrieval_chain(history_aware_retriever, qa_chain)

Manual approach: keep `chat_history` in a list ourselves and pass it in on every call

In [46]:
# Conversation so far, maintained by hand; starts empty.
chat_history = []

In [47]:
from langchain_core.messages import HumanMessage, AIMessage

In [49]:
# First turn, sent together with the (still empty) chat_history.
response1 = rag_chain.invoke({"input": "What causes asthma?", "chat_history":chat_history})
response1

{'input': 'What causes asthma?',
 'chat_history': [],
 'context': [Document(metadata={'page': 191, 'source': 'RAG\\data\\Medical Symptoms_ A Visual Guide, 2nd Edition_ The Easy Way -- Miezan van Zyl -- 2, 2022 -- DK; 2nd edition (April 5, 2022) -- 9780744051650 -- c43b321a088a9e7d5d2a65c429208c23 -- Anna’s Archive.pdf'}, page_content='house dust mites, or pet dander (minute \nscales from animal hair, feathers, and skin), \nbut an attack may also be triggered by \nfactors such as inhaled chemicals, irritants, \nor dusts; certain medications; stress; \nexercise; or respiratory infections. \nAn asthma attack causes the sudden \nonset of wheezing, shortness of breath, \ntightness of the chest, and coughing. In  \na severe attack, breathlessness may be so \nbad that speaking is impossible; the lips, \nfingers, and toes may turn blue; and the \nperson may become unconscious. Asthma \nis treated with medication to prevent or \nrelieve attacks. A severe attack requires \nurgent medical help.\n

In [50]:
# Record the first exchange (question, then answer) in the running history.
# Note: running this cell again appends the same pair a second time.
chat_history += [
    HumanMessage(content=response1["input"]),
    AIMessage(content=response1["answer"]),
]

In [51]:
# Follow-up whose "its" only makes sense given the previous turn; the history is passed along.
response2 = rag_chain.invoke({"input": "What are its symptoms?", "chat_history":chat_history})
response2

{'input': 'What are its symptoms?',
 'chat_history': [HumanMessage(content='What causes asthma?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Asthma is a lung disorder that affects the air passages leading to your lungs. The airway inflammation occurs when your immune system produces histamine as an allergic response to certain substances.\n\nThere are many triggers of asthma that include:\n\n1. Allergies: Exposure to dust mites, mold, animal dander, and insect bites may cause symptoms.\n2. Irritants: You can develop asthma symptoms from breathing in pollutants such as smoke, car fumes, or aerosols.\n3. Respiratory infections: Infections such as the common cold or flu can trigger an asthma attack.\n4. Exercise: If you have exercise-induced asthma, your airways may narrow when you exercise and become inflamed due to excessive exercise.\n\nThe symptoms of asthma are similar to those associated with allergies, which include:\n\n1. Wheezing (a high-pitched sound while

Automated approach (from the LangChain documentation): `RunnableWithMessageHistory` keeps a separate chat history per session id

In [72]:
# Maps session id -> chat history object; filled lazily by get_session_history.
store = {}

In [65]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories are kept in the module-level ``store`` dict, keyed by session id.
    An id that is not in ``store`` yet gets a new, empty ``ChatMessageHistory``,
    which is saved under that id and returned.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history

In [66]:
# Wrap rag_chain so the history is looked up through get_session_history
# instead of being passed in by hand on every call.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    # Key of the user question in the invoke() input dict.
    input_messages_key="input",
    # Name under which the history is supplied; matches MessagesPlaceholder("chat_history").
    history_messages_key="chat_history",
    # Key of the chain output that holds the answer text.
    output_messages_key="answer",
)

In [73]:
# Ask the first question under session id "qq1"; only the answer text is displayed.
conversational_rag_chain.invoke(
    {"input": "What causes asthma?"},
    config={"configurable": {"session_id": "qq1"}
    }, 
)["answer"]

"Hi! It's nice to know that you are interested in knowing the causes of asthma. Asthma is a chronic inflammatory disease that affects the respiratory tract and is characterized by periodic episodes of wheezing, coughing, chest tightness, and shortness of breath. The exact causes of this condition are still not well understood but are known to be influenced by various factors. Some of these include: 1) Genetic predisposition - if you have a family history of asthma or other allergies, you may be at greater risk for developing it. 2) Allergies - exposure to allergens such as dust mites, pollen, and pet dander can trigger an allergic reaction in the airways. This is often accompanied by symptoms like wheezing, coughing, and shortness of breath. 3) Respiratory infections - viral infections that cause respiratory tract infections can also trigger asthma attacks. 4) Air pollution - exposure to polluted air can irritate your airways and make them more susceptible to inflammation. 5) Stress - 

In [74]:
# Follow-up in the same session ("qq1"), so the history stored for that id is reused.
conversational_rag_chain.invoke(
    {"input": "What are its symptoms"},
    config={"configurable": {"session_id": "qq1"}
    }, 
)["answer"]

'Asthma is a respiratory disease characterized by recurring episodes of wheezing, coughing, chest tightness, and shortness of breath. Symptoms may vary from person to person but generally include:\n\n* Wheezing (a whistling or high-pitched sound)\n* Coughing (often persistent and worsened at night)\n* Chest tightness or heaviness\n* Shortness of breath, especially after physical activity\n* A feeling of constriction in the chest\n* Painful breathing\n* In some cases, nausea, vomiting, dizziness, and headache may occur\nIn severe asthma attacks, symptoms can be quite intense. They include:\n\n* Sudden onset of wheezing or coughing\n* Difficulty speaking due to shortness of breath\n* Blue-tinged fingers and toes (cyanosis)\n* Inability to sleep at night because of breathing difficulty\nAsthma symptoms are typically exacerbated by exposure to certain allergens, irritants, stress, physical activity, or respiratory infections. If you experience any of these symptoms for an extended period, 

In [75]:
# Inspect the stored histories: "qq1" should now hold both exchanges.
store

{'qq1': InMemoryChatMessageHistory(messages=[HumanMessage(content='What causes asthma?', additional_kwargs={}, response_metadata={}), AIMessage(content="Hi! It's nice to know that you are interested in knowing the causes of asthma. Asthma is a chronic inflammatory disease that affects the respiratory tract and is characterized by periodic episodes of wheezing, coughing, chest tightness, and shortness of breath. The exact causes of this condition are still not well understood but are known to be influenced by various factors. Some of these include: 1) Genetic predisposition - if you have a family history of asthma or other allergies, you may be at greater risk for developing it. 2) Allergies - exposure to allergens such as dust mites, pollen, and pet dander can trigger an allergic reaction in the airways. This is often accompanied by symptoms like wheezing, coughing, and shortness of breath. 3) Respiratory infections - viral infections that cause respiratory tract infections can also tr

In [76]:
# New session id ("qq2"): this question gets its own history, separate from "qq1".
conversational_rag_chain.invoke(
    {"input": "For the past 2 days I have pain in my lower back, and i also loose my apatite sometimes. Should i get medical help?"},
    config={"configurable": {"session_id": "qq2"}
    }, 
)["answer"]

'Hi dear, Thanks for contacting Chat Doctor. As per your complaint you are suffering from a low backache which is not of serious nature and there is no need to worry about it. This kind of pain can be caused by muscle strain or stress. Sometimes, it can also be due to improper lifting technique while doing some physical work. So better to avoid lifting heavy objects until the pain subsides. You should take a warm bath with water for 15 minutes and then apply hot fomentation over the affected area. This will help in quick relief of your symptoms. If still you do not find any relief, please consult a medical specialist who can assess your condition and prescribe you some medicines which would really help in reducing your pain and discomfort. I hope this information helps you to understand your health issue better and I wish you good health. Thanks for using Chat Doctor. Bye take care.'

In [77]:
# Inspect the stored histories again, after the "qq2" question.
store

{'qq1': InMemoryChatMessageHistory(messages=[HumanMessage(content='What causes asthma?', additional_kwargs={}, response_metadata={}), AIMessage(content="Hi! It's nice to know that you are interested in knowing the causes of asthma. Asthma is a chronic inflammatory disease that affects the respiratory tract and is characterized by periodic episodes of wheezing, coughing, chest tightness, and shortness of breath. The exact causes of this condition are still not well understood but are known to be influenced by various factors. Some of these include: 1) Genetic predisposition - if you have a family history of asthma or other allergies, you may be at greater risk for developing it. 2) Allergies - exposure to allergens such as dust mites, pollen, and pet dander can trigger an allergic reaction in the airways. This is often accompanied by symptoms like wheezing, coughing, and shortness of breath. 3) Respiratory infections - viral infections that cause respiratory tract infections can also tr