Medical Chatbot 

In [1]:
from langchain import PromptTemplate 
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


Extract Data from PDF file

In [2]:
#Extract data from PDF 
def load_pdf(data):
    loader = DirectoryLoader(data, 
                        glob="*.pdf",
                        loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

In [3]:
extracted_data = load_pdf("C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data")

Convert PDF to text chunks 

In [4]:
#Create text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [5]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 7020


Initialise Pinecone

In [6]:
PINECONE_API_KEY="ec7f1a2d-e8f6-4843-841f-a0d789cc86b9"

In [7]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

In [8]:
import time

index_name = "medical-chatbot3"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

Create Vector Embeddings

In [9]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  warn_deprecated(


In [10]:
query_result = embeddings.embed_query("Hello World")
print("Length", len(query_result))

Length 384


Store Vector Embeddings in Pinecone

In [11]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [12]:
from uuid import uuid4

from langchain_core.documents import Document


In [13]:
uuids = [str(uuid4()) for _ in range(len(text_chunks))]

vector_store.add_documents(documents=text_chunks, ids=uuids)

['c088d0d1-c231-466e-9991-abc13a5d9445',
 '24837188-bcfa-4c22-89b8-213051f06efc',
 'e6b05116-d94c-4bc0-a6b9-2ae6c25f4cf1',
 '367ae473-13fc-4c22-8200-1b1c070d0dc2',
 'b8dc15b2-130c-40a4-8852-0198aa10edaf',
 '903c71f5-1302-4185-8757-18fa9a93477d',
 '2a11e54c-5cdb-47d1-beb3-e02d9ef07851',
 'a95d9b02-b1b4-44bb-ad28-ef349b03af67',
 '48595a31-2329-4e83-abc6-7be47771c07f',
 'd97e0b92-321f-483c-a74a-4d572b2acdfb',
 '1b65d962-a1eb-44d3-92d5-775a93523f15',
 '9fd49dc2-b9d7-4571-8008-35433137a580',
 'cd9e18a8-cd9e-4412-9e21-f2f7ea4075ec',
 'cefcb134-39fc-4f97-8df4-d40f58c1083d',
 '463b34eb-e268-42e0-86e4-56faa2fc4afd',
 '73625c35-dfec-47d0-bca5-c633c755ff30',
 '9909c57e-5be9-4173-8efd-3b71f57f8d62',
 '2a83e2ff-7326-458c-bdda-dfd4a985b4ae',
 '7bbdb525-2107-44d1-ba71-7bc3cbc8bead',
 '10d55d02-2e97-4f29-aef6-ec1032a48c98',
 'de6e2963-008e-4621-a047-7d2f6172053e',
 'df4e14b3-d50a-4aba-b90d-8da644c0f140',
 '2c13e3cb-e02c-401d-b0fa-04b4ea25638b',
 'f5e12675-24a9-4d3d-a94b-2327dd147bfc',
 '2fd8906e-b078-

In [14]:
index_stats = index.describe_index_stats()
print("Index Statistics:", index_stats)

Index Statistics: {'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 7192}},
 'total_vector_count': 7192}


Document Retrieval and Contextual Similarity Search

In [15]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

retrieved_docs = retriever.invoke("What are Allergies?")

len(retrieved_docs)

3

In [16]:
retrieved_docs

[Document(metadata={'page': 130.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_book.pdf'}, page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE"),
 Document(metadata={'page': 135.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_book.pdf'}, page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'),
 Document(metadata={'page': 129.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_bo

In [17]:
print(retrieved_docs[0].page_content)

GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies
Allergic rhinitis is commonly triggered by
exposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.
The presence of an allergen causes the
body's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.
IgE molecules attach to mast
cells, which contain histamine.HistaminePollen grains
Lymphocyte
FIRST EXPOSURE


In [18]:
print(retrieved_docs[1].page_content)

the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .
The particular allergens to which a person is sensi-


In [19]:
print(retrieved_docs[2].page_content)

allergens are the following:
• plant pollens
• animal fur and dander
• body parts from house mites (microscopic creatures
found in all houses)
• house dust• mold spores• cigarette smoke• solvents• cleaners
Common food allergens include the following:
• nuts, especially peanuts, walnuts, and brazil nuts
• fish, mollusks, and shellfish• eggs• wheat• milk• food additives and preservatives
The following types of drugs commonly cause aller-
gic reactions:
• penicillin or other antibiotics


LLM Model: Hugging Face

In [20]:
# from huggingface_hub import login
# login(token = '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')


In [21]:
# llm = CTransformers(model=r"C:\Users\TIFFANY MUN\Medical-Chatbot-2\model\llama-2-7b-chat.ggmlv3.q4_0.bin",
#                     model_type="llama",
#                     config={'max_new_tokens': 150, 'temperature': 0.5})

LLM Model: Open AI

In [22]:
# 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

In [25]:
import getpass
import os

os.environ["OPENAI_API_KEY"] = getpass.getpass()

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

Retrieval Augmented Generation

In [26]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""


custom_rag_prompt = PromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

result = rag_chain.invoke("What are Allergens?")
print(result)

Allergens are substances that can trigger allergic reactions in some individuals. Common allergens include plant pollens, animal fur, certain foods like nuts and shellfish, and medications such as penicillin. Thanks for asking!


In [27]:
while True:
    user_input = input("Input Prompt: ")

    if user_input.lower() in ['exit', 'quit']:
        print("Exiting the loop.")
        break

    try:
        # Directly pass the user input string
        print(f"Querying with: {user_input}")
        
        result = rag_chain.invoke(user_input)
        
        # Print the result
        print("Response: ", result)
    except Exception as e:
        # Print any exceptions that occur
        print(f"An error occurred: {e}")


Querying with: what are allergens?
Response:  Allergens are substances that can trigger allergic reactions in the body, such as plant pollens, animal fur, and certain foods like nuts and shellfish. Thanks for asking!
Exiting the loop.


Retrieval Augmented Generation Check

In [None]:
formatted_docs = format_docs(retrieved_docs)
print(formatted_docs)  # Print formatted documents to verify


In [None]:
query = "What are Allergens?"
retrieved_docs = retriever.invoke(query)
formatted_docs = format_docs(retrieved_docs)
prompt = custom_rag_prompt.format(context=formatted_docs, question=query)
print("Prompt:", prompt)  # Print the prompt to see if it includes the context

response = llm(prompt)  # Get the LLM response
print("LLM Response:", response)


In [None]:
while True:
    user_input = input("Input Prompt: ")

    if user_input.lower() in ['exit', 'quit']:
        print("Exiting the loop.")
        break

    try:
        # Retrieve relevant documents based on the user input
        retrieved_docs = retriever.invoke(user_input)
        
        # Format the retrieved documents
        formatted_docs = format_docs(retrieved_docs)
        
        # Create the prompt using the formatted context
        prompt = custom_rag_prompt.format(context=formatted_docs, question=user_input)
        print("Prompt:", prompt)  # Print the prompt to see if it includes the context
        
        # Get the LLM response
        response = llm(prompt)
        
        # Print the result
        print("Response:", response)
        
    except Exception as e:
        # Print any exceptions that occur
        print(f"An error occurred: {e}")


Draft

In [None]:
# from langchain_core.prompts import ChatPromptTemplate

# system_prompt = (
#     "You are an assistant for question-answering tasks. "
#     "Use the following pieces of retrieved context to answer "
#     "the question. If you don't know the answer, say that you "
#     "don't know. Use three sentences maximum and keep the "
#     "answer concise."
#     "\n\n"
#     "{context}"
# )

# PROMPT = ChatPromptTemplate.from_messages(
#     [
#         ("system", system_prompt),
#         ("human", "{input}"),
#     ]
# )

# from langchain.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
# question_answer_chain = create_stuff_documents_chain(llm, PROMPT)
# rag_chain = create_retrieval_chain(retriever, question_answer_chain)
# response = rag_chain.invoke({"input": "What is Acne?"})
# response
# print(response["answer"])

Adding Chat History

In [36]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate


contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [30]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [31]:
from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

question = "What is Anal Cancer?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

second_question = "What are some prevention ways?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

To prevent anal cancer, reducing the risks of sexually transmitted diseases such as HPV and HIV is essential, as these infections are linked to an increased risk of anal cancer. Additionally, quitting smoking can help lower the risk of developing anal cancer. Screening and early detection of precancerous conditions can also aid in prevention.


In [35]:
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory


store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [53]:
conversational_rag_chain.invoke(
    {"input": "What is Anal Cancer?"},
    config={"configurable": {"session_id": "abc123"}},  # constructs a key "abc123" in `store`.
)["answer"]

Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


'Anal cancer is an uncommon form of cancer affecting the anus, which is the end portion of the large intestine. Most cases of anal cancer are squamous cell carcinomas that originate from cells lining the anal margin and anal canal. Symptoms include pain, itching, bleeding, changes in bowel habits, and swollen lymph nodes in the anal or groin area.'

In [55]:
conversational_rag_chain.invoke(
    {"input": "What are the symptoms?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


'Symptoms of anal cancer include pain, itching, bleeding, straining during bowel movements, changes in bowel habits, changes in stool diameter, discharge from the anus, and swollen lymph nodes in the anal or groin area. These symptoms can often be mistaken for those of other benign conditions, making it important to seek medical evaluation if experiencing any persistent symptoms associated with anal cancer.'

In [56]:
result = conversational_rag_chain.invoke(
    {"input": "Is it Preventable?"},
    config={"configurable": {"session_id": "abc123"}}
)
print(result)  # Check what the output looks like

Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


{'input': 'Is it Preventable?', 'chat_history': [HumanMessage(content='What is Anal Cancer?'), AIMessage(content='Anal cancer is a rare form of cancer that affects the anus, which is the end portion of the large intestine responsible for expelling solid waste from the body. Most cases of anal cancer are squamous cell carcinomas that originate from the cells lining the anal margin and anal canal. Symptoms of anal cancer can include pain, itching, bleeding, changes in bowel habits, and swollen lymph nodes in the anal or groin area.'), HumanMessage(content='What are the symptoms?'), AIMessage(content='Symptoms of anal cancer include pain, itching, bleeding, straining during bowel movements, changes in bowel habits, changes in stool diameter, discharge from the anus, and swollen lymph nodes in the anal or groin area. These symptoms can often be mistaken for those of other benign conditions, making it important to seek medical evaluation if experiencing any persistent symptoms associated wi

In [57]:
for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: What is Anal Cancer?

AI: Anal cancer is a rare form of cancer that affects the anus, which is the end portion of the large intestine responsible for expelling solid waste from the body. Most cases of anal cancer are squamous cell carcinomas that originate from the cells lining the anal margin and anal canal. Symptoms of anal cancer can include pain, itching, bleeding, changes in bowel habits, and swollen lymph nodes in the anal or groin area.

User: What are the symptoms?

AI: Symptoms of anal cancer include pain, itching, bleeding, straining during bowel movements, changes in bowel habits, changes in stool diameter, discharge from the anus, and swollen lymph nodes in the anal or groin area. These symptoms can often be mistaken for those of other benign conditions, making it important to seek medical evaluation if experiencing any persistent symptoms associated with anal cancer.

User: What is Anal Cancer?

AI: Anal cancer is an uncommon form of cancer that affects the anus, whi

Chatbot with Memory

In [44]:
# Interactive loop for user input
session_id = "abc124"  # You can change this to track multiple sessions if needed

chat_history = []

while True:
    user_input = input("You: ")

    if user_input.lower() in ['exit', 'quit']:
        print("Exiting the conversation.")
        break

    try:
        result = conversational_rag_chain.invoke(
            {"input": user_input},
            config={"configurable": {"session_id": session_id}}
        )
        
        # Extracting the AI's response
        ai_response = result["answer"]
        
        print(f"You: {user_input}")
        print(f"AI: {ai_response}")

        # Updating the chat history
        chat_history.extend(
            [
                HumanMessage(content=user_input),
                AIMessage(content=ai_response),
            ]
        )
    except Exception as e:
        print(f"An error occurred: {e}")

Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


You: what is anal cancer
AI: Anal cancer is a rare form of cancer that affects the anus, which is the end portion of the large intestine responsible for eliminating solid wastes from the body. Most cases of anal cancer are squamous cell carcinomas that develop from cells lining the anal margin and anal canal. Symptoms of anal cancer can include pain, itching, bleeding, changes in bowel habits, and swollen lymph nodes in the anal or groin area.


Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


You: what are its first symptoms
AI: The first symptoms of anal cancer can include pain, itching, bleeding, changes in bowel habits, and swollen lymph nodes in the anal or groin area. These symptoms may resemble those of other conditions initially, such as hemorrhoids or anal fissures. It is important to consult a healthcare provider if experiencing persistent or concerning symptoms to determine the cause.


Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


You: is it preventable?
AI: To prevent anal cancer, reducing the risks of sexually transmitted diseases like HPV and HIV can help lower the risk of developing anal cancer. Additionally, quitting smoking has been linked to a decreased risk of anal cancer. Regular screenings and early detection can also help in preventing anal cancer or catching it in its early stages when treatment is most effective.
Exiting the conversation.


Chatbot with Memory Breakdown

In [45]:
# Interactive loop for user input
session_id = "abc125"  # You can change this to track multiple sessions if needed

chat_history = []

while True:
    user_input = input("You: ")

    if user_input.lower() in ['exit', 'quit']:
        print("Exiting the conversation.")
        break

    try:
        result = conversational_rag_chain.invoke(
            {"input": user_input},
            config={"configurable": {"session_id": session_id}}
        )
        print("Result:", result)  # Print the whole result to debug
        ai_response = result["answer"]
        print(f"You: {user_input}")
        print(f"AI: {ai_response}")
    except KeyError as e:
        print(f"KeyError: {e}. The result structure is:", result)
    except Exception as e:
        print(f"An error occurred: {e}")


Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


Result: {'input': 'what is anal cancer', 'chat_history': [], 'context': [Document(metadata={'page': 183.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_book.pdf'}, page_content='Anal cancer\nDefinition\nAnal cancer is an uncommon form of cancer affect-\ning the anus. The anus is the inch-and-a-half-long endportion of the large intestine, which opens to allow solidwastes to exit the body. Other parts of the large intestineinclude the colon and the rectum.\nDescription\nDifferent cancers can develop in different parts of'), Document(metadata={'page': 183.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_book.pdf'}, page_content='anal cancers are squamous cell carcinomas, which arisefrom the cells lining the anal margin and the anal canal.The anal margin is the part of the anus that is half insideand half outside the body, and the anal canal is the partof the anus that is inside the body. The earliest form ofsquamous cell carcinoma is known as c

Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


Result: {'input': 'what are its first symptoms?', 'chat_history': [HumanMessage(content='what is anal cancer'), AIMessage(content='Anal cancer is an uncommon form of cancer that affects the anus, which is the end portion of the large intestine. It can develop from different types of cells lining the anal margin and anal canal. Symptoms of anal cancer include pain, itching, bleeding, changes in bowel habits, and swollen lymph nodes in the anal or groin area.')], 'context': [Document(metadata={'page': 183.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\data\\Medical_book.pdf'}, page_content='Most individuals with anal cancer are over the age of 50.\nSymptoms of anal cancer resemble those found in\nother harmless conditions. They include pain ,itching\nand bleeding, straining during a bowel movement,change in bowel habits, change in the diameter of thestool, discharge from the anus, and swollen lymph nodesin the anal or groin area.\nDiagnosis\nAnal cancer is sometimes diagnosed d

Error in RootListenersTracer.on_chain_end callback: KeyError('answer')
Error in callback coroutine: KeyError('answer')


Result: {'input': 'is it preventable?', 'chat_history': [HumanMessage(content='what is anal cancer'), AIMessage(content='Anal cancer is an uncommon form of cancer that affects the anus, which is the end portion of the large intestine. It can develop from different types of cells lining the anal margin and anal canal. Symptoms of anal cancer include pain, itching, bleeding, changes in bowel habits, and swollen lymph nodes in the anal or groin area.'), HumanMessage(content='what are its first symptoms?'), AIMessage(content='The first symptoms of anal cancer may include pain, itching, bleeding, changes in bowel habits, straining during a bowel movement, discharge from the anus, and swollen lymph nodes in the anal or groin area. These symptoms can resemble those found in other harmless conditions, so it is important to consult a healthcare provider for proper diagnosis and treatment.')], 'context': [Document(metadata={'page': 184.0, 'source': 'C:\\Users\\TIFFANY MUN\\Medical-Chatbot-2\\dat