In [2]:
import pandas as pd

from dotenv import load_dotenv
from src.services import LLMFactory, VectorStoreFactory, EmbeddingsFactory
from src.ingest import Ingestion
from langchain_core.prompts import (
    ChatPromptTemplate,
)
from langchain.retrievers.ensemble import EnsembleRetriever
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.memory import ConversationBufferWindowMemory, SQLChatMessageHistory

# Load API keys / connection settings from the local .env file into the environment.
# The trailing semicolon suppresses the returned value in the cell output without
# rebinding `_`, which IPython reserves for "last output" and stops updating once
# user code assigns to it.
load_dotenv();

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Read the medical QnA parquet file with the pyarrow engine and preview the first rows.
data = pd.read_parquet(
    path="data/medqa.parquet",
    engine="pyarrow",
)
data.head()

## Ingesting the medical QnA data


In [3]:
# Ingestion pipeline configured for Cohere embeddings and the Milvus vector store.
ingestion = Ingestion(embeddings_service="cohere", vectorstore_service="milvus")

2024-11-22 23:48:06 - root - INFO - Application Logger Initialized
2024-11-22 23:48:06 - root - INFO - Log Directory: logs
2024-11-22 23:48:06 - src.services.embeddings_factory - INFO - Using Cohere embeddings model.
2024-11-22 23:48:06 - src.services.vectorstore_factory - INFO - Using Milvus
2024-11-22 23:48:07 - pymilvus.milvus_client.milvus_client - DEBUG - Created new connection using: 422e1f8b965c4f2e8e82dfc00cdd096f
2024-11-22 23:48:08 - pymilvus.milvus_client.milvus_client - DEBUG - Created new connection using: 387e434db089446da9f453b77d95048c


In [None]:
# Bulk ingestion of the medical QnA parquet file, tagged as category "medical" /
# sub_category "conversation" and skipping the "instruction" column.
# NOTE(review): left disabled — presumably a one-off load that should not be repeated
# on every run; confirm before re-enabling.
# ingestion.ingest_document(
#     file_path="data/medqa.parquet",
#     category="medical",
#     sub_category="conversation",
#     exclude_columns=["instruction"],
# )

### User doc upload

In [13]:
# Sample document URLs:
#   https://lab.mlaw.gov.sg/files/Sample-filled-in-MR.pdf
#   https://cdn1.lalpathlabs.com/live/reports/WM17S.pdf
#   https://med.ucf.edu/media/2018/08/Sample-Adult-History-And-Physical-By-M2-Student.pdf

# Ingest the local patient PDF under category "medical" / sub_category "document".
ingestion.ingest_document(file_path="data/anxiety-patient.pdf", category="medical", sub_category="document")

2024-11-23 00:40:46 - src.ingest - INFO - Using PDF Reader
2024-11-23 00:40:46 - src.ingest - INFO - Ingesting document to vectorstore


## Creating QnA Chain


In [14]:
# Build the three service objects the chain depends on: embeddings, vector store, chat model.
embeddings_instance = EmbeddingsFactory.get_embeddings(
    embeddings_service="cohere",
)
vectorstore_instance = VectorStoreFactory.get_vectorstore(
    embeddings=embeddings_instance,
    vectorstore_service="milvus",
)
llm = LLMFactory.get_chat_model(
    llm_service="cohere",
)

2024-11-23 00:40:54 - src.services.embeddings_factory - INFO - Using Cohere embeddings model.
2024-11-23 00:40:54 - src.services.vectorstore_factory - INFO - Using Milvus
2024-11-23 00:40:54 - pymilvus.milvus_client.milvus_client - DEBUG - Created new connection using: 8beb37f088414aa9a121c8ca5c21d613
2024-11-23 00:40:55 - pymilvus.milvus_client.milvus_client - DEBUG - Created new connection using: 216df11c89b048fabc963d8bbf7cc34c
2024-11-23 00:40:57 - src.services.llm_factory - INFO - Using Cohere chat model.


In [15]:
# Two similarity retrievers over the same vector store that differ only in the
# `sub_category` metadata filter: doctor/patient conversations vs. uploaded documents.
# Built from one shared configuration so the two stay in sync.
conversation_retriever, document_retriever = (
    vectorstore_instance.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 6,
            "fetch_k": 12,
            "filter": {"category": "medical", "sub_category": sub_category},
        },
    )
    for sub_category in ("conversation", "document")
)

# Chat history persisted in a local SQLite database, keyed by a fixed session id.
history = SQLChatMessageHistory(
    connection_string="sqlite:///.cache/chat_history.db",
    table_name="message_store",
    session_id_field_name="session_id",
    session_id="ghdcfhdxgfx",
)

# Windowed conversation memory backed by the SQL history above.
memory = ConversationBufferWindowMemory(
    chat_memory=history,
)

In [12]:
# Remove every stored entry whose metadata file_name matches 'test-report.pdf'.
vectorstore_instance.delete(expr="metadata['file_name'] like 'test-report.pdf'")

(insert count: 0, delete count: 12, upsert count: 0, timestamp: 0, success count: 0, err count: 0, cost: 12)

In [16]:
# Combine both retrievers, weighting the document retriever (0.6) above the
# conversation retriever (0.4).
ensambled_retriever = EnsembleRetriever(weights=[0.4, 0.6], retrievers=[conversation_retriever, document_retriever])

In [17]:
# QA prompt: system instructions plus the retrieved `{context}`, followed by the
# optional chat history and the user's question.
# NOTE(review): instruction 1 tells the model to call a `vectorstore_retriever` tool,
# but no tool appears to be bound in this notebook — context is injected via `{context}`.
# Confirm whether that instruction should be dropped.
# NOTE(review): instructions 2-3 assume the context is ordered "conversations first,
# document second"; verify that the ensemble retriever actually guarantees that order.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a helpful chatbot doctor. You need to answer the user's queries in extreme detail from the document context. 
            # Instructions
            1. You have access to one tool: vectorstore_retriever. Always use the vectorstore_retriever tool to retrieve the context and answer the question.
            2. The first part of the context contains conversation between doctor and patient, which you can use to answer the user query as close as possible to a real doctor.
            3. The second part of the context is a document on which the user asks query about.
            4. Utilize the conversation and the document contexts to answer the queries as good as possible.
            5. When asked about a patient, always refer to the patient from the second part of the context i.e. the patient in the document.
            6. DO NOT answer for queries for which the context has no information about. Just reply with 'Sorry, can you rewrite your query please?' when user asks irrelevant questions.
            # Context
            {context}""",
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ]
)

In [9]:
# Answering step: stuffs the retrieved documents into `{context}` of the QA prompt.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Query-rewriting step. The history-aware retriever only formats its prompt with
# `input` and `chat_history`, so it must NOT reuse the QA prompt: that template also
# requires `{context}`, which is not available at retrieval time and makes the chain
# fail as soon as a non-empty chat history is supplied.
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Given the chat history and the latest user question, which may refer to "
            "earlier turns, rewrite the question as a standalone search query that can "
            "be understood without the chat history. Do NOT answer the question; return "
            "only the rewritten query, or the question unchanged if no rewrite is needed.",
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, ensambled_retriever, contextualize_prompt
)

# Full RAG pipeline: retrieve (history-aware) -> answer from the retrieved context.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [18]:
# Run the first question through the RAG chain and show only the generated answer.
response = rag_chain.invoke({"input": "Give me a list of major axiety issues of Ann."})
print(response["answer"])

2024-11-23 00:42:09 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/chat "HTTP/1.1 200 OK"


Ann's major anxiety issues, as mentioned in the case study, include:

1. Difficulty interacting with others, especially males, due to past traumas and PTSD, leading to feelings of anxiety and agitation in social situations.

2. Severe distress and anxiety when in public or large groups, making it hard for her to leave the house or use public transportation.

3. Agitation and psychomotor restlessness during therapy sessions, often leading her to abruptly end them.

4. Frequent and repeated calls to her therapist when anxious, sometimes every hour.

5. Difficulty concentrating, which varies from day-to-day, partly dependent on her anxiety levels.

6. Fear of conflict, leading to avoidance behaviour and an inability to resolve workplace issues, impacting her job performance and retention.

7. Agitation and anxiety stemming from family conflict, which escalates her symptoms, including flashbacks.

8. Feeling threatened and fearful in most situations, even when no apparent threat exists.

9

In [20]:
# Run the follow-up question through the RAG chain and show only the generated answer.
response = rag_chain.invoke({"input": "Ann seems to have insomnia. What can she do to fix it?"})
print(response["answer"])

2024-11-23 00:43:09 - httpx - INFO - HTTP Request: POST https://api.cohere.com/v1/chat "HTTP/1.1 200 OK"


Yes, Ann appears to be experiencing insomnia, which is a common symptom of depression. The doctor's advice for Ann would be to consult a psychiatrist to receive appropriate medication and detail psychological therapy. 

Ann can also try some general tips to improve her sleep quality. These include avoiding stimulants like tea, coffee, and cola before bed and avoiding heavy meals or exercises close to bedtime. Light exercises, such as walking, can help improve sleep. It's also recommended to avoid watching TV or reading novels right before sleep. 

Additionally, as Ann's insomnia is linked to her depression and anxiety, addressing these underlying issues with the help of a psychiatrist and therapist could also help improve her sleep over time.


In [19]:
# Inspect the raw chain output: besides the answer, the dict carries the 'input'
# and the retrieved 'context' documents.
response

{'input': 'Give me a list of major axiety issues of Ann.',
 'context': [Document(metadata={'file_name': 'anxiety-patient.pdf', 'page_no': '5', 'total_pages': '7', 'category': 'medical', 'sub_category': 'document'}, page_content='husband, and including her sister’s former boyfriend who raped her.  It is difficult for her to \narticulate if this is a change, or just something she has become aware of recently. She cannot \ndescribe a fulfilling social relationship in her life. Her PTSD and depression prevents her from \nexperiencing emotional stability for any length of time. Ann has expressed difficulty interacting \nwith many individuals, but even more so with males. Her PTSD and past traumas hinder all \ninteractions. \n \nAnn has expressed, especially in recent months, significant difficulty being in or around large \ngroups of people. In particular, being out in public is very distressing to her. She relies on family \nmembers to bring her to appointments because she cannot tolerate 

In [21]:
# Inspect the raw chain output: besides the answer, the dict carries the 'input'
# and the retrieved 'context' documents.
response

{'input': 'Ann seems to have insomnia. What can she do to fix it?',
 'context': [Document(metadata={'file_name': 'anxiety-patient.pdf', 'page_no': '6', 'total_pages': '7', 'category': 'medical', 'sub_category': 'document'}, page_content='She has trouble waking in the morning and trouble sleeping through the night. She reports that \nshe knows how to cook, but has little motivation to do so. Ann used to keep up with her \nhousehold chores, but now she has little motivation or desire due to depression. At a recent \nappointment, she presented disheveled and was wearing slippers. Her deficits in ADLs stem \nfrom her lack of motivation and feelings of worthlessness that hinder her ability to complete day-\nto-day tasks. \n \nAnn’s day consists of staying at home and trying to make it through the day. She wakes up most \nmornings around 10-11a.m. and says, “It’s an effort just to get out of bed.” She notes constant \nfeelings of guilt, worthlessness, agitation, and suicidal thoughts.  \n \n