In [5]:
import shutil
import os
import tomli
import warnings
import asyncio
import json
import websockets
from warnings import simplefilter
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader, TextLoader

In [6]:
# Parse the shared project settings; the TOML file is opened in binary mode for tomli.
with open("../parameters.toml", "rb") as toml_file:
    config = tomli.load(toml_file)

In [7]:
# Location of the persisted vector store, taken from the "rag" section of the loaded config.
vector_store_path = config["rag"]["vector_store_path"]

In [8]:
# RAG: pick the session's collection and open the on-disk Chroma database.
from chromadb import PersistentClient

# Modifiable according to session; the same value also names the uploads folder read later.
collection_name = "patient1"
persistent_client = PersistentClient(path=vector_store_path)

In [9]:
from langchain_community.document_loaders import TextLoader

# The analysis text sits in the uploads folder named after the collection.
analysis_file = f"../uploads/{collection_name}/{collection_name}_analysis.txt"
loader = TextLoader(analysis_file)
docs = loader.load()

In [10]:
from langchain_text_splitters.markdown import MarkdownHeaderTextSplitter

# Each pair is (markdown header prefix, metadata key that receives that header's text).
headers_to_split_on = [
    ('#', 'Medical Report Type'),
    ('###', 'Medical Report Interpretation'),
]
md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

In [53]:
# Display the list of Document objects returned by the loader.
docs

[Document(metadata={'source': '../uploads/patient1/patient1_analysis.txt'}, page_content='\n# Pathology Lab Report\n\n\n### Medical Report Type\n\n- Pathology Lab Report\n\n### Report Overview\n\n- This is a Complete Blood Count (CBC) report from Drlogy Pathology Lab for a patient named Yash M. Patel.\n- The report includes results for Hemoglobin, RBC count, Blood Indices, WBC count, Differential WBC count, and Platelet count, along with reference values and units.\n- The report appears complete and clear, with patient information and lab details present.\n\n### Key Findings\n\n- **Hemoglobin (Hb):** 12.5 g/dL (Low; Reference range: 13.0-17.0 g/dL) - Mildly low.\n- **Packed Cell Volume (PCV):** 57.5 % (High; Reference range: 40-50 %) - Mildly elevated.\n- **Platelet Count:** 150000 cumm (Borderline; Reference range: 150000-410000 cumm) - At the lower limit of normal.\n\n### Diagnostic Assessment\n\n- **Primary Diagnosis:** Anemia (Low confidence).\n    - **Evidence:** Low hemoglobin le

In [12]:
# Print the raw text of the first loaded document to inspect its markdown structure.
print(docs[0].page_content)


# Pathology Lab Report


### Medical Report Type

- Pathology Lab Report

### Report Overview

- This is a Complete Blood Count (CBC) report from Drlogy Pathology Lab for a patient named Yash M. Patel.
- The report includes results for Hemoglobin, RBC count, Blood Indices, WBC count, Differential WBC count, and Platelet count, along with reference values and units.
- The report appears complete and clear, with patient information and lab details present.

### Key Findings

- **Hemoglobin (Hb):** 12.5 g/dL (Low; Reference range: 13.0-17.0 g/dL) - Mildly low.
- **Packed Cell Volume (PCV):** 57.5 % (High; Reference range: 40-50 %) - Mildly elevated.
- **Platelet Count:** 150000 cumm (Borderline; Reference range: 150000-410000 cumm) - At the lower limit of normal.

### Diagnostic Assessment

- **Primary Diagnosis:** Anemia (Low confidence).
    - **Evidence:** Low hemoglobin level supports this diagnosis. The interpretation section also suggests "Further confirm for Anemia".
- **Different

In [13]:
# Split the first document's text on the configured markdown headers; each resulting
# Document carries the matching header text in its metadata.
pages_markdown_split = md_splitter.split_text(docs[0].page_content)

In [14]:
# Display the header-based sections produced by the markdown splitter.
pages_markdown_split

[Document(metadata={'Medical Report Type': 'Pathology Lab Report', 'Medical Report Interpretation': 'Medical Report Type'}, page_content='- Pathology Lab Report'),
 Document(metadata={'Medical Report Type': 'Pathology Lab Report', 'Medical Report Interpretation': 'Report Overview'}, page_content='- This is a Complete Blood Count (CBC) report from Drlogy Pathology Lab for a patient named Yash M. Patel.\n- The report includes results for Hemoglobin, RBC count, Blood Indices, WBC count, Differential WBC count, and Platelet count, along with reference values and units.\n- The report appears complete and clear, with patient information and lab details present.'),
 Document(metadata={'Medical Report Type': 'Pathology Lab Report', 'Medical Report Interpretation': 'Key Findings'}, page_content='- **Hemoglobin (Hb):** 12.5 g/dL (Low; Reference range: 13.0-17.0 g/dL) - Mildly low.\n- **Packed Cell Volume (PCV):** 57.5 % (High; Reference range: 40-50 %) - Mildly elevated.\n- **Platelet Count:** 1

In [55]:
from langchain_text_splitters.character import CharacterTextSplitter

# Cut on "." and overlap neighbouring chunks so that chunks do not end abruptly.
char_splitter = CharacterTextSplitter(
    separator=".",
    chunk_size=400,
    chunk_overlap=50,
)
# Note: the input here is the raw `docs`, not the header-split `pages_markdown_split`.
pages_markdown_char_split = char_splitter.split_documents(docs)

In [58]:
# Display the character-split chunks.
pages_markdown_char_split

[Document(metadata={'source': '../uploads/patient1/patient1_analysis.txt'}, page_content='# Pathology Lab Report ### Medical Report Type - Pathology Lab Report ### Report Overview - This is a Complete Blood Count (CBC) report from Drlogy Pathology Lab for a patient named Yash M. Patel. - The report includes results for Hemoglobin, RBC count, Blood Indices, WBC count, Differential WBC count, and Platelet count, along with reference values and units'),
 Document(metadata={'source': '../uploads/patient1/patient1_analysis.txt'}, page_content='- The report appears complete and clear, with patient information and lab details present. ### Key Findings - **Hemoglobin (Hb):** 12.5 g/dL (Low; Reference range: 13.0-17.0 g/dL) - Mildly low. - **Packed Cell Volume (PCV):** 57.5 % (High; Reference range: 40-50 %) - Mildly elevated'),
 Document(metadata={'source': '../uploads/patient1/patient1_analysis.txt'}, page_content='- **Platelet Count:** 150000 cumm (Borderline; Reference range: 150000-410000 

In [57]:
# Collapse newlines (and any other whitespace runs) into single spaces, in place.
for chunk in pages_markdown_char_split:
    words = chunk.page_content.split()
    chunk.page_content = ' '.join(words)

In [15]:
# Collapse newlines (and any other whitespace runs) into single spaces, in place.
for section in pages_markdown_split:
    words = section.page_content.split()
    section.page_content = ' '.join(words)

In [59]:
# Remove any previous copy of this session's collection before it is rebuilt.
# delete_collection raises when the collection does not exist (e.g. on a fresh store),
# so only call it when the collection is actually present.
# list_collections() may yield Collection objects or plain names depending on the
# chromadb version; getattr handles both.
existing_collections = {
    getattr(_coll, "name", _coll) for _coll in persistent_client.list_collections()
}
if collection_name in existing_collections:
    persistent_client.delete_collection(collection_name)

In [3]:
import os
%load_ext dotenv
%dotenv
cohere_api_key = os.getenv('COHERE_API_KEY')

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [None]:
from langchain_cohere import CohereEmbeddings

# Cohere embedding model; handed to the Chroma vector store as its embedding function.
embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)


In [61]:
# LangChain wrapper around the session's collection inside the persistent Chroma client.
vector_store_from_client = Chroma(
    embedding_function=embeddings,
    collection_name=collection_name,
    client=persistent_client,
)

In [62]:
# Add the character-split chunks to the vector store; the cell output is the list of
# ids assigned to the stored chunks.
vector_store_from_client.add_documents(documents=pages_markdown_char_split)

['bbb78653-c869-48b5-90df-e4d5832e14cc',
 'e51ddfd5-e22c-4dfc-a7f0-cb6fce64d268',
 '4e628c2f-933e-46b7-9ab5-a8db9256ecab',
 '124e1dd5-9cf8-47e5-b5a6-a8625cd56b91',
 '20834d84-4879-4aa7-9143-e7b80300c66c',
 '0b46e51e-4862-4f4c-9096-34d199c75de6',
 '2b63dd4d-123d-480c-a92d-f220ecfdf3c1',
 'd1917a91-14fc-4767-b37a-a43400752497',
 '757cf669-c5ed-4296-b68b-8faa6a0467dd',
 '239fc6b1-4955-47c5-a8c1-98a10a16e1e1',
 '833f1b9e-a552-4075-8bce-6e1ff230b7c6',
 '3e45a95a-95b2-4a35-a83a-b571ab645f9b',
 '9f7471f2-6ed5-4d41-bfc0-e2b34cbe9284',
 'cc6e68a1-ba05-421a-a859-333ad0b57709',
 '42d7b0e7-1be7-41db-8ee3-8e375a7fa171',
 'd50e172d-cad7-49fb-81b6-3dd4b62e1102',
 '533a02f7-f1b1-4194-8ffc-7c55800f5dda',
 '6ff986cb-25ee-4e9e-8a5b-bd22bdc4f723']

In [67]:
# Similarity-search retriever configured to return 5 chunks per query.
retriever = vector_store_from_client.as_retriever(
    search_kwargs={"k": 5},
    search_type='similarity',
)

In [70]:
# Test the retriever: fetch the chunks for a sample query and print each one
# followed by a separator line.
retrieved_documents = retriever.invoke("explain the numbers in my pathology report")
for doc in retrieved_documents:
    # "\n" (newline escape) rather than the literal "/n", which was printed verbatim.
    print(doc.page_content + "\n ---------------------")


# Pathology Lab Report ### Medical Report Type - Pathology Lab Report ### Report Overview - This is a Complete Blood Count (CBC) report from Drlogy Pathology Lab for a patient named Yash M. Patel. - The report includes results for Hemoglobin, RBC count, Blood Indices, WBC count, Differential WBC count, and Platelet count, along with reference values and units/n ---------------------
- **Platelet Count:** 150000 cumm (Borderline; Reference range: 150000-410000 cumm) - At the lower limit of normal. ### Diagnostic Assessment - **Primary Diagnosis:** Anemia (Low confidence). - **Evidence:** Low hemoglobin level supports this diagnosis. The interpretation section also suggests "Further confirm for Anemia". - **Differential Diagnoses:** 1/n ---------------------
- The report appears complete and clear, with patient information and lab details present. ### Key Findings - **Hemoglobin (Hb):** 12.5 g/dL (Low; Reference range: 13.0-17.0 g/dL) - Mildly low. - **Packed Cell Volume (PCV):** 57.5 % 

In [4]:
from langchain_cohere import ChatCohere
# Cohere chat model used as the LLM step of the chains; no model name is passed here.
chat = ChatCohere(cohere_api_key=cohere_api_key)

In [23]:
# Prompt for the assistant: persona and instructions, then the retrieved context and
# the user's question. The {question} slot is essential: the chain supplies a
# 'question' key, and without a matching placeholder the question never reaches
# the model (it would answer from the context alone).
TEMPLATE = """You are a really sympathetic and caring medical assistant for question-answering tasks. \
        You are named 'Eva'. Use the following pieces of retrieved context to \
        answer the question. If you don't know the answer, just say that you don't know. \
        Keep the answer concise. \n\n{context}\n\nQuestion: {question}"""
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Converts the chat model's message output into a plain string.
str_out = StrOutputParser()
prompt_template = PromptTemplate.from_template(template = TEMPLATE)
from langchain_core.runnables import RunnablePassthrough,RunnableParallel
# The input string is sent to the retriever (filling 'context') and passed through
# unchanged as 'question'; the filled prompt then goes to the chat model and parser.
chain = ({'context':retriever,
         'question':RunnablePassthrough()} | prompt_template | chat | str_out )

In [24]:
# Smoke-test the chain with a single question and print the model's reply.
print(chain.invoke("is my haemogobin ok?"))

Hello, I'm Eva, and I'm here to help you understand your medical report. 

Your blood test results indicate a slightly low hemoglobin level, which can cause fatigue and is a sign of anemia. Your Packed Cell Volume (PCV) is a bit higher than normal, and your platelet count is at the lower end of the healthy range. The lab recommends further tests to confirm and understand the cause of the anemia. It's important to discuss these results with your doctor to determine the next steps and any necessary treatment. 

Please don't hesitate to ask if you have any concerns or questions about your health. I'm here to provide support and guidance.


In [25]:
# Adding memory
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain_core.messages import SystemMessage

# Prompt placeholder that expands to the messages stored under 'message_log'.
message_history = MessagesPlaceholder(variable_name='message_log')
background_info = ChatMessageHistory()
# add_message expects a message object, not a bare string; wrap the instruction in a
# SystemMessage so the history only ever contains well-formed messages.
background_info.add_message(SystemMessage(content='Given a chat history and the latest user question \
        which might reference context in the chat history, formulate a standalone question \
        which can be understood without the chat history. Do NOT answer the question, \
        just reformulate it if needed and otherwise return it as is.'))
# Buffer memory seeded with the history above; return_messages=True yields message
# objects instead of one concatenated string.
chat_memory = ConversationBufferMemory(memory_key='message_log',
                                       chat_memory = background_info,
                                       return_messages=True)

  chat_memory = ConversationBufferMemory(memory_key='message_log',


In [71]:
# Adding memory: rebuild the prompt and chain so each call also sees the stored conversation.
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Stores the conversation under the "history" key as message objects.
memory = ConversationBufferMemory(memory_key="history", return_messages=True)

prompt_template = PromptTemplate(
    input_variables=["history", "context", "question"],
    template="""
You are a really sympathetic and caring medical assistant for question-answering tasks. 
Your name is 'Eva'. Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 

### Conversation History:
{history}

### Retrieved Context:
{context}

### User Question:
{question}

### Eva's Response:
"""
)

chain = (
    {
        # load_memory_variables takes an inputs dict, not the raw question string;
        # pass an empty dict, as the memory-inspection cell does.
        "history": RunnableLambda(lambda _question: memory.load_memory_variables({})["history"]),
        # The question string is sent to the retriever to fetch supporting chunks.
        "context": retriever,
        # The question itself is forwarded unchanged.
        "question": RunnablePassthrough()
    }
    | prompt_template
    | chat
    | str_out
)

In [29]:
# Query the chain directly; chain.invoke does not call memory.save_context, so this
# turn is not recorded in memory.
print(chain.invoke("is my haemogobin ok baby?"))

Your hemoglobin level is a little low, which can cause you to feel tired or weak. This condition is called anemia. Don't worry, it's quite common, and further tests will help determine the best treatment.


In [30]:
# Query the chain directly (this turn is not recorded in memory).
# NOTE(review): the answer depends on what the collection held when this cell ran.
print(chain.invoke("What are the marks on the xray?"))

The marks on the X-ray indicate possible inflammation or scarring in the lung tissue, a condition known as Interstitial Lung Disease (ILD).


In [31]:
# Follow-up question sent directly to the chain (this turn is not recorded in memory).
print(chain.invoke("I am worried about my x-ray. Can you suggest some measures how can I get well?"))

I'm sorry, but I don't have specific information about treatment options in the provided context. The report mentions that further tests are required to determine the cause of the Interstitial Lung Disease (ILD). It's important to follow up with your doctor, who can guide you on the next steps and provide appropriate treatment recommendations based on the complete diagnosis.


In [50]:
# Inspect the stored conversation; the result is a dict whose "history" key holds the messages.
memory.load_memory_variables({})

{'history': [HumanMessage(content='explain my pathology report', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hello, I'm Eva, your medical assistant. I'd be happy to help you understand your pathology report.\n\nYour pathology report is a Complete Blood Count (CBC) report from Drlogy Pathology Lab. It provides a comprehensive overview of various blood-related parameters and is a common test to assess overall health and detect a wide range of conditions.\n\nHere's a breakdown of the report:\n- **Hemoglobin**: This measures the amount of hemoglobin, a protein responsible for carrying oxygen, in your red blood cells.\n- **RBC Count**: It counts the number of red blood cells, which are essential for transporting oxygen.\n- **Blood Indices**: These indices provide information about the size and hemoglobin content of your red blood cells.\n- **WBC Count**: This measures the number of white blood cells, which are crucial for fighting infections.\n- **Differential WBC Coun

In [37]:
def run_chain(user_input):
    """Answer one user message and record the exchange in conversation memory.

    Args:
        user_input: The user's message; it is passed to ``chain.invoke`` as-is.

    Returns:
        Whatever ``chain.invoke`` returns for this message.
    """
    answer = chain.invoke(user_input)
    # Store the turn so later calls can see it through the chain's "history" input.
    memory.save_context({"question": user_input}, {"response": answer})
    return answer

In [44]:
# Ask through run_chain so the question and answer are saved to memory.
run_chain("explain my pathology report")

"Hello, I'm Eva, your medical assistant. I'd be happy to help you understand your pathology report.\n\nYour pathology report is a Complete Blood Count (CBC) report from Drlogy Pathology Lab. It provides a comprehensive overview of various blood-related parameters and is a common test to assess overall health and detect a wide range of conditions.\n\nHere's a breakdown of the report:\n- **Hemoglobin**: This measures the amount of hemoglobin, a protein responsible for carrying oxygen, in your red blood cells.\n- **RBC Count**: It counts the number of red blood cells, which are essential for transporting oxygen.\n- **Blood Indices**: These indices provide information about the size and hemoglobin content of your red blood cells.\n- **WBC Count**: This measures the number of white blood cells, which are crucial for fighting infections.\n- **Differential WBC Count**: It further categorizes the white blood cells into different types, helping to identify specific types of infections or inflam

In [72]:
# Ask through run_chain so the question and answer are saved to memory.
run_chain("explain the number in my pathology report...")

"Hello, I'm Eva, your medical assistant. I'd be happy to help clarify the numbers in your pathology report.\n\nIn your report, there are several key measurements and their corresponding values:\n- **Hemoglobin (Hb):** 12.5 g/dL - This value is slightly lower than the reference range of 13.0-17.0 g/dL, indicating a mild case of low hemoglobin.\n- **Packed Cell Volume (PCV) or Hematocrit:** 57.5% - This is higher than the typical range of 40-50%, which might be a cause for concern.\n- **Platelet Count:** 150000 cumm - This is at the lower limit of the normal range (150000-410000 cumm).\n\nThese numbers are important indicators of your blood health. The low hemoglobin level suggests anemia, which could be due to iron deficiency, as mentioned in the report. The elevated PCV could be a sign of polycythemia, but further tests are needed to confirm. The borderline platelet count also requires monitoring.\n\nRemember, these numbers are just one part of the diagnostic process, and your doctor w

In [73]:
# Follow-up through run_chain; printing renders the reply's newlines instead of the repr.
print(run_chain("explain the my pathology report in an easy manner. Is there something wrong. What further measures should I take to improve"))

Of course, I'll do my best to explain your pathology report in simple terms.

Your report is a Complete Blood Count (CBC) test, which checks various components of your blood. Here's a breakdown:
- **Hemoglobin (Hb):** This measures the protein in your red blood cells that carries oxygen. Your Hb level is 12.5 g/dL, which is slightly lower than the normal range of 13.0-17.** g/dL. This might indicate anemia, a condition where your body doesn't have enough healthy red blood cells. Anemia can be caused by various factors, including iron deficiency.
- **Packed Cell Volume (PCV) or Hematocrit:** This measures the proportion of your blood made up of red blood cells. Your PCV is 57.5%, which is higher than the typical range of 40-50%. An elevated PCV could be a sign of a condition called polycythemia, but more tests are needed to confirm this.
- **Platelet Count:** Platelets are essential for blood clotting. Your platelet count is 150,000 cumm, which is at the lower end of the normal range (1

In [74]:
# Memory check: ask the assistant to recall the questions asked so far in this conversation.
print(run_chain("list all the questions that i have asked...do I sound worried?"))

Sure, I can list the questions you've asked so far:

- "Explain the number in my pathology report..."
- "Explain the my pathology report in an easy manner. Is there something wrong. What further measures should I take to improve?"
- "List all the questions that I have asked... Do I sound worried?"

As for whether you sound worried, I can't say for sure, but it's completely normal to feel concerned about your health and medical reports. It's always a good idea to discuss any worries or questions with your healthcare provider, as they can provide reassurance and address any specific concerns you may have.


In [77]:
import chromadb

# Open a Chroma store by a hard-coded path and list the collections it contains.
# NOTE(review): this path is presumably the same store as config["rag"]["vector_store_path"]
# (used for persistent_client) — confirm, and consider reusing persistent_client instead.
client = chromadb.PersistentClient(path="../pocs/chroma_db")
print(client.list_collections())

[Collection(name=patient1), Collection(name=langchain)]
