### Installation

In [None]:
!pip install langchain-qdrant qdrant-client langchain-openai

## Basic Imports

In [2]:
import pandas as pd
from langchain_core.documents import Document

#### Read Dataframe

In [3]:
df = pd.read_csv('../data/cleaned_data.csv')

In [4]:
df.head()

Unnamed: 0,Disease,Description,Immediate Action,Medical Advice,Dietary Guidance,Long-term Management,prepared_text
0,Malaria,An infectious disease caused by protozoan para...,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out,Disease: Malaria Description: An infectious di...
1,Allergy,An allergy is an immune system response to a f...,apply calamine,cover area with bandage,,use ice to compress itching,Disease: Allergy Description: An allergy is an...
2,Hypothyroidism,"Hypothyroidism, also called underactive thyroi...",reduce stress,exercise,eat healthy,get proper sleep,Disease: Hypothyroidism Description: Hypothyro...
3,Psoriasis,Psoriasis is a common skin disorder that forms...,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths,Disease: Psoriasis Description: Psoriasis is a...
4,GERD,"Gastroesophageal reflux disease, or GERD, is a...",avoid fatty spicy food,avoid lying down after eating,maintain healthy weight,exercise,Disease: GERD Description: Gastroesophageal re...


#### Creating LangChain Documents

In [5]:
documents = []

for index, row in df.iterrows():
    document = Document(
        page_content=row["prepared_text"],
        metadata={"Disease": row["Disease"]}
    )
    documents.append(document)

In [6]:
len(documents)

37

#### Document UUID

In [7]:
from uuid import uuid4
uuids = [str(uuid4()) for _ in range(len(documents))]

#### Connecting To Qdrant

In [12]:
from qdrant_client import QdrantClient
from dotenv import load_dotenv
import os

%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [13]:
qdrant_client = QdrantClient(
    api_key=os.getenv("QDRANT_API_KEY"),
    url=os.getenv("QDRANT_URL")
)

In [15]:
qdrant_client.get_collections()

CollectionsResponse(collections=[CollectionDescription(name='6fab1c82b511defb65fa617773341ebe'), CollectionDescription(name='14a7cfe4b7e62d0e7f29d462eb8da5b4'), CollectionDescription(name='fe65eeca69c53a5905e508bb6f41b974'), CollectionDescription(name='7aa03e9d29a1b8371f9b454f44e3e6bd')])

#### Creating New Collection

In [17]:
from qdrant_client.http.models import Distance, VectorParams

In [18]:
COLLECTION_NAME ="healthcare_collection"

In [19]:
qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(
        size=os.getenv("QDRANT_VECTOR_DIMENSION", 1536), 
        distance=Distance.COSINE),
)

True

#### Connecting To Vector Store

In [23]:
from langchain_qdrant import QdrantVectorStore
from langchain_openai import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(
    model=os.getenv("EMBEDDING_MODEL", 
                    default="text-embedding-3-small"
                )
)

In [24]:
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=COLLECTION_NAME,
    embedding=embedding_model,
    # retrieval_mode=RetrievalMode.DENSE
)

In [None]:
vector_store.add_documents(documents=documents, ids=uuids)

#### Quering Vector Store

In [26]:
query = "What is malaria?"

In [27]:
results = vector_store.similarity_search(
    query, k=2
)

In [28]:
results

[Document(metadata={'Disease': 'Malaria', '_id': '1497e650-06e7-458c-a542-9f8815e7303f', '_collection_name': 'healthcare_collection'}, page_content='Disease: Malaria Description: An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type. Immediate Action: Consult nearest hospital Medical Advice: avoid oily food Dietary Guidance: avoid non veg food Long-term Management: keep mosquitos out'),
 Document(metadata={'Disease': 'Dengue', '_id': '8320500c-c4d2-4143-9263-83fbd5e4170b', '_collection_name': 'healthcare_collection'}, page_content='Disease: Dengue Description: an acute infectious disease caused by a flavivirus (species Dengue virus of the genus Flavivirus), transmitted by aedes mosquitoes, and characterized by headache, severe joint pain, and a rash. — called also breakbone fever, dengue fever. Immediate Action: dr

In [29]:
results[0].metadata

{'Disease': 'Malaria',
 '_id': '1497e650-06e7-458c-a542-9f8815e7303f',
 '_collection_name': 'healthcare_collection'}

In [30]:
results[0].page_content

'Disease: Malaria Description: An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type. Immediate Action: Consult nearest hospital Medical Advice: avoid oily food Dietary Guidance: avoid non veg food Long-term Management: keep mosquitos out'

In [31]:
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Disease: Malaria Description: An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type. Immediate Action: Consult nearest hospital Medical Advice: avoid oily food Dietary Guidance: avoid non veg food Long-term Management: keep mosquitos out [{'Disease': 'Malaria', '_id': '1497e650-06e7-458c-a542-9f8815e7303f', '_collection_name': 'healthcare_collection'}]
* Disease: Dengue Description: an acute infectious disease caused by a flavivirus (species Dengue virus of the genus Flavivirus), transmitted by aedes mosquitoes, and characterized by headache, severe joint pain, and a rash. — called also breakbone fever, dengue fever. Immediate Action: drink papaya leaf juice Medical Advice: avoid fatty spicy food Dietary Guidance: keep mosquitos away Long-term Management: keep hydrated [{'Disease': 'Dengue', '_id': '8320500c-c4d2

#### Information Retrieval

In [32]:
def information_retriever(question: str) -> str:
    """
    Search and retrieve information from the Qdrant vector store.
    """
    
    # Filter the search results based on the product name
    unstructured_data_results = vector_store.similarity_search(
        query=query,
        k=3
    )
    
    return unstructured_data_results

In [33]:
information_retriever("What is malaria?")

[Document(metadata={'Disease': 'Malaria', '_id': '1497e650-06e7-458c-a542-9f8815e7303f', '_collection_name': 'healthcare_collection'}, page_content='Disease: Malaria Description: An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type. Immediate Action: Consult nearest hospital Medical Advice: avoid oily food Dietary Guidance: avoid non veg food Long-term Management: keep mosquitos out'),
 Document(metadata={'Disease': 'Dengue', '_id': '8320500c-c4d2-4143-9263-83fbd5e4170b', '_collection_name': 'healthcare_collection'}, page_content='Disease: Dengue Description: an acute infectious disease caused by a flavivirus (species Dengue virus of the genus Flavivirus), transmitted by aedes mosquitoes, and characterized by headache, severe joint pain, and a rash. — called also breakbone fever, dengue fever. Immediate Action: dr

#### Actual Chatbot

In [34]:
from langchain_core.runnables import (
    RunnableParallel,
    RunnablePassthrough,
)

In [57]:
bot_chat_template = """
You are an AI healthcare chatbot with extensive knowledge of medical conditions and diseases. Your task is to provide accurate, concise answers to user questions based solely on the given context.

Context: 
{context}

Question: 
{question}

Instructions:
- Respond using natural language, ensuring your answer is clear and to the point.
- After your answer, explicitly state the context used without modification of the information. You are only to modify the datatype.
- If no context is provided, simply reply with: "No context was provided."

Answer:
Context Used:
"""

In [58]:
from pydantic import Field, BaseModel
from typing import List, Union

In [59]:
class ResponseFormat(BaseModel):
    """Identifying information about Products."""

    answer: str = Field(
        ...,
        description="Your response to the user query."
    )
    sources: List[Union[str| Document]] = Field(
        ...,
        description="The sources, contexts used to generate the response."
    )

In [60]:
from langchain.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [61]:
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [62]:
bot_chat_prompt = ChatPromptTemplate.from_template(bot_chat_template)

chat_chain = (
    RunnableParallel(
        {
            "context": information_retriever,
            "question": RunnablePassthrough(),
        }
    )
    | bot_chat_prompt
    | llm.with_structured_output(ResponseFormat)
)  

In [63]:
response = chat_chain.invoke("What is malaria?")

In [65]:
response

ResponseFormat(answer='Malaria is an infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type.', sources=[Document(metadata={'Disease': 'Malaria', '_id': '1497e650-06e7-458c-a542-9f8815e7303f', '_collection_name': 'healthcare_collection'}, page_content='Disease: Malaria Description: An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type. Immediate Action: Consult nearest hospital Medical Advice: avoid oily food Dietary Guidance: avoid non veg food Long-term Management: keep mosquitos out')])

In [66]:
response.answer

'Malaria is an infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type.'

In [67]:
response.sources

[Document(metadata={'Disease': 'Malaria', '_id': '1497e650-06e7-458c-a542-9f8815e7303f', '_collection_name': 'healthcare_collection'}, page_content='Disease: Malaria Description: An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type. Immediate Action: Consult nearest hospital Medical Advice: avoid oily food Dietary Guidance: avoid non veg food Long-term Management: keep mosquitos out')]