In [1]:
# Mount Google Drive into the Colab filesystem so the dataset CSVs read below
# (under /content/drive/MyDrive) are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd

# Disease -> symptoms table: one row per (disease, comma-separated symptom list).
df1 = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Datasets/disease_symptoms.csv",
)

# Preview the first ten rows.
df1.head(n=10)

Unnamed: 0,Disease,Symptoms
0,Fungal infection,"itching, skin_rash, nodal_skin_eruptions, d..."
1,Fungal infection,"skin_rash, nodal_skin_eruptions, dischromic..."
2,Fungal infection,"itching, nodal_skin_eruptions, dischromic _p..."
3,Fungal infection,"itching, skin_rash, dischromic _patches"
4,Fungal infection,"itching, skin_rash, nodal_skin_eruptions"
5,Allergy,"continuous_sneezing, shivering, chills, wa..."
6,Allergy,"shivering, chills, watering_from_eyes"
7,Allergy,"continuous_sneezing, chills, watering_from_..."
8,Allergy,"continuous_sneezing, shivering, watering_fr..."
9,Allergy,"continuous_sneezing, shivering, chills"


In [3]:
# Disease -> medicine/dosage table: one suggested medicine and dosage per row.
df2 = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Datasets/disease_medicine_dosage.csv",
)

# Preview the first ten rows.
df2.head(n=10)

Unnamed: 0,Disease,Medicine,Dosage
0,Fungal infection,Clotrimazole,Apply twice daily
1,Allergy,Cetirizine,10 mg once daily
2,GERD,Omeprazole,20 mg before breakfast
3,Chronic cholestasis,Ursodeoxycholic acid,300 mg twice daily
4,Drug Reaction,Prednisolone,10-20 mg daily
5,Peptic ulcer disease,Ranitidine,150 mg twice daily
6,AIDS,Tenofovir + Lamivudine + Efavirenz,As prescribed by doctor
7,Diabetes,Metformin,500 mg twice daily
8,Gastroenteritis,ORS + Loperamide,"ORS after each loose stool, Loperamide 4 mg on..."
9,Bronchial Asthma,Salbutamol inhaler,2 puffs every 6 hours as needed


In [4]:
# Schema check for the symptoms table: row count, columns, non-null counts, dtypes.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 304 entries, 0 to 303
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Disease   304 non-null    object
 1   Symptoms  304 non-null    object
dtypes: object(2)
memory usage: 4.9+ KB


In [5]:
# Schema check for the medicine/dosage table: row count, columns, non-null counts, dtypes.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Disease   41 non-null     object
 1   Medicine  41 non-null     object
 2   Dosage    41 non-null     object
dtypes: object(3)
memory usage: 1.1+ KB


In [6]:
# Number of missing values in each column of df1.
df1.isnull().sum()

Unnamed: 0,0
Disease,0
Symptoms,0


In [7]:
# Number of missing values in each column of df2.
df2.isnull().sum()

Unnamed: 0,0
Disease,0
Medicine,0
Dosage,0


In [8]:
# Number of rows in df1 that exactly repeat an earlier row.
df1.duplicated().sum()

0

In [9]:
# Number of rows in df2 that exactly repeat an earlier row.
df2.duplicated().sum()

0

In [10]:
!pip install chromadb
!pip install sentence-transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transforme

In [11]:
import chromadb
from sentence_transformers import SentenceTransformer

# Build an in-memory Chroma index with one entry per row of df1: the symptom
# text is embedded, and the disease plus its medicine/dosage ride along as
# metadata so a nearest-neighbour hit can be turned directly into an answer.
client = chromadb.Client()
collection_name = "disease_collection"

# Chroma selects the distance metric from the "hnsw:space" metadata key. Any
# other key (such as the previous "distance_function") is stored as plain
# metadata and the index stays on the default L2 metric.
# NOTE: the metric is fixed when the collection is created, so a collection
# already created earlier in this kernel session keeps its original metric
# until the kernel is restarted or the collection is deleted.
collection = client.get_or_create_collection(
    name=collection_name,
    metadata={"hnsw:space": "cosine"},
)

model = SentenceTransformer('all-mpnet-base-v2')

# Disease -> {"Medicine": ..., "Dosage": ...}, keeping the first row listed for
# each disease (the same row the per-row `iloc[0]` lookup used to pick).
treatment_by_disease = (
    df2.drop_duplicates(subset="Disease", keep="first")
    .set_index("Disease")[["Medicine", "Dosage"]]
    .to_dict(orient="index")
)
no_treatment = {"Medicine": "No specific medicine found", "Dosage": "Consult a doctor"}

# Encode every symptom string in one call so the model can batch the work
# instead of running one forward pass per row.
symptom_texts = df1["Symptoms"].tolist()
symptom_embeddings = model.encode(symptom_texts).tolist()

entry_metadatas = []
for disease in df1["Disease"]:
    treatment = treatment_by_disease.get(disease, no_treatment)
    entry_metadatas.append(
        {
            "Disease": disease,
            "Medicine": treatment["Medicine"],
            "Dosage": treatment["Dosage"],
        }
    )

# Single write for the whole table (a few hundred rows). `upsert` rather than
# `add` so re-running this cell overwrites ids "0".."N-1" instead of colliding
# with the entries written by the previous run.
collection.upsert(
    ids=[str(index) for index in df1.index],
    embeddings=symptom_embeddings,
    documents=symptom_texts,
    metadatas=entry_metadatas,
)

In [13]:
def get_disease_medicine(user_symptoms):
    """Return the best-matching disease and its treatment for a symptom description.

    The text is embedded with the notebook-level ``model`` and the single
    nearest entry is fetched from the notebook-level ``collection``.

    Args:
        user_symptoms: Free-text symptom description entered by the user.

    Returns:
        A ``(disease, medicine, dosage)`` tuple of strings read from the
        matched entry's metadata, or the fixed fallback tuple
        ``("No matching disease found", "No medicine available",
        "Consult a doctor")`` when the input is blank or the query has no hit.
    """
    no_match = ("No matching disease found", "No medicine available", "Consult a doctor")

    # A blank query carries no symptoms to match on.
    if not user_symptoms or not user_symptoms.strip():
        return no_match

    query_embedding = model.encode(user_symptoms).tolist()
    results = collection.query(query_embeddings=[query_embedding], n_results=1)

    # "metadatas" holds one inner list per query embedding. With zero hits it
    # is [[]], which is truthy, so the inner list has to be checked as well
    # before indexing into it.
    metadatas = results["metadatas"] if results else None
    if metadatas and metadatas[0]:
        metadata = metadatas[0][0]
        return metadata["Disease"], metadata["Medicine"], metadata["Dosage"]

    return no_match

# Interactive prompt: answer symptom queries until the user types "exit".
while True:
    user_input = input("Enter the symptoms: ").strip()
    # Accept "exit" regardless of letter case or surrounding whitespace, so
    # "Exit" or "exit " ends the session instead of being looked up as symptoms.
    if user_input.lower() == "exit":
        break
    # Nothing to look up on a blank line; ask again.
    if not user_input:
        continue
    disease, medicine, dosage = get_disease_medicine(user_input)
    print(f"Disease: {disease}\nMedicine: {medicine}\nDosage: {dosage}")

Enter the symptoms: high_fever,blister,red_sore_around_nose
Disease: Impetigo
Medicine: Mupirocin ointment
Dosage: Apply three times daily
Enter the symptoms: exit
