In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm





In [10]:
import time

# Load the disease/symptom table and report how long the read took.
print("Loading data file now, this could take a while depending on file size")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
df = pd.read_csv('Diseases_Symptoms.csv')
end = time.perf_counter()
print(f"Loading took {round(end - start, 2)} seconds")

# Basic data-quality summary: total missing cells, fully duplicated rows,
# and the number of distinct values per column.
missing_by_column = df.isna().sum()  # computed once, reused for the per-column report
missing_values = missing_by_column.sum()
duplicated_values = df.duplicated().sum()
print(f'\nMissing values: {missing_values}')
print(f'Duplicated values: {duplicated_values}')
if missing_values >= 1:
    # Only show the per-column breakdown when there is something to report.
    print('\nMissing values by column:')
    print(missing_by_column)
print("\nUnique Values in Each Column:")
print(df.nunique())

Loading data file now, this could take a while depending on file size
Loading took 0.01 seconds

Missing values: 1
Duplicated values: 0

Missing values by column:
Code          0
Name          0
Symptoms      0
Treatments    1
dtype: int64

Unique Values in Each Column:
Code          400
Name          392
Symptoms      395
Treatments    386
dtype: int64


In [None]:
# Preview the first rows of the loaded DataFrame to sanity-check its columns.
df.head()

Unnamed: 0,Code,Name,Symptoms,Treatments
0,1,Panic disorder,"Palpitations, Sweating, Trembling, Shortness o...","Antidepressant medications, Cognitive Behavior..."
1,2,Vocal cord polyp,"Hoarseness, Vocal Changes, Vocal Fatigue","Voice Rest, Speech Therapy, Surgical Removal"
2,3,Turner syndrome,"Short stature, Gonadal dysgenesis, Webbed neck...","Growth hormone therapy, Estrogen replacement t..."
3,4,Cryptorchidism,"Absence or undescended testicle(s), empty scro...",Observation and monitoring (in cases of mild o...
4,5,Ethylene glycol poisoning-1,"Nausea, vomiting, abdominal pain, General mala...","Supportive Measures, Gastric Decontamination, ..."


In [5]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model by name.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode every symptom string in a single batched call instead of invoking
# model.encode() once per row; encode() accepts a list of sentences and
# returns one embedding per sentence.
symptom_embeddings = model.encode(df['Symptoms'].tolist())
# Store one 1-D vector per row, matching the per-row layout the rest of the
# notebook reads from df['Symptom_Embedding'].
df['Symptom_Embedding'] = list(symptom_embeddings)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [6]:
# Preview the DataFrame again to confirm the 'Symptom_Embedding' column is present.
df.head()

Unnamed: 0,Code,Name,Symptoms,Treatments,Symptom_Embedding
0,1,Panic disorder,"Palpitations, Sweating, Trembling, Shortness o...","Antidepressant medications, Cognitive Behavior...","[0.07893878, -0.027037544, 0.07101002, 0.09278..."
1,2,Vocal cord polyp,"Hoarseness, Vocal Changes, Vocal Fatigue","Voice Rest, Speech Therapy, Surgical Removal","[0.041374017, -0.05340638, 0.04670723, 0.02566..."
2,3,Turner syndrome,"Short stature, Gonadal dysgenesis, Webbed neck...","Growth hormone therapy, Estrogen replacement t...","[0.016158279, 0.07126421, 0.035114612, -0.0060..."
3,4,Cryptorchidism,"Absence or undescended testicle(s), empty scro...",Observation and monitoring (in cases of mild o...,"[0.04706858, 0.010907559, -0.08268524, 0.04357..."
4,5,Ethylene glycol poisoning-1,"Nausea, vomiting, abdominal pain, General mala...","Supportive Measures, Gastric Decontamination, ...","[0.058652643, -0.03139774, -0.018973239, 0.066..."


In [31]:
def find_condition_by_symptoms(input_symptoms):
    """Return the condition whose symptom embedding best matches the input.

    The input text is embedded with the module-level ``model`` and compared,
    by cosine similarity, against every vector in ``df['Symptom_Embedding']``.

    Args:
        input_symptoms: Free-text symptom description, e.g.
            ``"Fever, Weight loss, Loss of appetite"``.

    Returns:
        A ``(name, treatments)`` tuple read from the ``Name`` and
        ``Treatments`` columns of the highest-scoring row. The treatments
        value is returned as stored, so it may be missing (NaN) if the
        dataset has no treatment recorded for that row.

    Side effects:
        Overwrites ``df['Similarity']`` on the module-level DataFrame with the
        scores for this query; each call replaces the previous scores.
    """
    import numpy as np  # local import: numpy is not imported at the top of the notebook

    # Generate embedding for the input symptoms
    input_embedding = model.encode(input_symptoms)
    # Stack the per-row vectors into one 2-D matrix so all similarities are
    # computed in a single cos_sim call rather than one call per row.
    condition_embeddings = np.stack(df['Symptom_Embedding'].to_numpy())
    # cos_sim returns a (1, n_rows) tensor for a single query; take row 0.
    scores = util.cos_sim(input_embedding, condition_embeddings)[0]
    # Cast to Python-float precision so the column dtype matches what the
    # per-row .item() approach produced.
    df['Similarity'] = scores.cpu().numpy().astype(float)
    # Find the most similar condition
    best_match = df.loc[df['Similarity'].idxmax()]
    return best_match['Name'], best_match['Treatments']

In [32]:
# Example query 1: look up the best-matching condition for a short,
# general symptom list and print its name and treatments.
input_symptoms = "Fever, Weight loss, Loss of appetite"
# find_condition_by_symptoms returns the matched row's Name and Treatments.
condition_name, treatments = find_condition_by_symptoms(input_symptoms)

print("Condition:", condition_name)
print("Recommended Treatments:", treatments)

Condition: Chagas Disease
Recommended Treatments: Antiparasitic medications, medications to manage symptoms, supportive care


In [33]:
# Example query 2: a longer symptom list mixing skin, nasal, eye and
# respiratory complaints.
input_symptoms = "Itchy rash, Sneezing, Runny nose, Watery eyes, Cough, Shortness of breath, Wheezing, Fatigue"
# find_condition_by_symptoms returns the matched row's Name and Treatments.
condition_name, treatments = find_condition_by_symptoms(input_symptoms)

print("Condition:", condition_name)
print("Recommended Treatments:", treatments)

Condition: Food Allergy
Recommended Treatments: Avoidance of allergenic food, antihistamines, epinephrine (in severe cases)


In [34]:
# Example query 3: a symptom list centred on digestive complaints.
input_symptoms = "Abdominal pain, Diarrhea, Nausea, Vomiting, Fever, Loss of appetite, Fatigue, Dehydration"
# find_condition_by_symptoms returns the matched row's Name and Treatments.
condition_name, treatments = find_condition_by_symptoms(input_symptoms)

print("Condition:", condition_name)
print("Recommended Treatments:", treatments)

Condition: Gastroenteritis (Stomach Flu)
Recommended Treatments: Rest, fluids (electrolyte solutions), bland diet, avoiding irritating foods, over-the-counter medications for symptom relief
