In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.tree import DecisionTreeClassifier
import difflib

In [11]:
def split_symptoms(symptom_string):
    """Split a comma-separated symptom string into normalized symptom names.

    Each piece is stripped of surrounding whitespace and lower-cased.
    Pieces that are empty after stripping (produced by a trailing comma,
    a doubled comma, or an empty string) are dropped so they never become
    a symptom of their own.

    Parameters
    ----------
    symptom_string : str
        Symptoms separated by commas, e.g. ``"Cough, Sore Throat"``.

    Returns
    -------
    list of str
        Normalized symptom names in their original order.
    """
    pieces = (piece.strip().lower() for piece in symptom_string.split(','))
    # Skip blanks: "a, ,b," would otherwise yield '' entries.
    return [piece for piece in pieces if piece]

def encode_symptoms(symptom_lists):
    """Encode per-record symptom lists as a multi-label indicator matrix.

    A new ``MultiLabelBinarizer`` is fitted on ``symptom_lists`` and used
    to transform them in the same call.

    Parameters
    ----------
    symptom_lists : iterable of iterables of str
        One collection of symptom names per record.

    Returns
    -------
    tuple
        ``(encoded, binarizer)``: the matrix returned by ``fit_transform``
        and the fitted binarizer (its ``classes_`` gives the column order).
    """
    binarizer = MultiLabelBinarizer()
    encoded = binarizer.fit_transform(symptom_lists)
    return encoded, binarizer

def encode_diseases(diseases):
    """Build two-way mappings between disease names and integer ids.

    Names are de-duplicated in first-seen order before numbering, so ids
    are always the contiguous range ``0..k-1`` even when a disease occurs
    on several rows. For input without repeats the result is the same as
    numbering the items by position.

    Parameters
    ----------
    diseases : iterable of hashable
        Disease names (e.g. a DataFrame column); repeats are allowed.

    Returns
    -------
    tuple of (dict, dict)
        ``(disease_to_num, num_to_disease)``, inverse mappings of each other.
    """
    # dict.fromkeys keeps the first occurrence of each name, in order.
    unique_diseases = dict.fromkeys(diseases)
    disease_to_num = {d: i for i, d in enumerate(unique_diseases)}
    num_to_disease = {i: d for d, i in disease_to_num.items()}
    return disease_to_num, num_to_disease

def prepare_training_data(data):
    """Turn the raw disease/symptom table into model-ready arrays.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide a ``'Symptoms'`` column of comma-separated strings
        and a ``'Disease'`` column.

    Returns
    -------
    tuple
        ``(X, y, mlb, disease_to_num, num_to_disease)``: the encoded
        symptom matrix, the ``'Disease'`` column mapped to integer ids,
        the fitted binarizer, and the two disease/id lookup dicts.
    """
    # Features: parse each row's symptom string, then binarize the lists.
    per_row_symptoms = data['Symptoms'].apply(split_symptoms)
    features, binarizer = encode_symptoms(per_row_symptoms)

    # Labels: number the diseases and map the column through that table.
    name_to_id, id_to_name = encode_diseases(data['Disease'])
    labels = data['Disease'].map(name_to_id)

    return features, labels, binarizer, name_to_id, id_to_name

In [13]:
# Load the disease/symptom table into the module-level `data` frame that
# train_model() reads. The path is relative, so the CSV must be in the
# notebook's working directory.
data = pd.read_csv('disease_symptom_large.csv')
# Preview the first ten rows (columns used later: 'Disease', 'Symptoms').
print(data.head(10))

        Disease                                           Symptoms
0   Common Cold           cough, sneezing, sore throat, runny nose
1           Flu     fever, chills, muscle aches, fatigue, headache
2      Diabetes  increased thirst, frequent urination, fatigue,...
3  Hypertension  high blood pressure, headache, dizziness, nose...
4        Asthma  shortness of breath, wheezing, coughing, chest...
5      Migraine   headache, nausea, sensitivity to light, vomiting
6    Chickenpox           fever, rash, tiredness, loss of appetite
7  Tuberculosis  persistent cough, chest pain, weight loss, nig...
8       Malaria          fever, chills, sweating, headache, nausea
9        Dengue  high fever, severe headache, joint pain, rash,...


In [15]:
def train_model(random_state=42):
    """Fit a decision tree on the module-level ``data`` frame.

    The classifier is seeded so repeated runs build the same tree; without
    a fixed ``random_state`` scikit-learn may break ties between equally
    good splits differently from run to run.

    Parameters
    ----------
    random_state : int or None, default 42
        Seed forwarded to ``DecisionTreeClassifier``. Pass ``None`` to get
        the unseeded behaviour.

    Returns
    -------
    tuple
        ``(model, mlb, data, disease_to_num, num_to_disease)``: the fitted
        classifier, the fitted symptom binarizer, the training frame
        itself, and the two disease/id lookup dicts.
    """
    # NOTE: reads the global `data`, so the loading cell must run first.
    X, y, mlb, disease_to_num, num_to_disease = prepare_training_data(data)
    model = DecisionTreeClassifier(random_state=random_state)
    model.fit(X, y)
    return model, mlb, data, disease_to_num, num_to_disease

def convert_input_to_vector(input_symptoms, mlb):
    """Encode symptom names as a 0/1 vector aligned with ``mlb.classes_``.

    Each name is stripped and lower-cased before lookup. Names that are
    not among ``mlb.classes_`` are ignored.

    Parameters
    ----------
    input_symptoms : iterable of str
        Symptom names to encode.
    mlb : object with a ``classes_`` sequence
        Typically the fitted binarizer; ``classes_`` defines column order.

    Returns
    -------
    numpy.ndarray
        Float vector of length ``len(mlb.classes_)`` with 1 at the
        position of every recognized symptom and 0 elsewhere.
    """
    # Build the name -> column lookup once instead of converting and
    # scanning classes_ for every symptom. setdefault keeps the first
    # position if a name were ever repeated, matching list.index().
    column_of = {}
    for position, known in enumerate(mlb.classes_):
        column_of.setdefault(known, position)

    input_vector = np.zeros(len(mlb.classes_))
    for raw in input_symptoms:
        position = column_of.get(raw.strip().lower())
        if position is not None:
            input_vector[position] = 1
    return input_vector

def predict_disease(input_symptoms, model, mlb, num_to_disease):
    """Predict a disease name for the given symptoms.

    Parameters
    ----------
    input_symptoms : iterable of str
        Symptom names describing one case.
    model : fitted estimator exposing ``predict``
    mlb : fitted binarizer used to encode the symptoms
    num_to_disease : dict
        Maps the model's integer labels back to disease names.

    Returns
    -------
    The ``num_to_disease`` entry for the predicted label.
    """
    features = convert_input_to_vector(input_symptoms, mlb)
    # predict() takes a batch, so wrap the single sample in a list and
    # read back the only result.
    predicted_labels = model.predict([features])
    return num_to_disease[predicted_labels[0]]

def calculate_bmi():
    """Prompt for weight and height, then print the BMI and its category.

    Reads weight (kg) and height (m) from standard input. Input that is
    not a number, or is a non-finite value such as ``nan`` or ``inf``, is
    rejected with the "invalid input" message; zero or negative values
    get the "must be positive" message. Otherwise the BMI
    (``weight / height**2``) is printed with two decimals, followed by
    its category.

    Returns
    -------
    None
        All results are reported via ``print``.
    """
    import math  # function-scope import: only needed for the finiteness check

    try:
        weight = float(input("Enter your weight in kilograms: "))
        height = float(input("Enter your height in meters: "))
    except ValueError:
        print("Invalid input. Please enter numeric values.")
        return

    # float() accepts "nan"/"inf"; NaN also slips past the <= 0 check below
    # and would fall through to the final "Obesity" branch.
    if not (math.isfinite(weight) and math.isfinite(height)):
        print("Invalid input. Please enter numeric values.")
        return

    if weight <= 0 or height <= 0:
        print("Weight and height must be positive numbers.")
        return

    bmi = weight / (height ** 2)
    print(f"Your BMI is: {bmi:.2f}")

    # Each branch is only reached when the previous upper bound failed,
    # so a single comparison per branch is enough.
    if bmi < 18.5:
        print("Category: Underweight")
    elif bmi < 25:
        print("Category: Normal weight")
    elif bmi < 30:
        print("Category: Overweight")
    else:
        print("Category: Obesity")


In [None]:
# Train once up front; the menu loop below reuses the fitted model/encoder.
model, mlb, data, disease_to_num, num_to_disease = train_model()
all_known_symptoms = sorted(list(mlb.classes_))
disease_to_symptoms = data.set_index('Disease')['Symptoms'].to_dict()

# Loop-invariant lookups, built once rather than on every menu pass.
known_symptom_set = set(all_known_symptoms)
diseases_list = sorted(data['Disease'].unique())
# Lower-cased, stripped disease name -> name as spelled in the dataset.
# Used for both exact and fuzzy matching so neither depends on letter case.
normalized_to_disease = {}
for d in disease_to_symptoms:
    normalized_to_disease.setdefault(d.lower().strip(), d)

# NOTE(review): name and age are collected but not used anywhere below.
name = input("Enter your name: ").strip()
age = input("Enter your age: ").strip()

while True:
    print("\nMenu:")
    print("1. Predict Disease (Provide symptoms)")
    print("2. Predict Symptoms (Provide disease)")
    print("3. Calculate BMI")
    print("4. Exit")

    choice = input("Enter your choice (1-4): ").strip()

    if choice == '1':
        print("\n========== AVAILABLE SYMPTOMS ==========")
        print(", ".join(all_known_symptoms))
        print("========================================\n")

        raw_symptoms = input("Enter symptoms separated by commas: ").lower().split(',')
        # Drop blanks from stray/trailing commas so they are not reported
        # as an unrecognized symptom ''.
        user_symptoms = [s.strip() for s in raw_symptoms if s.strip()]

        cleaned_symptoms = []
        for symptom in user_symptoms:
            if symptom in known_symptom_set:
                cleaned_symptoms.append(symptom)
            else:
                # Offer the single closest known symptom for likely typos.
                close_matches = difflib.get_close_matches(symptom, all_known_symptoms, n=1, cutoff=0.6)
                if close_matches:
                    print(f"Did you mean '{close_matches[0]}' instead of '{symptom}'? [y/n]")
                    answer = input().strip().lower()
                    if answer == 'y':
                        cleaned_symptoms.append(close_matches[0])
                else:
                    print(f"'{symptom}' not recognized and no close match found.")

        if not cleaned_symptoms:
            print("No valid symptoms entered. Try again.")
            continue

        predicted = predict_disease(cleaned_symptoms, model, mlb, num_to_disease)
        print("\nBased on your symptoms, the predicted disease is:", predicted)

    elif choice == '2':
        print("\n========== AVAILABLE DISEASES ==========")
        print(", ".join(diseases_list))
        print("========================================\n")

        disease = input("Enter the disease name: ").strip()
        # Exact match, ignoring case.
        matched_disease = normalized_to_disease.get(disease.lower())

        if not matched_disease:
            # Fuzzy match on lower-cased names so that e.g. "asthm" scores
            # against "asthma" rather than the capitalised "Asthma".
            close_matches = difflib.get_close_matches(
                disease.lower(), list(normalized_to_disease), n=1, cutoff=0.6
            )
            if close_matches:
                suggestion = normalized_to_disease[close_matches[0]]
                print(f"Did you mean '{suggestion}' instead of '{disease}'? [y/n]")
                answer = input().strip().lower()
                if answer == 'y':
                    matched_disease = suggestion

        if matched_disease:
            symptoms = disease_to_symptoms[matched_disease]
            print(f"Symptoms associated with {matched_disease}: {symptoms}")
        else:
            print("Disease not found in the dataset.")

    elif choice == '3':
        calculate_bmi()

    elif choice == '4':
        print("Exiting the program. Stay healthy!")
        break

    else:
        print("Invalid choice. Please select a valid option.")

Enter your name:  SS
Enter your age:  20



Menu:
1. Predict Disease (Provide symptoms)
2. Predict Symptoms (Provide disease)
3. Calculate BMI
4. Exit


Enter your choice (1-4):  1



appetite changes, bleeding, blurred vision, chest pain, chest tightness, chills, cough, coughing, dizziness, fatigue, fever, frequent urination, headache, high blood pressure, high fever, increased thirst, joint pain, loss of appetite, loss of interest, loss of taste, muscle aches, nausea, night sweats, nosebleeds, pale skin, persistent cough, persistent sadness, rash, reduced motion, runny nose, sensitivity to light, severe headache, shortness of breath, sneezing, sore throat, stiffness, sweating, swelling, tiredness, vomiting, weakness, weight loss, wheezing



Enter symptoms separated by commas:  cough, coughing, dizziness, feverr


Did you mean 'fever' instead of 'feverr'? [y/n]


 y



Based on your symptoms, the predicted disease is: Common Cold

Menu:
1. Predict Disease (Provide symptoms)
2. Predict Symptoms (Provide disease)
3. Calculate BMI
4. Exit


Enter your choice (1-4):  2



Anemia, Arthritis, Asthma, Chickenpox, Common Cold, Covid-19, Dengue, Depression, Diabetes, Flu, Hypertension, Malaria, Migraine, Pneumonia, Tuberculosis



Enter the disease name:  Asthma


Symptoms associated with Asthma: shortness of breath, wheezing, coughing, chest tightness

Menu:
1. Predict Disease (Provide symptoms)
2. Predict Symptoms (Provide disease)
3. Calculate BMI
4. Exit
