## Preparing of the dataset and loading all necessary tools

In [3]:
import pandas as pd

In [4]:
dataset = pd.read_csv("dataset/Training.csv")

In [5]:
dataset.head() #showing the first 5 columns for example of entire training dataset

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [6]:
dataset.shape #total of 133 symptoms and lots of combined diseases according to the symptoms

(4920, 133)

In [7]:
dataset['prognosis'].unique() #names of all unique diseases

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [8]:
len(dataset['prognosis'].unique()) #total no. of unique diseases

41

## Train/Test split

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [10]:
X = dataset.drop('prognosis',axis=1) #columnwise delete
y = dataset['prognosis'] 

In [11]:
#converting "Y" dataset to numpy arrary and lable encode unique values ie.Symptoms
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)

In [12]:
y # before label encoding

0                              Fungal infection
1                              Fungal infection
2                              Fungal infection
3                              Fungal infection
4                              Fungal infection
                         ...                   
4915    (vertigo) Paroymsal  Positional Vertigo
4916                                       Acne
4917                    Urinary tract infection
4918                                  Psoriasis
4919                                   Impetigo
Name: prognosis, Length: 4920, dtype: object

In [13]:
Y # after label endcoding (machine_readable)

array([15, 15, 15, ..., 38, 35, 27], shape=(4920,))

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.3, random_state=20) #training 70% of data for training set and remaining 30% for test

In [15]:
X_train.shape #70% split

(3444, 132)

In [16]:
X_test.shape #30% split

(1476, 132)

In [17]:
y_train.shape, y_test.shape  #70-30% split

((3444,), (1476,))

## Training top models

In [18]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB #all the algorithms used

from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# Create a dictionary to store models
models = {
    "SVC": SVC(kernel='linear'),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "GradientBoosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "KNeighbors": KNeighborsClassifier(n_neighbors=5),
    "MultinomialNB": MultinomialNB()
}

# Loop through the models, train, test, and print results
for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Test the model
    predictions = model.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions)

    # Calculate confusion matrix
    cm = confusion_matrix(y_test, predictions)

    print(f"{model_name} accuracy : {accuracy}")
    print(f"{model_name} Confusion Matrix:")
    print(np.array2string(cm, separator=', '))

SVC accuracy : 1.0
SVC Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
RandomForest accuracy : 1.0
RandomForest Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
GradientBoosting accuracy : 1.0
GradientBoosting Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,  0,  0, 31]]
KNeighbors accuracy : 1.0
KNeighbors Confusion Matrix:
[[40,  0,  0, ...,  0,  0,  0],
 [ 0, 43,  0, ...,  0,  0,  0],
 [ 0,  0, 28, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 34,  0,  0],
 [ 0,  0,  0, ...,  0, 41,  0],
 [ 0,  0,  0, ...,

**Single Prediction**

In [19]:
# selecting svc
svc = SVC(kernel='linear')
svc.fit(X_train,y_train)
ypred = svc.predict(X_test)
accuracy_score(y_test,ypred)

1.0

In [20]:
import pickle
pickle.dump(svc,open('models/svc.pkl','wb'))

In [21]:
svc = pickle.load(open('models/svc.pkl','rb'))

In [22]:
#test-1
print("Predicted Model:", svc.predict(X_test.iloc[0].values.reshape(1, -1)))  # Extracting first element from prediction
print("Actual Model:", y_test[0])

Predicted Model: [40]
Actual Model: 40




In [23]:
#test-2
print("Predicted Model:", svc.predict(X_test.iloc[10].values.reshape(1, -1)))  # Extracting first element from prediction
print("Actual Model:", y_test[10])

Predicted Model: [20]
Actual Model: 20




## Load dataset and utilise the logic for the prediction

In [24]:
sys_def = pd.read_csv("dataset/symptoms.csv")
precautions = pd.read_csv("dataset/precautions.csv")
advices = pd.read_csv("dataset/advices.csv")
description = pd.read_csv("dataset/description.csv")
medications = pd.read_csv("dataset/medications.csv")
diets = pd.read_csv("dataset/diets.csv")

In [25]:
medications

Unnamed: 0,Disease,Medication_1,Medication_2,Medication_3,Medication_4,Medication_5
0,Fungal infection,Antifungal Cream,Fluconazole,Terbinafine,Clotrimazole,Ketoconazole
1,Allergy,Antihistamines,Decongestants,Epinephrine,Corticosteroids,Immunotherapy
2,GERD,Proton Pump Inhibitors (PPIs),H2 Blockers,Antacids,Prokinetics,Antibiotics
3,Chronic cholestasis,Ursodeoxycholic acid,Cholestyramine,Methotrexate,Corticosteroids,Liver transplant
4,Drug Reaction,Antihistamines,Epinephrine,Corticosteroids,Antibiotics,Antifungal Cream
5,Peptic ulcer disease,Antibiotics,Proton Pump Inhibitors (PPIs),H2 Blockers,Antacids,Cytoprotective agents
6,AIDS,Antiretroviral drugs,Protease inhibitors,Integrase inhibitors,Entry inhibitors,Fusion inhibitors
7,Diabetes,Insulin,Metformin,Sulfonylureas,DPP-4 inhibitors,GLP-1 receptor agonists
8,Gastroenteritis,Antibiotics,Antiemetic drugs,Antidiarrheal drugs,IV fluids,Probiotics
9,Bronchial Asthma,Bronchodilators,Inhaled corticosteroids,Leukotriene modifiers,Mast cell stabilizers,Anticholinergics


In [26]:
def helper(dis):
    desc = description[description['Disease'] == predicted_disease]['Description']
    desc = " ".join([w for w in desc])

    pre = precautions[precautions['Disease'] == dis][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    pre = [col for col in pre.values]

    med = medications[medications['Disease'] == dis][['Medication_1','Medication_2','Medication_3','Medication_4','Medication_5']]
    med = [bol for bol in med.values]

    die = diets[diets['Disease'] == dis][['Diet_1','Diet_2','Diet_3','Diet_4','Diet_5']]
    die = [mol for mol in die.values]

    adv = advices[advices['Disease'] == dis] ['Advices']


    return desc,pre,med,die,adv

In [27]:
symptoms_dict = {'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of urine': 90, 'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic _patches': 102, 'watering_from_eyes': 103, 'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131}
diseases_list = {15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal  Positional Vertigo', 2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'}

# Model Prediction function
def get_predicted_value(patient_symptoms):
    input_vector = np.zeros(len(symptoms_dict))
    for item in patient_symptoms:
        input_vector[symptoms_dict[item]] = 1
    return diseases_list[svc.predict([input_vector])[0]]
    

In [28]:
# Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separated) # itching,skin_rash,nodal_skin_eruptions
symptoms = input("Enter your symptoms:")
user_symptoms = [s.strip() for s in symptoms.split(',')] # Remove any extra characters, if any
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]
predicted_disease = get_predicted_value(user_symptoms)

desc, pre, med, die, adv = helper(predicted_disease)
print("Predicted Disease:")
print(predicted_disease)
print("Description:")
print(desc)
print("Precautions:")
i = 1
for p_i in pre[0]:
    print(i, ": ", p_i)
    i += 1

print("Medications:")
for m_i in med[0]:
    print(i, ": ", m_i)
    i += 1

print("Advices:")
for a_i in adv:
    print(i, ": ", a_i)
    i += 1

print("Diet:")
for d_i in die[0]:
    print(i, ": ", d_i)
    i += 1


Predicted Disease:
Gastroenteritis
Description:
Gastroenteritis is an inflammation of the stomach and intestines, typically caused by a virus or bacteria.
Precautions:
1 :  stop eating solid food for while
2 :  try taking small sips of water
3 :  rest
4 :  ease back into eating
Medications:
5 :  Antibiotics
6 :  Antiemetic drugs
7 :  Antidiarrheal drugs
8 :  IV fluids
9 :  Probiotics
Advices:
10 :  Stay hydrated
11 :  Consume clear fluids
12 :  Follow the BRAT diet (bananas, rice, applesauce, toast)
13 :  Include bland foods
14 :  Avoid fatty and greasy foods
15 :  Limit caffeine and alcohol
16 :  Avoid spicy foods
17 :  Consult a healthcare professional
18 :  Gradually reintroduce solid foods
19 :  Avoid dairy products
Diet:
20 :  Bland Diet
21 :  Bananas
22 :  Rice
23 :  Applesauce
24 :  Toast




In [29]:
# continued with pycharm/flask app
import sklearn
print(sklearn.__version__)

1.6.1


In [30]:
import sys
print(sys.version)


3.12.3 (main, Feb  4 2025, 14:48:35) [GCC 13.3.0]


In [None]:
import pandas as pd

# Load the CSV file
file_path = "dataset/medications.csv"
df = pd.read_csv(file_path)

# Display the first few rows to understand its structure
df.head()


FileNotFoundError: [Errno 2] No such file or directory: 'dataset/Medications.csv'