# Disease Prediction

## Import Libraries

In [60]:
import numpy as np
import pandas as pd 
import pickle
import ipysheet
import ipyaggrid
import qgrid
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

## Loading Dataset

In [61]:
# Read the four CSV inputs from the working directory, in a fixed order:
#   df_main        - one record per row: a 'Disease' label plus Symptom_N columns
#   df_severity    - lookup table with a 'Symptom' column (used for the vocabulary)
#   df_description - loaded for reference; not used further in this section
#   df_precaution  - loaded for reference; not used further in this section
df_main, df_severity, df_description, df_precaution = (
    pd.read_csv(csv_path)
    for csv_path in (
        'dataset.csv',
        'Symptom-severity.csv',
        'symptom_Description.csv',
        'symptom_precaution.csv',
    )
)

In [74]:
# Preview the first five records of the raw dataset.
df_main.head(n=5)

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


## Disease and Symptom Lists

In [62]:
# Alphabetically sorted list of the distinct labels in the 'Disease' column.
uni_diseases = sorted(df_main['Disease'].unique())

In [63]:
# Alphabetically sorted list of the distinct names in the severity table's
# 'Symptom' column; these become the feature columns further down.
uni_symptoms = sorted(df_severity['Symptom'].unique())

## Data Preprocessing

In [64]:
# Start the feature table as a one-column frame holding only the target label.
diseases = df_main.loc[:, ['Disease']]

In [65]:
# Append one (initially all-NaN) column per known symptom after the existing
# column(s); the cells below fill these in as 0/1 indicators.
diseases = diseases.reindex(columns=[*diseases.columns, *uni_symptoms])

In [66]:
# Turn the per-record symptom lists into one-hot indicators: for every record
# in df_main, write a 1 into the `diseases` column named after each symptom.
# Rows are taken from df_main's own index rather than a hard-coded range(4920),
# so the cell neither raises KeyError on a shorter dataset nor silently skips
# the tail of a longer one.
symptom_cells = df_main.iloc[:, 1:]  # every column after the leading 'Disease'
for row, cells in symptom_cells.iterrows():
    # Raw names may contain spaces (e.g. "dischromic _patches"); strip them
    # before using the name as a column label. Missing cells are skipped.
    columns = [x.replace(" ", "") for x in cells if pd.notnull(x)]
    for col in columns:
        diseases.at[row, col] = 1

In [67]:
# Any symptom cell not set to 1 above is still NaN; record it as absent (0).
diseases = diseases.fillna(value=0)

In [68]:
# Reproducible 70/30 split: draw 70% of the rows at random (seed 42) for
# training and keep every row that was not drawn as the test set.
ds_train = diseases.sample(frac=0.7, random_state=42)
ds_test = diseases.drop(index=ds_train.index)

# Features are all columns except the 'Disease' label; the label is the target.
x_train = ds_train.drop(columns='Disease')
y_train = ds_train['Disease']

# Only the test features get a fresh 0..n-1 index; y_test keeps its original
# index labels.
x_test = ds_test.drop(columns='Disease').reset_index(drop=True)
y_test = ds_test['Disease']

In [73]:
# Preview the first five training rows (symptom indicator columns only).
x_train.head(n=5)

Unnamed: 0,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,vomiting,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin
373,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1550,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3081,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3857,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0


## Training Model

In [69]:
# Fit a random forest on the one-hot symptom features.
# random_state is pinned so that the forest's internal randomness is
# reproducible on a fresh kernel run; 42 matches the seed already used for the
# train/test sample, so the whole pipeline is deterministic.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

RandomForestClassifier()

## Testing Model

In [70]:
# Predict a disease for every held-out row, then print per-class
# precision / recall / F1 against the true labels.
result = model.predict(x_test)
report = classification_report(y_test.values, result)
print(report)

                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        43
                                   AIDS       1.00      1.00      1.00        36
                                   Acne       1.00      1.00      1.00        33
                    Alcoholic hepatitis       1.00      1.00      1.00        38
                                Allergy       1.00      1.00      1.00        34
                              Arthritis       1.00      1.00      1.00        34
                       Bronchial Asthma       1.00      1.00      1.00        33
                   Cervical spondylosis       1.00      1.00      1.00        33
                            Chicken pox       1.00      1.00      1.00        33
                    Chronic cholestasis       1.00      1.00      1.00        33
                            Common Cold       1.00      1.00      1.00        44
                           

## Save the model and data

In [71]:
# Save the fitted model to disk.
# The `with` block guarantees the file handle is flushed and closed even if
# pickling fails; the original passed a bare open(...) that was never closed.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [72]:
# Save the label and feature vocabularies to disk as a dict with the keys
# 'Diseases' and 'Symptoms'.
# The `with` block guarantees the file handle is flushed and closed even if
# pickling fails; the original passed a bare open(...) that was never closed.
filename = 'data.pck'
data = {'Diseases': uni_diseases, 'Symptoms': uni_symptoms}
with open(filename, 'wb') as data_file:
    pickle.dump(data, data_file)