# Identifying the disease

In [1]:
from google.colab import files
files.upload()
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [2]:
!kaggle datasets download -d itachi9604/disease-symptom-description-dataset

!unzip disease-symptom-description-dataset

Downloading disease-symptom-description-dataset.zip to /content
  0% 0.00/30.1k [00:00<?, ?B/s]
100% 30.1k/30.1k [00:00<00:00, 25.4MB/s]
Archive:  disease-symptom-description-dataset.zip
  inflating: Symptom-severity.csv    
  inflating: dataset.csv             
  inflating: symptom_Description.csv  
  inflating: symptom_precaution.csv  


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras import losses, optimizers, models, layers, callbacks
from sklearn.preprocessing import label_binarize

In [4]:
df = pd.read_csv('/content/dataset.csv')
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,


In [5]:
diseases = ['Acne','Common Cold','Diabetes ','Heart attack','Hyperthyroidism','Hypothyroidism','Malaria','Pneumonia']

#diseases = df['Disease'].unique().tolist()

In [6]:
df = df[df['Disease'].isin(diseases)]
df = df.drop_duplicates(ignore_index=True)
df['Disease'].value_counts()

Pneumonia          9
Common Cold        9
Hyperthyroidism    9
Diabetes           9
Hypothyroidism     8
Malaria            8
Acne               5
Heart attack       5
Name: Disease, dtype: int64

In [7]:
symptoms = []
for i in range(1, 18):
    symptoms.extend(df['Symptom_'+str(i)].values.tolist())

symptoms = list(set(symptoms))
symptoms.remove(np.nan)

symptoms = [x.strip() for x in symptoms]

len(symptoms), symptoms

(49,
 ['cold_hands_and_feets',
  'skin_rash',
  'pus_filled_pimples',
  'dizziness',
  'chest_pain',
  'excessive_hunger',
  'malaise',
  'headache',
  'blackheads',
  'lethargy',
  'high_fever',
  'weight_loss',
  'scurring',
  'muscle_weakness',
  'enlarged_thyroid',
  'obesity',
  'increased_appetite',
  'puffy_face_and_eyes',
  'fatigue',
  'sinus_pressure',
  'swelled_lymph_nodes',
  'runny_nose',
  'diarrhoea',
  'throat_irritation',
  'continuous_sneezing',
  'irritability',
  'vomiting',
  'loss_of_smell',
  'muscle_pain',
  'swollen_extremeties',
  'abnormal_menstruation',
  'depression',
  'redness_of_eyes',
  'rusty_sputum',
  'nausea',
  'phlegm',
  'congestion',
  'fast_heart_rate',
  'blurred_and_distorted_vision',
  'restlessness',
  'irregular_sugar_level',
  'cough',
  'brittle_nails',
  'weight_gain',
  'polyuria',
  'chills',
  'sweating',
  'mood_swings',
  'breathlessness'])

In [8]:
X = np.empty((0, len(symptoms)))
y = np.empty((0, len(diseases)))

for i in range(len(df)):
    x = np.zeros((1, len(symptoms)))
    y = np.append(y, label_binarize([diseases.index(df.iloc[i,0])], classes=range(len(diseases))), axis=0)
    for s in df.iloc[i,:].values[1:]:
        if isinstance(s, str):
            x[0,symptoms.index(s.strip())] = 1
    X = np.append(X, x, axis = 0)
    #print(X, y)

print(X.shape, y.shape)

(62, 49) (62, 8)


In [9]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size = 0.2, random_state = 13)

In [10]:
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=Xtrain[0].shape),
    layers.Dense(8, activation='relu'),
    layers.Dense(len(diseases), activation='softmax')
])

cb = [callbacks.EarlyStopping(patience=5, restore_best_weights=True)]

In [11]:
model.compile(loss=losses.CategoricalCrossentropy(),
              optimizer=optimizers.Adam(0.01),
              metrics=['accuracy'])

model.fit(Xtrain, ytrain, validation_data=(Xtest, ytest), epochs=256, callbacks=cb)

Epoch 1/256
Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256
Epoch 10/256
Epoch 11/256
Epoch 12/256
Epoch 13/256
Epoch 14/256
Epoch 15/256
Epoch 16/256
Epoch 17/256
Epoch 18/256
Epoch 19/256
Epoch 20/256
Epoch 21/256
Epoch 22/256
Epoch 23/256
Epoch 24/256
Epoch 25/256
Epoch 26/256
Epoch 27/256
Epoch 28/256
Epoch 29/256
Epoch 30/256


<tensorflow.python.keras.callbacks.History at 0x7fe29eccb290>

In [12]:
model.evaluate(X, y)



[0.11392734944820404, 1.0]

In [13]:
model.save('disease.h5')