In [10]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.python.saved_model import builder
from tensorflow.python.saved_model.signature_def_utils import predict_signature_def
from tensorflow.python.saved_model import tag_constants

import numpy as np
import json

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import sagemaker

import boto3

In [11]:
s3 = boto3.client('s3')
s3.download_file(Bucket='medical.train.data', Key='input/dataset.csv', Filename = 'dataset.csv')

In [12]:
df = pd.read_csv('dataset.csv')
df.describe

<bound method NDFrame.describe of                                       Disease             Symptom_1  \
0                            Fungal infection               itching   
1                            Fungal infection             skin_rash   
2                            Fungal infection               itching   
3                            Fungal infection               itching   
4                            Fungal infection               itching   
...                                       ...                   ...   
4915  (vertigo) Paroymsal  Positional Vertigo              vomiting   
4916                                     Acne             skin_rash   
4917                  Urinary tract infection   burning_micturition   
4918                                Psoriasis             skin_rash   
4919                                 Impetigo             skin_rash   

                  Symptom_2              Symptom_3                  Symptom_4  \
0                 skin_rash   no

In [13]:
df = shuffle(df, random_state=2)

In [14]:
cols = [i for i in df.iloc[:,1:].columns]
cols


['Symptom_1',
 'Symptom_2',
 'Symptom_3',
 'Symptom_4',
 'Symptom_5',
 'Symptom_6',
 'Symptom_7',
 'Symptom_8',
 'Symptom_9',
 'Symptom_10',
 'Symptom_11',
 'Symptom_12',
 'Symptom_13',
 'Symptom_14',
 'Symptom_15',
 'Symptom_16',
 'Symptom_17']

In [15]:
tmp = pd.melt(df.reset_index() ,id_vars = ['index'], value_vars = cols )
tmp['add1'] = 1
tmp

Unnamed: 0,index,variable,value,add1
0,1343,Symptom_1,acidity,1
1,3343,Symptom_1,joint_pain,1
2,2213,Symptom_1,itching,1
3,4094,Symptom_1,muscle_weakness,1
4,258,Symptom_1,chills,1
...,...,...,...,...
83635,3335,Symptom_17,,1
83636,1099,Symptom_17,,1
83637,2514,Symptom_17,,1
83638,3606,Symptom_17,,1


In [16]:

diseases = pd.pivot_table(tmp, 
                          values = 'add1',
                          index = 'index',
                          columns = 'value')

diseases.insert(0,'label',df['Disease'])
diseases = diseases.fillna(0)

diseases.head()

value,label,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,...,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin,itching
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [17]:
diseaseLabels = diseases['label']
diseases = diseases.drop(columns = ['label'])

diseases.head()

value,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin,itching
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [18]:
diseaseNames =  diseaseLabels.unique()

s = pd.Series(diseaseNames)
diseaseNamesDict =  s.to_dict()

In [19]:
diseaseNamesInvertedDict = {v: k for k, v in diseaseNamesDict.items()}
diseaseLabels = diseaseLabels.map(diseaseNamesInvertedDict)

In [20]:

x, y = diseases, diseaseLabels
x_train, x_test, y_train, y_test = train_test_split(x,y)

In [21]:
model = Sequential([
    Dense(800, activation='relu', input_shape=(len(x_train.iloc[0]),)),
    Dense(160, activation='relu'),
    Dense(41)]
)

[2021-03-21 11:40:35.913 tensorflow-2-3-cpu-py-ml-t3-medium-dbca98283d57d615662c4efa28c8:23 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2021-03-21 11:40:36.231 tensorflow-2-3-cpu-py-ml-t3-medium-dbca98283d57d615662c4efa28c8:23 INFO profiler_config_parser.py:102] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.


In [22]:
model.compile(optimizer= 'adam', #tf.keras.optimizers.RMSprop(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [23]:
model.fit(x_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f93e4f0d550>

In [24]:
test_loss, test_acc = model.evaluate(x_test,  y_test, verbose=2)

print('\nTest accuracy:', test_acc)

39/39 - 0s - loss: 1.9565e-04 - accuracy: 1.0000

Test accuracy: 1.0


In [34]:
num_examples = len(x_test)
probability_model = tf.keras.Sequential([model,
                                         tf.keras.layers.Softmax()])
predictions = probability_model.predict(x_test[:num_examples])


guessed = 0

leastAccuracy = 1

for i in range(num_examples):
    accuracy = np.max(predictions[i])
    if accuracy < leastAccuracy :
        leastAccuracy = accuracy
    
#     print('Predicted val: ', np.argmax(predictions[i]))
#     print('Actual val: ',y_test.iloc[i])
#     print('Expected accuracy: ',100*np.max(predictions[i]), '%')
#     print()

print('least accuracy: ',100*leastAccuracy, '%')

least accuracy:  99.25748109817505 %
