In [19]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.python.saved_model import builder
from tensorflow.python.saved_model import tag_constants

import numpy as np
import json

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import sagemaker

import boto3

In [20]:
s3 = boto3.client('s3')
s3.download_file(Bucket='medical.train.data', Key='input/dataset.csv', Filename = 'dataset.csv')

In [21]:
df = pd.read_csv('dataset.csv')
df.head

<bound method NDFrame.head of                                       Disease             Symptom_1  \
0                            Fungal infection               itching   
1                            Fungal infection             skin_rash   
2                            Fungal infection               itching   
3                            Fungal infection               itching   
4                            Fungal infection               itching   
5                            Fungal infection             skin_rash   
6                            Fungal infection               itching   
7                            Fungal infection               itching   
8                            Fungal infection               itching   
9                            Fungal infection               itching   
10                                    Allergy   continuous_sneezing   
11                                    Allergy             shivering   
12                                    Allergy  

In [22]:
df = shuffle(df, random_state=2)

In [23]:
cols = [i for i in df.iloc[:,1:].columns]
cols


['Symptom_1',
 'Symptom_2',
 'Symptom_3',
 'Symptom_4',
 'Symptom_5',
 'Symptom_6',
 'Symptom_7',
 'Symptom_8',
 'Symptom_9',
 'Symptom_10',
 'Symptom_11',
 'Symptom_12',
 'Symptom_13',
 'Symptom_14',
 'Symptom_15',
 'Symptom_16',
 'Symptom_17']

In [24]:
tmp = pd.melt(df.reset_index() ,id_vars = ['index'], value_vars = cols )
tmp['add1'] = 1
tmp

Unnamed: 0,index,variable,value,add1
0,1343,Symptom_1,acidity,1
1,3343,Symptom_1,joint_pain,1
2,2213,Symptom_1,itching,1
3,4094,Symptom_1,muscle_weakness,1
4,258,Symptom_1,chills,1
5,4244,Symptom_1,fatigue,1
6,2841,Symptom_1,back_pain,1
7,4004,Symptom_1,chills,1
8,4788,Symptom_1,fatigue,1
9,2874,Symptom_1,itching,1


In [25]:

diseases = pd.pivot_table(tmp, 
                          values = 'add1',
                          index = 'index',
                          columns = 'value')

diseases.insert(0,'label',df['Disease'])
diseases = diseases.fillna(0)

diseases.head()

value,label,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,...,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin,itching
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,Fungal infection,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [26]:
diseaseLabels = diseases['label']
diseases = diseases.drop(columns = ['label'])

diseases.head()

value,abdominal_pain,abnormal_menstruation,acidity,acute_liver_failure,altered_sensorium,anxiety,back_pain,belly_pain,blackheads,bladder_discomfort,...,watering_from_eyes,weakness_in_limbs,weakness_of_one_body_side,weight_gain,weight_loss,yellow_crust_ooze,yellow_urine,yellowing_of_eyes,yellowish_skin,itching
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [27]:
diseaseNames =  diseaseLabels.unique()

s = pd.Series(diseaseNames)
diseaseNamesDict =  s.to_dict()

In [28]:
diseaseNamesInvertedDict = {v: k for k, v in diseaseNamesDict.items()}
diseaseLabels = diseaseLabels.map(diseaseNamesInvertedDict)

In [29]:

x, y = diseases, diseaseLabels
x_train, x_test, y_train, y_test = train_test_split(x,y)


In [30]:
model = Sequential([
    Dense(800, activation='relu', input_shape=(len(x_train.iloc[0]),)),
    Dense(160, activation='relu'),
    Dense(41)]
)

In [31]:
model.compile(optimizer= 'adam', #tf.keras.optimizers.RMSprop(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [32]:
model.fit(x_train, y_train, epochs=10)

Train on 3690 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fb0ec75bdd0>

In [33]:
test_loss, test_acc = model.evaluate(x_test,  y_test, verbose=2)

print('\nTest accuracy:', test_acc)

1230/1230 - 0s - loss: 1.7469e-04 - acc: 1.0000

Test accuracy: 1.0


In [34]:


num_examples = len(x_test)
probability_model = tf.keras.Sequential([model,
                                         tf.keras.layers.Softmax()])

predictions = probability_model.predict(x_test[:num_examples])


guessed = 0

leastAccuracy = 1

for i in range(num_examples):
    accuracy = np.max(predictions[i])
    if accuracy < leastAccuracy :
        leastAccuracy = accuracy
    
#     print('Predicted val: ', np.argmax(predictions[i]))
#     print('Actual val: ',y_test.iloc[i])
#     print('Expected accuracy: ',100*np.max(predictions[i]), '%')
#     print()

print('least accuracy: ',100*leastAccuracy, '%')

least accuracy:  99.32799935340881 %


In [35]:
model_version = '1'
export_dir = 'export/Servo/' + model_version

In [36]:
builder = builder.SavedModelBuilder(export_dir)

In [37]:
from tensorflow.compat.v1.saved_model import predict_signature_def
from tensorflow.python.saved_model import tag_constants

In [38]:
signature = predict_signature_def(
    inputs={"inputs": probability_model.input}, outputs={"score": probability_model.output})


# signature = predict_signature_def(
#     inputs={"inputs": model.input}, outputs={"score": model.output})



Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.


In [39]:
from tensorflow.keras import backend as K

with K.get_session() as sess:
    # Save the meta graph and variables
    builder.add_meta_graph_and_variables(
        sess=sess, tags=[tag_constants.SERVING], signature_def_map={"serving_default": signature})
    builder.save()

INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to write.


INFO:tensorflow:No assets to write.


INFO:tensorflow:SavedModel written to: export/Servo/1/saved_model.pb


INFO:tensorflow:SavedModel written to: export/Servo/1/saved_model.pb


In [40]:
import tarfile
with tarfile.open('model.tar.gz', mode='w:gz') as archive:
    archive.add('export', recursive=True)

In [41]:
sagemaker_session = sagemaker.Session()
inputs = sagemaker_session.upload_data(path='model.tar.gz', key_prefix='model')

In [42]:
from sagemaker.tensorflow.model import TensorFlowModel
from sagemaker import get_execution_role

role = get_execution_role()

# needed for api call
!touch train.py

sagemaker_model = TensorFlowModel(model_data = 's3://' + sagemaker_session.default_bucket() + '/model/model.tar.gz',
                                  role = role,
                                  framework_version = '1.12',
                                  entry_point = 'train.py')

In [43]:

predictor = sagemaker_model.deploy(initial_instance_count=1,
                                   instance_type='ml.t2.medium')

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker:Creating model with name: sagemaker-tensorflow-serving-2021-05-16-19-20-34-348
INFO:sagemaker:Creating endpoint with name sagemaker-tensorflow-serving-2021-05-16-19-20-34-645


-------------------------------------------------------------------------!

In [44]:
# update with output from previous command
endpoint_name = 'sagemaker-tensorflow-serving-2021-03-27-15-35-15-207'

# not used currently
predictor=sagemaker.tensorflow.model.TensorFlowPredictor(endpoint_name, sagemaker_session)

In [45]:
# cleanup
!rm -r export
!rm model.tar.gz

In [46]:
with open('diagnosesDict.json', 'w', encoding='utf-8') as f:
    json.dump(diseaseNamesDict, f, ensure_ascii=False, indent=4)
    
s3.put_object(
     Body=bytes(json.dumps(diseaseNamesDict, ensure_ascii=False, indent=4).encode('UTF-8')),
     Bucket='medical.train.data',
     Key='dataMapping/diagnosesDict.json'
) 

{'ResponseMetadata': {'RequestId': 'D7452XB6P9T1J2GT',
  'HostId': 'ASSSdxMDhCt+exDuDw/4mHiyNjYTL7AX3gJambfy1X5Yd9h7MbZOXGHPUjKfG2Dol5MmOqGDHLo=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'ASSSdxMDhCt+exDuDw/4mHiyNjYTL7AX3gJambfy1X5Yd9h7MbZOXGHPUjKfG2Dol5MmOqGDHLo=',
   'x-amz-request-id': 'D7452XB6P9T1J2GT',
   'date': 'Sun, 16 May 2021 19:57:15 GMT',
   'etag': '"e410c85cbd636564dd3b6370d61dfe96"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"e410c85cbd636564dd3b6370d61dfe96"'}

In [None]:
symptomNames = diseases.columns.values.tolist()

symptomNames = [x.replace(' ', '') for x in symptomNames]

with open('symptomNames.json', 'w', encoding='utf-8') as f:
    json.dump(symptomNames, f, ensure_ascii=False, indent=4)
    
# s3SymptomNamesList = s3.Object('medical.train.data', 'dataMapping/symptomNamesList.json')    
    
# s3SymptomNamesList.put(
#     Body=(bytes(json.dumps(symptomNames, ensure_ascii=False, indent=4).encode('UTF-8')))
# )    


s3.put_object(
     Body=bytes(json.dumps(symptomNames, ensure_ascii=False, indent=4).encode('UTF-8')),
     Bucket='medical.train.data',
     Key='dataMapping/symptomNamesList.json'
)