## Predicting Diabetes with Tensorflow

### Imports 

In [1]:
import tensorflow as tf 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split

### Get Data 

In [2]:
data=pd.read_csv('data/diabetes.csv')
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
print("Grouping by The number of individuals in each class:\n",data.Outcome.value_counts())

Grouping by The number of individuals in each class:
 0    500
1    268
Name: Outcome, dtype: int64


### Split Data into Train and  Test Sets 

In [14]:
#train test split 
df=data.copy()
X=df.drop('Outcome',axis=1)
y=df.pop('Outcome')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [15]:
X_train.shape,y_train.shape,X_test.shape,y_test.shape

((537, 8), (537,), (231, 8), (231,))

### Scaling Data 

In [26]:
# scale data 
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.fit_transform(X_test)
# put them into dataframes 
X_train = pd.DataFrame(data=X_train_scaled ,columns = X_train.columns,index=X_train.index)
X_test = pd.DataFrame(data=X_test_scaled ,columns = X_test.columns,index=X_test.index)


### Prepare the Feature Columns List 

In [22]:
#show the columns 
data.keys()

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [23]:
#create a list of feature columns 
num_preg = tf.feature_column.numeric_column('Pregnancies')
glucose = tf.feature_column.numeric_column('Glucose')
blood_press = tf.feature_column.numeric_column('BloodPressure')
skin_thick = tf.feature_column.numeric_column('SkinThickness')
insulin = tf.feature_column.numeric_column('Insulin')
bmi = tf.feature_column.numeric_column('BMI')
diabetes_pedigree = tf.feature_column.numeric_column('DiabetesPedigreeFunction')
age = tf.feature_column.numeric_column('Age')

# put them on a list 
feat_cols=[num_preg ,glucose,blood_press ,skin_thick ,insulin,bmi,diabetes_pedigree, age]

### Set the Input Function 

In [42]:
# set the input functon 
input_func =tf.compat.v1.estimator.inputs.pandas_input_fn(x=X_train,y=y_train ,batch_size=10,num_epochs=None,shuffle=True)

### Linear Classifier 

In [43]:
# create the model 
model = tf.estimator.LinearClassifier(feature_columns=feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\hp\\AppData\\Local\\Temp\\tmp3wd0egsd', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002A7F96E4448>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [44]:
model.train(input_fn=input_func,steps=10000)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\hp\AppData\Local\Temp\tmp3wd0egsd\model.ckpt.
INFO:tensorflow:loss = 0.6931472, step = 0
INFO:tensorflow:global_step/sec: 116.98
INFO:tensorflow:loss = 0.65030706, step = 100 (0.859 sec)
INFO:tensorflow:global_step/sec: 119.452
INFO:tensorflow:loss = 0.7114356, step = 200 (0.840 sec)
INFO:tensorflow:global_step/sec: 122.723
INFO:tensorflow:loss = 0.70673877, step = 300 (0.814 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 117.24
INFO:tensorflow:loss = 0.662324, step = 7300 (0.861 sec)
INFO:tensorflow:global_step/sec: 109.675
INFO:tensorflow:loss = 0.4941237, step = 7400 (0.910 sec)
INFO:tensorflow:global_step/sec: 140.971
INFO:tensorflow:loss = 0.59433293, step = 7500 (0.707 sec)
INFO:tensorflow:global_step/sec: 138.066
INFO:tensorflow:loss = 0.5708329, step = 7600 (0.722 sec)
INFO:tensorflow:global_step/sec: 192.769
INFO:tensorflow:loss = 0.28084275, step = 7700 (0.513 sec)
INFO:tensorflow:global_step/sec: 249.982
INFO:tensorflow:loss = 0.66999024, step = 7800 (0.403 sec)
INFO:tensorflow:global_step/sec: 150.623
INFO:tensorflow:loss = 0.53497666, step = 7900 (0.662 sec)
INFO:tensorflow:global_step/sec: 126.92
INFO:tensorflow:loss = 0.4196803, step = 8000 (0.826 sec)
INFO:tensorflow:global_step/sec: 105.852
INFO:tensorflow:loss = 0.38426423, step = 8100 (0.928 sec)
INFO:tensorflow:global_step/sec: 178.651
INFO:tensorflow:loss = 0.20896216, step = 8200 (0.537 sec)
INFO:te

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x2a7f8cb9248>

In [45]:
# make predictions 
predict_input_func = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=X_test,
      batch_size=100,
      num_epochs=1,
      shuffle=False)

In [46]:
predictions = list(model.predict(predict_input_func))
predictions[0]

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\hp\AppData\Local\Temp\tmp3wd0egsd\model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


{'logits': array([-0.6527128], dtype=float32),
 'logistic': array([0.3423785], dtype=float32),
 'probabilities': array([0.6576215 , 0.34237847], dtype=float32),
 'class_ids': array([0], dtype=int64),
 'classes': array([b'0'], dtype=object),
 'all_class_ids': array([0, 1]),
 'all_classes': array([b'0', b'1'], dtype=object)}

In [47]:
# get the list of predictions 
# go through the predictions and get the prediction value from the list 
preds=[]
for pred in predictions:
    preds.append(pred['class_ids'][0])
    
preds[:10]

[0, 1, 0, 0, 0, 1, 1, 0, 0, 0]

In [48]:
from sklearn.metrics import classification_report,accuracy_score
print(classification_report(preds,y_test))
print('\n')
print('Accuracy :', accuracy_score(preds,y_test))

              precision    recall  f1-score   support

           0       0.87      0.79      0.83       165
           1       0.57      0.70      0.63        66

    accuracy                           0.76       231
   macro avg       0.72      0.74      0.73       231
weighted avg       0.78      0.76      0.77       231



Accuracy : 0.7619047619047619


### DNN Classifier 

In [34]:
# restore the input function 
input_function =tf.compat.v1.estimator.inputs.pandas_input_fn(x=X_train,y=y_train ,batch_size=10,num_epochs=None,
                                            shuffle=True)

In [49]:
# create the model 
estimator =  tf.compat.v1.estimator.DNNClassifier(hidden_units= [20,20,16],feature_columns=feat_cols,n_classes=2)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\hp\\AppData\\Local\\Temp\\tmpqlcuo0yr', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002A7F8DA4808>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [50]:
estimator.train(input_fn=input_function,steps=10000)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\hp\AppData\Local\Temp\tmpqlcuo0yr\model.ckpt.
INFO:tensorflow:loss = 6.928531, step = 0
INFO:tensorflow:global_step/sec: 92.1929
INFO:tensorflow:loss = 5.9709773, step = 100 (1.100 sec)
INFO:tensorflow:global_step/sec: 141.385
INFO:tensorflow:loss = 2.9564908, step = 200 (0.696 sec)
INFO:tensorflow:global_step/sec: 120.159
INFO:tensorflow:loss = 4.0950227, step = 300 (0.831 sec)
INFO:tensorflow:global_st

INFO:tensorflow:loss = 0.77175653, step = 7000 (0.408 sec)
INFO:tensorflow:global_step/sec: 233.257
INFO:tensorflow:loss = 5.099098, step = 7100 (0.428 sec)
INFO:tensorflow:global_step/sec: 250.383
INFO:tensorflow:loss = 4.5731516, step = 7200 (0.399 sec)
INFO:tensorflow:global_step/sec: 249.288
INFO:tensorflow:loss = 1.8993578, step = 7300 (0.404 sec)
INFO:tensorflow:global_step/sec: 231.879
INFO:tensorflow:loss = 6.5847845, step = 7400 (0.427 sec)
INFO:tensorflow:global_step/sec: 237.407
INFO:tensorflow:loss = 3.6808355, step = 7500 (0.428 sec)
INFO:tensorflow:global_step/sec: 153.609
INFO:tensorflow:loss = 3.8522851, step = 7600 (0.646 sec)
INFO:tensorflow:global_step/sec: 167.387
INFO:tensorflow:loss = 2.6064973, step = 7700 (0.596 sec)
INFO:tensorflow:global_step/sec: 208.287
INFO:tensorflow:loss = 1.6214743, step = 7800 (0.503 sec)
INFO:tensorflow:global_step/sec: 167.102
INFO:tensorflow:loss = 2.529646, step = 7900 (0.576 sec)
INFO:tensorflow:global_step/sec: 146.526
INFO:tensor

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x2a7f8da4848>

In [51]:
predict_input_function = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=X_test,
      batch_size=10,
      num_epochs=1,
      shuffle=False)

In [52]:
predictions=list(estimator.predict(predict_input_function))

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\hp\AppData\Local\Temp\tmpqlcuo0yr\model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [53]:
#show the first element. we're intresed in the value of class_ids value 
predictions[0]

{'logits': array([2.0616317], dtype=float32),
 'logistic': array([0.88711774], dtype=float32),
 'probabilities': array([0.11288233, 0.8871177 ], dtype=float32),
 'class_ids': array([1], dtype=int64),
 'classes': array([b'1'], dtype=object),
 'all_class_ids': array([0, 1]),
 'all_classes': array([b'0', b'1'], dtype=object)}

In [54]:
results=[]
for pred in predictions:
    results.append(pred['class_ids'][0])

# print the results of prediction 
print(results[:20])

[1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0]


In [55]:
from sklearn.metrics import classification_report,accuracy_score
print(classification_report(results,y_test))
print('\n')
print('Accuracy :', accuracy_score(results,y_test))

              precision    recall  f1-score   support

           0       0.78      0.84      0.81       140
           1       0.72      0.64      0.67        91

    accuracy                           0.76       231
   macro avg       0.75      0.74      0.74       231
weighted avg       0.75      0.76      0.75       231



Accuracy : 0.7575757575757576
