# Step 1: Import Pandas to read the data

In [81]:
import pandas as pd

In [82]:
# Load the Pima Indians diabetes data; the relative path is resolved against
# the notebook's working directory.
diabetes = pd.read_csv(filepath_or_buffer='pima-indians-diabetes.csv')

In [83]:
# Preview the first five rows to confirm the file loaded with the expected columns.
diabetes.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,6,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,1,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,8,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,1,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


In [84]:
# List the column names available in the loaded frame.
diabetes.columns

Index(['Number_pregnant', 'Glucose_concentration', 'Blood_pressure', 'Triceps',
       'Insulin', 'BMI', 'Pedigree', 'Age', 'Class', 'Group'],
      dtype='object')

### Normalise the columns

In [85]:
# Columns that get min-max scaled. 'Age' is left out because it is bucketized
# later on, 'Class' is the label, and 'Group' is a categorical string.
cols_to_norm = [
    'Number_pregnant',
    'Glucose_concentration',
    'Blood_pressure',
    'Triceps',
    'Insulin',
    'BMI',
    'Pedigree',
]

In [86]:
# The Class column is not normalised because it is the label we need to predict

In [87]:
# Use Pandas to normalise

In [88]:
def _min_max_scale(column):
    """Rescale one column to the [0, 1] range with min-max normalisation.

    Parameters
    ----------
    column : pandas.Series
        Numeric column to rescale.

    Returns
    -------
    pandas.Series
        ``(column - min) / (max - min)``. A constant column has a zero range,
        for which the plain formula yields NaN (0 / 0) on every row; such a
        column is returned as all zeros instead.
    """
    low = column.min()
    span = column.max() - low
    if span == 0:
        # Every value equals the minimum, so the scaled value is 0 everywhere.
        return (column - low).astype(float)
    return (column - low) / span


# Overwrite the selected columns with their rescaled values.
diabetes[cols_to_norm] = diabetes[cols_to_norm].apply(_min_max_scale)

In [89]:
# Preview the first five rows again to check the result of the scaling step.
diabetes.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


# Step 2 : Import Tensorflow and Create Feature Columns with API Estimator

In [90]:
import tensorflow as tf

In [91]:
# Re-list the column names for reference while defining the feature columns.
diabetes.columns

Index(['Number_pregnant', 'Glucose_concentration', 'Blood_pressure', 'Triceps',
       'Insulin', 'BMI', 'Pedigree', 'Age', 'Class', 'Group'],
      dtype='object')

In [92]:
# create feature columns

In [93]:
# Continuous-valued inputs: one numeric feature column per DataFrame column,
# each keyed by the name of the column it reads.
num_preg = tf.feature_column.numeric_column(key='Number_pregnant')
plasma_gluc = tf.feature_column.numeric_column(key='Glucose_concentration')
dias_press = tf.feature_column.numeric_column(key='Blood_pressure')
tricep = tf.feature_column.numeric_column(key='Triceps')
insulin = tf.feature_column.numeric_column(key='Insulin')
bmi = tf.feature_column.numeric_column(key='BMI')
diabetes_pedigree = tf.feature_column.numeric_column(key='Pedigree')
age = tf.feature_column.numeric_column(key='Age')

In [94]:
# 'Group' takes one of four known labels, so an explicit vocabulary list fits.
# If the set of categories is not known in advance, hash buckets are the
# alternative:
# assigned_group = tf.feature_column.categorical_column_with_hash_bucket('Group', hash_bucket_size=10)
# NOTE(review): this column is not part of feat_cols in this notebook, so
# 'Group' is not used by the models trained later.
assigned_group = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Group',
    vocabulary_list=['A', 'B', 'C', 'D'],
)

In [95]:
# How to convert a continuous value to a categorical one (age)

In [96]:
#import matplotlib.pyplot as plt
#%matplotlib inline

In [97]:
#diabetes['Age'].hist(bins=30)

In [98]:
# Turn the numeric age column into a categorical one by splitting it at
# 20, 30, 40, 50, 60 and 70.
age_bucket = tf.feature_column.bucketized_column(
    source_column=age,
    boundaries=list(range(20, 71, 10)),
)

In [99]:
# Create Feature Columns

In [100]:
# Feature columns handed to the estimators: the seven scaled numeric inputs
# plus the bucketized age.
# NOTE(review): assigned_group ('Group') is defined earlier but is not listed
# here, so it does not reach the models - confirm this is intentional.
feat_cols = [
    num_preg, plasma_gluc, dias_press, tricep,
    insulin, bmi, diabetes_pedigree,
    age_bucket,
]

# Step 3: Perform Train Test Split

In [101]:
from sklearn.model_selection import train_test_split

In [102]:
# Features are every column except 'Class', because Class is what we need to predict.
x_data = diabetes.drop(columns='Class')

In [103]:
# Preview the feature frame to confirm the 'Class' column is gone.
x_data.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Group
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,B
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,C
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,B
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,B
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,C


In [104]:
# Labels: the 'Class' column on its own.
y = diabetes.loc[:, 'Class']

In [105]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    y,
    test_size=0.33,
    random_state=101,
)

# Step 4: Create Input Function

In [106]:
# Training input: shuffled batches of 10 rows drawn from the training split,
# allowing up to 1000 passes over the data.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

# Step 5: Create Model - Linear

In [107]:
# Baseline model: a linear classifier with two output classes.
model = tf.estimator.LinearClassifier(
    n_classes=2,
    feature_columns=feat_cols,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\DHINAG~1.A\\AppData\\Local\\Temp\\tmpe91mye52', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000020954DABD30>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


# Step 6: Train the model

In [108]:
# Train the linear model for up to 1000 steps, one batch from input_func per step.
model.train(input_fn=input_func,steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\DHINAG~1.A\AppData\Local\Temp\tmpe91mye52\model.ckpt.
INFO:tensorflow:loss = 6.931472, step = 1
INFO:tensorflow:global_step/sec: 213.3
INFO:tensorflow:loss = 5.206046, step = 101 (0.481 sec)
INFO:tensorflow:global_step/sec: 462.617
INFO:tensorflow:loss = 5.8133173, step = 201 (0.215 sec)
INFO:tensorflow:global_step/sec: 464.007
INFO:tensorflow:loss = 5.514282, step = 301 (0.221 sec)
INFO:tensorflow:global_step/sec: 432.231
INFO:tensorflow:loss = 3.6432333, step = 401 (0.224 sec)
INFO:tensorflow:global_step/sec: 498.833
INFO:tensorflow:loss = 5.037243, step = 501 (0.205 sec)
INFO:tensorflow:global_step/sec: 430.961
INFO:tensorflow:loss = 5.407897, step = 601 (0.225 sec)
INFO:tensorflow:global_step/se

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifier at 0x20954dab550>

# Step 7 : Evaluate the model

In [109]:
# Evaluation input: a single, unshuffled pass over the test split with labels.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    y=y_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [110]:
# Score the linear model on the test split and keep the metrics it returns.
results = model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-11-30T20:46:04Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\DHINAG~1.A\AppData\Local\Temp\tmpe91mye52\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-11-30-20:46:05
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.72440946, accuracy_baseline = 0.65748036, auc = 0.7853259, auc_precision_recall = 0.6341324, average_loss = 0.5347419, global_step = 1000, label/mean = 0.34251967, loss = 5.2240167, precision = 0.60493827, prediction/mean = 0.37247145, recall = 0.5632184
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\DHINAG~1.A\AppData\Local\Temp\tmpe91mye52\model.ckpt-1000


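#### Linear model, run shown above: accuracy = 0.72440946, accuracy_baseline = 0.65748036, auc = 0.7853259 (an earlier run gave accuracy = 0.7401575, accuracy_baseline = 0.66141737, auc = 0.7687223)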

# Step 8: Predict the model & get predictions

In [111]:
# Prediction input: the test features only (no labels), one unshuffled pass
# so predictions stay in the same order as the rows of X_test.
pred_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [112]:
# predict() hands back a lazy generator; no inference runs until it is iterated.
predictions = model.predict(pred_input_func)

In [113]:
# Consume the generator into a list; this is the step that actually runs the model.
my_pred =list(predictions)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\DHINAG~1.A\AppData\Local\Temp\tmpe91mye52\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Repeat Step 5 : Create the model
### Dense Neural Network Model -----> for DNN


In [114]:
# hidden_units lists the number of neurons per hidden layer:
# here three layers with 10 neurons each.
dnn_model = tf.estimator.DNNClassifier(
    feature_columns=feat_cols,
    hidden_units=[10, 10, 10],
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\DHINAG~1.A\\AppData\\Local\\Temp\\tmpzqwm7sp7', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002094E278048>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


# Step 6 : Train the Model ----> Repeat for DNN

In [115]:
# Train the three-layer DNN for up to 1000 steps using the training input function.
dnn_model.train(input_fn=input_func,steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\DHINAG~1.A\AppData\Local\Temp\tmpzqwm7sp7\model.ckpt.
INFO:tensorflow:loss = 7.062519, step = 1
INFO:tensorflow:global_step/sec: 183.015
INFO:tensorflow:loss = 6.3886137, step = 101 (0.551 sec)
INFO:tensorflow:global_step/sec: 382.53
INFO:tensorflow:loss = 5.500318, step = 201 (0.260 sec)
INFO:tensorflow:global_step/sec: 415.662
INFO:tensorflow:loss = 4.0120683, step = 301 (0.247 sec)
INFO:tensorflow:global_step/sec: 463.524
INFO:tensorflow:loss = 1.9753537, step = 401 (0.223 sec)
INFO:tensorflow:global_step/sec: 376.104
INFO:tensorflow:loss = 4.159457, step = 501 (0.251 sec)
INFO:tensorflow:global_step/sec: 437.772
INFO:tensorflow:loss = 3.362772, step = 601 (0.231 sec)
INFO:tensorflow:global_step/

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x2094e2789b0>

In [116]:
# Rebuild the training input function with the same settings as in Step 4:
# shuffled batches of 10 rows, up to 1000 passes over the training split.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [117]:
# Create a larger DNN model

In [118]:
# Larger network that replaces the previous dnn_model: five hidden layers
# with 10, 20, 20, 10 and 10 neurons, and two output classes.
dnn_model = tf.estimator.DNNClassifier(
    feature_columns=feat_cols,
    hidden_units=[10, 20, 20, 10, 10],
    n_classes=2,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\DHINAG~1.A\\AppData\\Local\\Temp\\tmp34um3ad_', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000020951E088D0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [119]:
# Train the model

In [120]:
# Train the five-layer DNN for up to 1000 steps using the training input function.
dnn_model.train(input_fn=input_func,steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\DHINAG~1.A\AppData\Local\Temp\tmp34um3ad_\model.ckpt.
INFO:tensorflow:loss = 6.942944, step = 1
INFO:tensorflow:global_step/sec: 240.762
INFO:tensorflow:loss = 3.672792, step = 101 (0.434 sec)
INFO:tensorflow:global_step/sec: 449.454
INFO:tensorflow:loss = 4.7693872, step = 201 (0.217 sec)
INFO:tensorflow:global_step/sec: 431.976
INFO:tensorflow:loss = 4.5583835, step = 301 (0.229 sec)
INFO:tensorflow:global_step/sec: 463.897
INFO:tensorflow:loss = 6.1392975, step = 401 (0.220 sec)
INFO:tensorflow:global_step/sec: 434.025
INFO:tensorflow:loss = 5.4618397, step = 501 (0.234 sec)
INFO:tensorflow:global_step/sec: 361.142
INFO:tensorflow:loss = 4.0032225, step = 601 (0.262 sec)
INFO:tensorflow:global_st

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x20951f0bb38>

# Step 7 : Evaluate Input Function ---> Repeat

In [121]:
# Evaluation input for the DNN: a single, unshuffled pass over the test split
# with labels.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    y=y_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [122]:
# Score the DNN on the test split; the returned metrics dict is shown as the cell output.
dnn_model.evaluate(eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-11-30T20:46:29Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\DHINAG~1.A\AppData\Local\Temp\tmp34um3ad_\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-11-30-20:46:30
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.72440946, accuracy_baseline = 0.65748036, auc = 0.83137167, auc_precision_recall = 0.72632325, average_loss = 0.50367177, global_step = 1000, label/mean = 0.34251967, loss = 4.920486, precision = 0.5726496, prediction/mean = 0.4280158, recall = 0.77011496
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\DHINAG~1.A\AppData\Local\Temp\tmp34um3ad_\model.ckpt-1000


{'accuracy': 0.72440946,
 'accuracy_baseline': 0.65748036,
 'auc': 0.83137167,
 'auc_precision_recall': 0.72632325,
 'average_loss': 0.50367177,
 'label/mean': 0.34251967,
 'loss': 4.920486,
 'precision': 0.5726496,
 'prediction/mean': 0.4280158,
 'recall': 0.77011496,
 'global_step': 1000}

In [123]:
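# In the run shown above the DNN scores AUC ~0.83 and accuracy ~0.72 (the "82%" noted earlier does not match the displayed accuracy)
# Just increase the neurons and run again
# Not really better: it looks like the data set has reached the limit of what these models can achieve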