# Classification Using TensorFlow

In [1]:
import pandas as pd

In [2]:
# Read the diabetes CSV (expected in the working directory) into a DataFrame.
csv_path = 'pima-indians-diabetes.csv'
diabetes = pd.read_csv(csv_path)

In [3]:
# Preview the first rows of the freshly loaded frame.
diabetes.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,6,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,1,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,8,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,1,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


In [4]:
# List the column labels; the same names are used later as feature-column keys.
diabetes.columns

Index(['Number_pregnant', 'Glucose_concentration', 'Blood_pressure', 'Triceps',
       'Insulin', 'BMI', 'Pedigree', 'Age', 'Class', 'Group'],
      dtype='object')

In [5]:
# Summarise the dtype and non-null count of every column.
diabetes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 10 columns):
Number_pregnant          768 non-null int64
Glucose_concentration    768 non-null float64
Blood_pressure           768 non-null float64
Triceps                  768 non-null float64
Insulin                  768 non-null float64
BMI                      768 non-null float64
Pedigree                 768 non-null float64
Age                      768 non-null int64
Class                    768 non-null int64
Group                    768 non-null object
dtypes: float64(6), int64(3), object(1)
memory usage: 60.1+ KB


## Preprocessing 

In [7]:
# Columns that are min-max scaled in the next cell.
# 'Age', 'Class' and 'Group' are not part of this list.
cols_to_norm = [
    'Number_pregnant',
    'Glucose_concentration',
    'Blood_pressure',
    'Triceps',
    'Insulin',
    'BMI',
    'Pedigree',
]

In [8]:
def _min_max_scale(column):
    """Rescale one numeric column linearly so its values span [0, 1].

    Parameters
    ----------
    column : pandas.Series
        Numeric column to rescale.

    Returns
    -------
    pandas.Series
        ``(column - min) / (max - min)``. A constant column has a zero range;
        it is returned as all 0.0 instead of dividing by zero, which would
        otherwise fill the column with NaN.
    """
    lowest = column.min()
    value_range = column.max() - lowest
    if value_range == 0:
        # Zero range: every value equals the minimum, so the scaled value is 0.
        return (column - lowest).astype(float)
    return (column - lowest) / value_range


# Scale every column listed in cols_to_norm independently, in place.
diabetes[cols_to_norm] = diabetes[cols_to_norm].apply(_min_max_scale)

In [9]:
# Preview the frame again to check the result of the min-max scaling.
diabetes.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


# TensorFlow

In [10]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [11]:
# One numeric feature column per continuous input; every key is a column name
# of the `diabetes` frame.
_numeric = tf.feature_column.numeric_column

num_preg = _numeric('Number_pregnant')
plasma_gluc = _numeric('Glucose_concentration')
dias_press = _numeric('Blood_pressure')
tricep = _numeric('Triceps')
insulin = _numeric('Insulin')
bmi = _numeric('BMI')
diabetes_pedigree = _numeric('Pedigree')
age = _numeric('Age')

In [12]:
# 'Group' is an object-dtype column, so it is described by an explicit vocabulary.
assigned_group = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Group',
    vocabulary_list=['A', 'B', 'C', 'D'],
)

In [13]:
# Bucketize age at ten-year boundaries instead of using it as a raw number.
age_boundaries = [20, 30, 40, 50, 60, 70, 80]
age_buckets = tf.feature_column.bucketized_column(age, boundaries=age_boundaries)

In [14]:
# First feature set: numeric columns, the raw categorical group column and the
# bucketized age (the un-bucketized `age` column is not included).
feat_cols = [
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    assigned_group,
    age_buckets,
]

In [15]:
# NOTE(review): repeats the preview shown right after normalisation; the cells
# in between only define feature columns and do not modify `diabetes`.
diabetes.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


# Training and testing data split

In [16]:
# Feature frame: every column except the 'Class' target.
x_data = diabetes.drop(['Class'], axis='columns')

In [17]:
# Target series: the 'Class' column.
labels = diabetes.loc[:, 'Class']

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
# Hold out 33% of the rows for testing; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    labels,
    test_size=0.33,
    random_state=101,
)

In [20]:
# Training input function: shuffled batches of 10 rows drawn from the training
# split, for up to 1000 epochs.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [27]:
# First attempt at a binary DNN classifier with three hidden layers of 10 units.
# NOTE(review): at this point `feat_cols` still contains the raw categorical
# `assigned_group` column. DNNClassifier presumably needs dense columns
# (embedding/indicator), so training this instance would be expected to fail --
# confirm. `dnn_model` is rebuilt further down with an embedded group column and
# the execution counts skip from 20 to 27, so this cell looks like a leftover.
dnn_model = tf.estimator.DNNClassifier(hidden_units=[10,10,10],feature_columns=feat_cols,n_classes=2)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\~Aniket~\\AppData\\Local\\Temp\\tmp6v65u5rn', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002BDB3BFD7B8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [29]:
# Wrap the categorical group column in a 4-dimensional embedding so it becomes
# a dense column.
embedded_group_column = tf.feature_column.embedding_column(
    assigned_group,
    dimension=4,
)

In [30]:
# Second feature set: same as before, but with the embedded group column in
# place of the raw categorical one.
feat_cols = [
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    embedded_group_column,
    age_buckets,
]

In [31]:
# Training input function (shuffled batches of 10, up to 1000 epochs).
# NOTE(review): same arguments as the earlier `input_func` cell.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [33]:
# Binary DNN classifier with three hidden layers of 10 units each, built on the
# feature set that uses the embedded group column.
dnn_model = tf.estimator.DNNClassifier(
    hidden_units=[10, 10, 10],
    feature_columns=feat_cols,
    n_classes=2,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\~Aniket~\\AppData\\Local\\Temp\\tmp187f4a2p', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002BDB3CFEE10>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [34]:
# Run 1000 training steps; the call's return value is echoed as the cell output.
dnn_model.train(
    input_fn=input_func,
    steps=1000,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\~Aniket~\AppData\Local\Temp\tmp187f4a2p\model.ckpt.
INFO:tensorflow:loss = 6.802306, step = 1
INFO:tensorflow:global_step/sec: 331.006
INFO:tensorflow:loss = 5.744315, step = 101 (0.302 sec)
INFO:tensorflow:global_step/sec: 524.885
INFO:tensorflow:loss = 6.785672, step = 201 (0.191 sec)
INFO:tensorflow:global_step/sec: 528.015
INFO:tensorflow:loss = 5.6388073, step = 301 (0.189 sec)
INFO:tensorflow:global_step/sec: 538.667
INFO:tensorflow:loss = 4.531732, step = 401 (0.186 sec)
INFO:tensorflow:global_step/sec: 549
INFO:tensorflow:loss = 4.9841013, step = 501 (0.182 sec)
INFO:tensorflow:global_step/sec: 536.65
INFO:tensorflow:loss = 3.9131045, step = 601 (0.190 sec)
INFO:tensorflow:global_step/sec: 533.733
INFO:tensorflow:loss = 6.013022, step = 701 (0.183 sec)
INFO:tensorflow:global_step/sec: 516.805
INFO:tensorflow:loss = 4.276779, step = 801 (0.197 sec)
INFO:tensorflow:global_step/sec: 

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x2bdb3cfec50>

In [35]:
# Evaluation input function: one unshuffled pass over the test split in
# batches of 10.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test, y=y_test, batch_size=10, num_epochs=1, shuffle=False
)

In [36]:
# Score the trained model on the test split; the metrics dict is the cell output.
dnn_model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Starting evaluation at 2018-07-19-14:16:16
INFO:tensorflow:Restoring parameters from C:\Users\~Aniket~\AppData\Local\Temp\tmp187f4a2p\model.ckpt-1000
INFO:tensorflow:Finished evaluation at 2018-07-19-14:16:17
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.72047246, accuracy_baseline = 0.65748036, auc = 0.8114805, auc_precision_recall = 0.65112627, average_loss = 0.534745, global_step = 1000, label/mean = 0.34251967, loss = 5.224047, prediction/mean = 0.44834912


{'accuracy': 0.72047246,
 'accuracy_baseline': 0.65748036,
 'auc': 0.8114805,
 'auc_precision_recall': 0.65112627,
 'average_loss': 0.534745,
 'global_step': 1000,
 'label/mean': 0.34251967,
 'loss': 5.224047,
 'prediction/mean': 0.44834912}

#### Predictions

In [37]:
# Prediction input function: the whole test split as a single unshuffled batch,
# without labels.
pred_fn = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    batch_size=len(X_test),
    shuffle=False,
)

In [38]:
# Materialise the iterable returned by predict() into a list.
prediction_stream = dnn_model.predict(input_fn=pred_fn)
predictions = list(prediction_stream)

INFO:tensorflow:Restoring parameters from C:\Users\~Aniket~\AppData\Local\Temp\tmp187f4a2p\model.ckpt-1000


In [39]:
# Inspect the first prediction to see which keys each entry carries.
predictions[0]

{'class_ids': array([1], dtype=int64),
 'classes': array([b'1'], dtype=object),
 'logistic': array([0.7783562], dtype=float32),
 'logits': array([1.2561129], dtype=float32),
 'probabilities': array([0.22164373, 0.7783562 ], dtype=float32)}

In [40]:
# Keep only the predicted class id (first element of 'class_ids') per entry.
final_preds = [entry['class_ids'][0] for entry in predictions]

In [41]:
# Show the first ten predicted class ids.
final_preds[:10]

[1, 1, 0, 1, 0, 1, 1, 1, 0, 0]

### Evaluation

In [42]:
from sklearn.metrics import classification_report

In [43]:
# Per-class precision/recall/F1 of the predictions against the test labels.
report_text = classification_report(y_test, final_preds)
print(report_text)

             precision    recall  f1-score   support

          0       0.86      0.68      0.76       167
          1       0.57      0.79      0.66        87

avg / total       0.76      0.72      0.73       254



In [44]:
from sklearn.metrics import confusion_matrix

In [45]:
# Confusion matrix of the predictions against the test labels.
matrix = confusion_matrix(y_test, final_preds)
print(matrix)

[[114  53]
 [ 18  69]]
