# ESTIMATOR API LINEAR CLASSIFIER AND DNN CLASSIFIER EXAMPLE
# USING PIMA INDIANS DIABETES DATA (CSV file loaded with pandas)

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd

In [2]:
# Load the Pima Indians diabetes table from a CSV file in the working directory.
diabetes = pd.read_csv(
    "pima-indians-diabetes.csv"
)

In [3]:
# Preview the first five rows of the freshly loaded frame.
diabetes.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,6,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,1,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,8,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,1,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


In [4]:
# List the column labels so the feature and label names used below can be checked.
diabetes.columns

Index(['Number_pregnant', 'Glucose_concentration', 'Blood_pressure', 'Triceps',
       'Insulin', 'BMI', 'Pedigree', 'Age', 'Class', 'Group'],
      dtype='object')

In [5]:
# Peek at one column that still holds unscaled values at this point.
diabetes["Number_pregnant"].head(n=5)

0    6
1    1
2    8
3    1
4    0
Name: Number_pregnant, dtype: int64

In [6]:
# Peek at a second column for comparison with Number_pregnant.
diabetes["Blood_pressure"].head(n=5)

0    0.590164
1    0.540984
2    0.524590
3    0.540984
4    0.327869
Name: Blood_pressure, dtype: float64

In [7]:
# Names of the numeric columns that are min-max scaled and later used as model features.
cols_to_norm = [
    'Number_pregnant',
    'Glucose_concentration',
    'Blood_pressure',
    'Triceps',
    'Insulin',
    'BMI',
    'Pedigree',
    'Age',
]

In [8]:
def _min_max_scale(column):
    """Rescale one numeric column to the [0, 1] range.

    Args:
        column: pandas Series of numeric values.

    Returns:
        Series of the same length in which the column minimum maps to 0 and
        the maximum maps to 1. A constant column (max == min) maps to all
        zeros instead of the NaNs that a 0/0 division would produce.
    """
    lowest = column.min()
    span = column.max() - lowest
    if span == 0:
        # Zero range: plain min-max would divide 0 by 0 and fill the column with NaN.
        return (column - lowest).astype(float)
    return (column - lowest) / span


# NOTE(review): min and max are computed over the whole frame, i.e. before the
# train/test split performed later in this notebook, so held-out rows influence
# the scaling applied to the training rows.
diabetes[cols_to_norm] = diabetes[cols_to_norm].apply(_min_max_scale)

In [9]:
# Preview the first five rows again, now that the numeric columns have been rescaled.
diabetes.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333,1,B
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667,0,C
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333,1,B
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0,0,B
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2,1,C


In [10]:
# One numeric feature column per scaled input; each key must match a DataFrame column name.
num_preg = tf.feature_column.numeric_column(key='Number_pregnant')
plasma_gluc = tf.feature_column.numeric_column(key='Glucose_concentration')
dias_press = tf.feature_column.numeric_column(key='Blood_pressure')
tricep = tf.feature_column.numeric_column(key='Triceps')
insulin = tf.feature_column.numeric_column(key='Insulin')
bmi = tf.feature_column.numeric_column(key='BMI')
diabetes_pedigree = tf.feature_column.numeric_column(key='Pedigree')
age = tf.feature_column.numeric_column(key='Age')

In [11]:
# Feature columns handed to both estimators, in the same order as cols_to_norm.
feat_cols = [
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    age,
]

In [12]:
# Print per-column dtypes and non-null counts to confirm the frame is ready for modelling.
diabetes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 10 columns):
Number_pregnant          768 non-null float64
Glucose_concentration    768 non-null float64
Blood_pressure           768 non-null float64
Triceps                  768 non-null float64
Insulin                  768 non-null float64
BMI                      768 non-null float64
Pedigree                 768 non-null float64
Age                      768 non-null float64
Class                    768 non-null int64
Group                    768 non-null object
dtypes: float64(8), int64(1), object(1)
memory usage: 60.1+ KB


In [13]:
# Features are the scaled numeric columns; the label is the 'Class' column.
x_data, y_data = diabetes[cols_to_norm], diabetes['Class']

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.3,
    random_state=101,
)

In [16]:
# Preview the first five training feature rows (the index shows the original row numbers).
x_train.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age
711,0.294118,0.633166,0.639344,0.272727,0.026005,0.441133,0.154142,0.316667
58,0.0,0.733668,0.672131,0.0,0.0,0.603577,0.727156,0.383333
216,0.294118,0.547739,0.508197,0.414141,0.152482,0.533532,0.186166,0.066667
168,0.235294,0.552764,0.540984,0.0,0.0,0.47541,0.167805,0.133333
658,0.647059,0.638191,0.868852,0.0,0.0,0.581222,0.047822,0.5


In [17]:
# Preview the matching training labels; the index should line up with x_train.
y_train.head(n=5)

711    0
58     0
216    1
168    0
658    0
Name: Class, dtype: int64

In [18]:
# Training input pipeline: shuffled batches of 10 rows, repeating the training
# set for up to 1000 epochs. The `steps` argument passed to train() is what
# actually bounds how long each model trains.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [19]:
# Linear two-class classifier over the numeric feature columns.
model = tf.estimator.LinearClassifier(
    feature_columns=feat_cols,
    n_classes=2,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_session_config': None, '_save_summary_steps': 100, '_model_dir': '/tmp/tmp_v5jrxp2', '_log_step_count_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_tf_random_seed': 1, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5}


In [20]:
# Fit the linear classifier for 1000 steps on the shuffled training batches.
model.train(
    input_fn=input_func,
    steps=1000,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp_v5jrxp2/model.ckpt.
INFO:tensorflow:loss = 6.93147, step = 1
INFO:tensorflow:global_step/sec: 390.422
INFO:tensorflow:loss = 7.8106, step = 101 (0.271 sec)
INFO:tensorflow:global_step/sec: 358.609
INFO:tensorflow:loss = 6.1656, step = 201 (0.272 sec)
INFO:tensorflow:global_step/sec: 361.199
INFO:tensorflow:loss = 6.6641, step = 301 (0.280 sec)
INFO:tensorflow:global_step/sec: 344.841
INFO:tensorflow:loss = 5.51924, step = 401 (0.288 sec)
INFO:tensorflow:global_step/sec: 366.91
INFO:tensorflow:loss = 5.9037, step = 501 (0.273 sec)
INFO:tensorflow:global_step/sec: 388.458
INFO:tensorflow:loss = 6.85408, step = 601 (0.257 sec)
INFO:tensorflow:global_step/sec: 316.647
INFO:tensorflow:loss = 4.88938, step = 701 (0.314 sec)
INFO:tensorflow:global_step/sec: 325.753
INFO:tensorflow:loss = 5.70692, step = 801 (0.312 sec)
INFO:tensorflow:global_step/sec: 327.758
INFO:tensorflow:loss = 5.92016, step

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x7f70ee0d5358>

In [21]:
# Evaluation input pipeline: a single, unshuffled pass over the held-out rows in batches of 10.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_test, y=y_test,
    batch_size=10, num_epochs=1, shuffle=False,
)

In [22]:
# Score the trained linear classifier on the held-out split.
results = model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Starting evaluation at 2018-07-11-18:15:53
INFO:tensorflow:Restoring parameters from /tmp/tmp_v5jrxp2/model.ckpt-1000
INFO:tensorflow:Finished evaluation at 2018-07-11-18:15:53
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.709957, accuracy_baseline = 0.649351, auc = 0.786543, auc_precision_recall = 0.619402, average_loss = 0.56022, global_step = 1000, label/mean = 0.350649, loss = 5.39212, prediction/mean = 0.34985


#### Linear Classifier Results

In [23]:
# Show the evaluation metrics dict returned for the linear classifier.
results

{'accuracy': 0.70995671,
 'accuracy_baseline': 0.64935064,
 'auc': 0.78654325,
 'auc_precision_recall': 0.61940229,
 'average_loss': 0.56021988,
 'global_step': 1000,
 'label/mean': 0.35064936,
 'loss': 5.3921165,
 'prediction/mean': 0.34985027}

In [24]:
# Fully connected two-class classifier with three hidden layers of 10 units each,
# fed by the same feature columns as the linear model.
dnn_model = tf.estimator.DNNClassifier(
    hidden_units=[10, 10, 10],
    feature_columns=feat_cols,
    n_classes=2,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_session_config': None, '_save_summary_steps': 100, '_model_dir': '/tmp/tmp1y9rcsri', '_log_step_count_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_secs': 600, '_tf_random_seed': 1, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5}


In [25]:
# Fit the DNN for 1000 steps, reusing the training input function built for the linear model.
dnn_model.train(
    input_fn=input_func,
    steps=1000,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp1y9rcsri/model.ckpt.
INFO:tensorflow:loss = 6.56102, step = 1
INFO:tensorflow:global_step/sec: 364.787
INFO:tensorflow:loss = 6.06954, step = 101 (0.283 sec)
INFO:tensorflow:global_step/sec: 382.709
INFO:tensorflow:loss = 7.91863, step = 201 (0.264 sec)
INFO:tensorflow:global_step/sec: 333.979
INFO:tensorflow:loss = 5.375, step = 301 (0.298 sec)
INFO:tensorflow:global_step/sec: 365.148
INFO:tensorflow:loss = 5.52501, step = 401 (0.274 sec)
INFO:tensorflow:global_step/sec: 357.541
INFO:tensorflow:loss = 6.72946, step = 501 (0.280 sec)
INFO:tensorflow:global_step/sec: 338.303
INFO:tensorflow:loss = 6.06732, step = 601 (0.291 sec)
INFO:tensorflow:global_step/sec: 356.211
INFO:tensorflow:loss = 2.10755, step = 701 (0.292 sec)
INFO:tensorflow:global_step/sec: 357.296
INFO:tensorflow:loss = 6.51792, step = 801 (0.269 sec)
INFO:tensorflow:global_step/sec: 330.286
INFO:tensorflow:loss = 7.33368, s

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f70ed913e80>

In [26]:
# Score the trained DNN on the same held-out split.
# Note: this rebinds `results`, replacing the linear classifier's metrics dict.
results = dnn_model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Starting evaluation at 2018-07-11-18:15:59
INFO:tensorflow:Restoring parameters from /tmp/tmp1y9rcsri/model.ckpt-1000
INFO:tensorflow:Finished evaluation at 2018-07-11-18:15:59
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.774892, accuracy_baseline = 0.649351, auc = 0.839959, auc_precision_recall = 0.738585, average_loss = 0.467685, global_step = 1000, label/mean = 0.350649, loss = 4.50147, prediction/mean = 0.373816


#### DNN Results

In [27]:
# Show the evaluation metrics dict returned for the DNN classifier.
results

{'accuracy': 0.77489179,
 'accuracy_baseline': 0.64935064,
 'auc': 0.83995879,
 'auc_precision_recall': 0.73858452,
 'average_loss': 0.46768489,
 'global_step': 1000,
 'label/mean': 0.35064936,
 'loss': 4.5014672,
 'prediction/mean': 0.37381643}