<a href="https://colab.research.google.com/github/PriyathamVarma/tensorflow/blob/main/Classification_Tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# This notebook is for classification using tensorflow created on 30/12/2021

In [1]:
# this is for specifing the exact version
%tensorflow_version 1.3.0

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `1.3.0`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.


In [2]:
# imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
tf.__version__

'1.15.2'

# The aim of this notebook is to demonstrate basic classification using tensorflow 

*   Dataset is Pima Indians diabetes 
*   Tf.estimator API
*   Categorical and continous features
*   Linear Classifier and Dense Neural Network Classifier







In [6]:
# select the databse from tensorflow basic library
data_frame = pd.read_csv('https://raw.githubusercontent.com/PriyathamVarma/tensorflow/main/pima-indians-diabetes.csv')

In [7]:
columns_list = ['Pregnancies','Glucose',
                'BloodPressure','SkinThickness',
                'Insulin','BMI','DiabetesPedigreeFunction',
                'Age','Class']

In [8]:
data_frame.columns = columns_list

In [9]:
data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 767 entries, 0 to 766
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               767 non-null    int64  
 1   Glucose                   767 non-null    int64  
 2   BloodPressure             767 non-null    int64  
 3   SkinThickness             767 non-null    int64  
 4   Insulin                   767 non-null    int64  
 5   BMI                       767 non-null    float64
 6   DiabetesPedigreeFunction  767 non-null    float64
 7   Age                       767 non-null    int64  
 8   Class                     767 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


# Normalize the data

In [10]:
normalized_dataFrame = data_frame[columns_list].apply(lambda x: (x-x.min())/(x.max()-x.min()))

In [11]:
normalized_dataFrame

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Class
0,0.058824,0.427136,0.540984,0.292929,0.000000,0.396423,0.116567,0.166667,0.0
1,0.470588,0.919598,0.524590,0.000000,0.000000,0.347243,0.253629,0.183333,1.0
2,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.000000,0.0
3,0.000000,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.200000,1.0
4,0.294118,0.582915,0.606557,0.000000,0.000000,0.381520,0.052519,0.150000,0.0
...,...,...,...,...,...,...,...,...,...
762,0.588235,0.507538,0.622951,0.484848,0.212766,0.490313,0.039710,0.700000,0.0
763,0.117647,0.613065,0.573770,0.272727,0.000000,0.548435,0.111870,0.100000,0.0
764,0.294118,0.608040,0.590164,0.232323,0.132388,0.390462,0.071307,0.150000,0.0
765,0.058824,0.633166,0.491803,0.000000,0.000000,0.448584,0.115713,0.433333,1.0


In [12]:
# Numeric columns
Pregnancies = tf.feature_column.numeric_column('Pregnancies')
Glucose = tf.feature_column.numeric_column('Glucose')
BloodPressure = tf.feature_column.numeric_column('BloodPressure')
SkinThickness = tf.feature_column.numeric_column('SkinThickness')
Insulin = tf.feature_column.numeric_column('Insulin')
BMI = tf.feature_column.numeric_column('BMI')
DiabetesPedigreeFunction = tf.feature_column.numeric_column('DiabetesPedigreeFunction')
Age = tf.feature_column.numeric_column('Age')

In [13]:
# Create a bucket
age_bucket = tf.feature_column.bucketized_column(Age,boundaries=[20,30,40,50,60,70,80,90])

In [14]:
age_bucket

BucketizedColumn(source_column=NumericColumn(key='Age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(20, 30, 40, 50, 60, 70, 80, 90))

In [15]:
features_columns = [Pregnancies,Glucose,
                BloodPressure,SkinThickness,
                Insulin,BMI,DiabetesPedigreeFunction,
                Age] 

In [16]:
features_columns

[NumericColumn(key='Pregnancies', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Glucose', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='BloodPressure', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='SkinThickness', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Insulin', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='BMI', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='DiabetesPedigreeFunction', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='Age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

# Train and test data

In [22]:
x_data = normalized_dataFrame.drop('Class',axis=1)

In [23]:
x_data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,0.058824,0.427136,0.540984,0.292929,0.000000,0.396423,0.116567,0.166667
1,0.470588,0.919598,0.524590,0.000000,0.000000,0.347243,0.253629,0.183333
2,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.000000
3,0.000000,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.200000
4,0.294118,0.582915,0.606557,0.000000,0.000000,0.381520,0.052519,0.150000
...,...,...,...,...,...,...,...,...
762,0.588235,0.507538,0.622951,0.484848,0.212766,0.490313,0.039710,0.700000
763,0.117647,0.613065,0.573770,0.272727,0.000000,0.548435,0.111870,0.100000
764,0.294118,0.608040,0.590164,0.232323,0.132388,0.390462,0.071307,0.150000
765,0.058824,0.633166,0.491803,0.000000,0.000000,0.448584,0.115713,0.433333


In [24]:
labels = data_frame['Class']

In [25]:
labels

0      0
1      1
2      0
3      1
4      0
      ..
762    0
763    0
764    0
765    1
766    0
Name: Class, Length: 767, dtype: int64

In [30]:
# Splitting the data
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x_data,labels,test_size = 0.2, random_state = 101)

In [34]:
y_test

765    1
747    1
42     1
485    0
543    0
      ..
602    1
510    0
171    0
671    0
298    0
Name: Class, Length: 154, dtype: int64

# Creating an input function

In [35]:
input_function = tf.estimator.inputs.pandas_input_fn(x = x_train,y=y_train, batch_size=10,
                                                     num_epochs=1000, shuffle=True)

In [36]:
# Linear Classifier
model = tf.estimator.LinearClassifier(feature_columns = features_columns,
                                      n_classes = 2)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmppv7m_h8b', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe43b0fac90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [37]:
# Training the model
model.train(input_fn=input_function,steps=1000)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmppv7m_h8b/mo

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifier at 0x7fe43b0fa750>

In [39]:
eval_input_function = tf.estimator.inputs.pandas_input_fn(x=x_test,
                                                          y=y_test,
                                                          batch_size=10,
                                                          num_epochs=1,
                                                          shuffle=False)

In [40]:
# Results
results = model.evaluate(eval_input_function)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-12-30T14:24:45Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmppv7m_h8b/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2021-12-30-14:24:47
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.7077922, accuracy_baseline = 0.64285713, auc = 0.77281916, auc_precision_recall = 0.61166877, average_loss = 0.5654498, global_step = 1000, label/mean = 0.35714287, loss = 5.442454, precision = 0.6785714, prediction/mean = 0.3765436, recall = 0.34545454
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: /tmp/tmppv7m_h8b/model.ckpt-1000


In [41]:
results

{'accuracy': 0.7077922,
 'accuracy_baseline': 0.64285713,
 'auc': 0.77281916,
 'auc_precision_recall': 0.61166877,
 'average_loss': 0.5654498,
 'global_step': 1000,
 'label/mean': 0.35714287,
 'loss': 5.442454,
 'precision': 0.6785714,
 'prediction/mean': 0.3765436,
 'recall': 0.34545454}