In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import pandas as pd

In [2]:
# Column labels applied to the Iris CSV files when they are read with pandas.
# The last column, 'target', is the label that is later split off from the features.
CSV_COLUMN_NAMES = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'target',
]
# Species names, indexed by predicted class id when reporting predictions.
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]

In [3]:
# Fetch the Iris training and test CSV files and keep the local paths.
train_path = tf.keras.utils.get_file(
    fname="iris_training.csv",
    origin="https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv",
)
test_path = tf.keras.utils.get_file(
    fname="iris_test.csv",
    origin="https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv",
)

# header=0 combined with names= replaces the header row found in each file
# with CSV_COLUMN_NAMES.
train = pd.read_csv(train_path, header=0, names=CSV_COLUMN_NAMES)
test = pd.read_csv(test_path, header=0, names=CSV_COLUMN_NAMES)

In [4]:
# Preview the first five rows of the training frame (features plus 'target').
train.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [5]:
# Confirm which container type read_csv produced for the training data.
type(train)

pandas.core.frame.DataFrame

In [6]:
# Split the label column off each frame. DataFrame.pop removes 'target' from
# `train` / `test` in place, so afterwards they hold only the feature columns.
# Re-running this cell without reloading the data raises KeyError, because
# 'target' is already gone.
y_train = train.pop(item='target')
y_test = test.pop(item='target')

In [7]:
# Summarise the remaining columns (dtype and non-null count per column).
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 4 columns):
sepal_length    120 non-null float64
sepal_width     120 non-null float64
petal_length    120 non-null float64
petal_width     120 non-null float64
dtypes: float64(4)
memory usage: 3.8 KB


In [8]:
def input_fn(features, labels, training=True, batch_size=256):
    """Build a batched ``tf.data.Dataset`` of (features, labels) pairs.

    Used for both training and evaluation of the estimator.

    Args:
        features: Mapping-like object convertible with ``dict()`` from feature
            name to values (the notebook passes a pandas DataFrame).
        labels: Label values, sliced along with the features.
        training: When true, the dataset is shuffled (buffer of 1000) and
            repeated before batching; when false, it is only batched.
        batch_size: Number of examples per batch.

    Returns:
        The batched dataset.
    """
    feature_dict = dict(features)
    dataset = tf.data.Dataset.from_tensor_slices((feature_dict, labels))

    # Evaluation: one ordered pass over the data.
    if not training:
        return dataset.batch(batch_size)

    # Training: shuffle, and repeat so the caller's step count bounds training.
    return dataset.shuffle(1000).repeat().batch(batch_size)

In [9]:
# One numeric feature column per remaining column of the training frame.
feat_cols = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train.keys()
]

In [29]:
# Display the feature columns that will be handed to the estimator.
feat_cols

[NumericColumn(key='sepal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='sepal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='petal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='petal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [10]:
# Build a DNN classifier with two hidden layers: 30 nodes, then 10 nodes.
classifier = tf.estimator.DNNClassifier(
    # Sizes of the hidden layers, in order.
    hidden_units=[30, 10],
    # The model must choose one of three classes.
    n_classes=3,
    # Inputs: the numeric feature columns built from the training frame.
    feature_columns=feat_cols,
)

W0731 18:01:23.178097 15412 estimator.py:1811] Using temporary folder as model directory: C:\Users\Corvus\AppData\Local\Temp\tmprbpx23pd


In [11]:
# Now train the model.

In [12]:
# Train for 5000 steps. The lambda defers building the dataset until the
# estimator calls it, and looks up `input_fn` by name at that moment.
classifier.train(
    steps=5000,
    input_fn=lambda: input_fn(train, y_train, training=True),
)

W0731 18:01:23.217990 15412 deprecation.py:323] From C:\Users\Corvus\Anaconda3\lib\site-packages\tensorflow\python\training\training_util.py:236: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
W0731 18:01:23.934478 15412 deprecation.py:323] From C:\Users\Corvus\Anaconda3\lib\site-packages\tensorflow_estimator\python\estimator\head\base_head.py:574: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
W0731 18:01:24.067104 15412 deprecation.py:506] From C:\Users\Corvus\Anaconda3\lib\site-packages\tensorflow\python\keras\optimizer_v2\adagrad.py:105: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future v

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x1f535d64208>

In [13]:
# Evaluate the trained model on the held-out test set (no shuffling or repeating).
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test, y_test, training=False),
)

# Report only the accuracy metric from the evaluation results.
print('test accuracy: {accuracy:0.3f}'.format(accuracy=eval_result['accuracy']))

W0731 18:01:32.748054 15412 deprecation.py:323] From C:\Users\Corvus\Anaconda3\lib\site-packages\tensorflow\python\training\saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


test accuracy: 0.933


In [14]:
# Generate predictions from the model.
# Species each sample in `predict_x` is expected to be classified as, in order.
expected = ['Setosa', 'Versicolor', 'Virginica']
# Three unlabeled samples, stored column-wise: index i of every list is sample i.
predict_x = {
    'sepal_length': [5.1, 5.9, 6.9],
    'sepal_width':  [3.3, 3.0, 3.1],
    'petal_length': [1.7, 4.2, 5.4],
    'petal_width':  [0.5, 1.5, 2.1],
}


def predict_input_fn(features, batch_size=256):
    """Build a batched ``tf.data.Dataset`` of features only, for prediction.

    Deliberately NOT named ``input_fn``: the training and evaluation cells call
    ``input_fn(features, labels, training=...)`` by name from inside lambdas,
    so redefining ``input_fn`` here with a different signature would make
    those cells fail with TypeError if they were run again afterwards.

    Args:
        features: Mapping-like object convertible with ``dict()`` from feature
            name to a sequence of values.
        batch_size: Number of examples per batch.

    Returns:
        The batched dataset (no labels).
    """
    # Convert the inputs to a dataset without labels.
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)


# Estimator.predict yields results lazily from a one-shot generator.
# Materialise it so the cell that reports predictions can be re-run and still
# see the results.
predictions = list(classifier.predict(
    input_fn=lambda: predict_input_fn(predict_x)))

In [15]:
# Pair every prediction with the species we expected for that sample and report it.
for prediction, expected_species in zip(predictions, expected):
    # 'class_ids' holds the predicted class; use it to pick that class's probability.
    predicted_id = prediction['class_ids'][0]
    confidence = prediction['probabilities'][predicted_id]

    message = 'Prediction is "{}" ({:.1f}%), expected "{}"'.format(
        SPECIES[predicted_id], 100 * confidence, expected_species)
    print(message)

Prediction is "Setosa" (86.8%), expected "Setosa"
Prediction is "Versicolor" (45.2%), expected "Versicolor"
Prediction is "Virginica" (45.6%), expected "Virginica"
