In [91]:
import pandas as pd

In [92]:
from sklearn.datasets import load_iris

In [93]:
iris = load_iris()

In [94]:
print(iris['DESCR'])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [95]:
df = pd.DataFrame(iris['data'],columns=iris['feature_names'])

In [96]:
iris.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [97]:
df['target'] = pd.DataFrame(iris['target'])

In [98]:
df.head(100)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


In [99]:
df.columns

Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
       'petal width (cm)', 'target'],
      dtype='object')

In [100]:
df.columns = ['sepal_length','sepal_width','petal_length','petal_width','target']

In [101]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [102]:
df['target'] = df['target'].apply(int) # Target must be integer

In [103]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [104]:
from sklearn.model_selection import train_test_split

In [105]:
X = df.drop('target',axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.3)

In [106]:
import tensorflow as tf

In [107]:
X.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='object')

In [108]:
feat_cols = []
for col in X.columns:
    feat_cols.append(tf.feature_column.numeric_column(col))

In [109]:
feat_cols

[_NumericColumn(key='sepal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='sepal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='petal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='petal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [110]:
input_func = tf.estimator.inputs.pandas_input_fn(x=X_train,y=y_train,batch_size=20,num_epochs=5,shuffle=True)

In [111]:
classifier = tf.estimator.DNNClassifier(hidden_units=[10,20,10],n_classes=3,feature_columns=feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\nilto\\AppData\\Local\\Temp\\tmpiuhoabdb', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002302957D4E0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [112]:
classifier.train(input_fn=input_func,steps=50)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\nilto\AppData\Local\Temp\tmpiuhoabdb\model.ckpt.
INFO:tensorflow:loss = 31.136366, step = 1
INFO:tensorflow:Saving checkpoints for 27 into C:\Users\nilto\AppData\Local\Temp\tmpiuhoabdb\model.ckpt.
INFO:tensorflow:Loss for final step: 2.800305.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x2302957d4a8>

In [113]:
pred_fn = tf.estimator.inputs.pandas_input_fn(x=X_test,batch_size=len(X_test),shuffle=False)

In [114]:
predictions = list(classifier.predict(input_fn=pred_fn))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\nilto\AppData\Local\Temp\tmpiuhoabdb\model.ckpt-27
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [115]:
predictions

[{'logits': array([-6.585446 , -2.383134 , -1.7476562], dtype=float32),
  'probabilities': array([0.00515382, 0.3444849 , 0.65036124], dtype=float32),
  'class_ids': array([2], dtype=int64),
  'classes': array([b'2'], dtype=object)},
 {'logits': array([-5.5900154, -2.382458 , -2.1909811], dtype=float32),
  'probabilities': array([0.01796822, 0.4441499 , 0.53788185], dtype=float32),
  'class_ids': array([2], dtype=int64),
  'classes': array([b'2'], dtype=object)},
 {'logits': array([-5.2941527, -2.3088262, -2.1815002], dtype=float32),
  'probabilities': array([0.0231088 , 0.45739165, 0.5194996 ], dtype=float32),
  'class_ids': array([2], dtype=int64),
  'classes': array([b'2'], dtype=object)},
 {'logits': array([-5.0534477, -2.0289295, -1.7506272], dtype=float32),
  'probabilities': array([0.02050302, 0.4220359 , 0.5574611 ], dtype=float32),
  'class_ids': array([2], dtype=int64),
  'classes': array([b'2'], dtype=object)},
 {'logits': array([-3.2853231, -1.7438116, -1.9919577], dtype=fl

In [116]:
final_preds = []

for pred in predictions:
    final_preds.append(pred['class_ids'][0])

In [117]:
final_preds

[2,
 2,
 2,
 2,
 1,
 2,
 0,
 2,
 0,
 2,
 2,
 2,
 0,
 2,
 0,
 2,
 2,
 0,
 0,
 2,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 2,
 2,
 0,
 1,
 2,
 2,
 0,
 1,
 0,
 0,
 0,
 2,
 0,
 1,
 0,
 2]

In [118]:
from sklearn.metrics import confusion_matrix, classification_report

In [119]:
print(confusion_matrix(y_test,final_preds))

[[19  0  0]
 [ 0  7  6]
 [ 0  0 13]]


In [120]:
print(classification_report(y_test,final_preds))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.54      0.70        13
           2       0.68      1.00      0.81        13

   micro avg       0.87      0.87      0.87        45
   macro avg       0.89      0.85      0.84        45
weighted avg       0.91      0.87      0.86        45

