In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
CSV_COL_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']

In [3]:
df = pd.read_csv('./iris.data.csv', names=CSV_COL_NAMES, header=0)
df = df.sample(frac=1).reset_index(drop=True)
df.head()


Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,5.9,3.0,5.1,1.8,Iris-virginica
1,6.0,2.9,4.5,1.5,Iris-versicolor
2,4.8,3.4,1.9,0.2,Iris-setosa
3,6.1,2.8,4.7,1.2,Iris-versicolor
4,7.0,3.2,4.7,1.4,Iris-versicolor


In [4]:
train_data = df[:119]
test_data = df[119:]

In [5]:
train_label = train_data[train_data.columns[-1]]
train_data = train_data.drop(train_data.columns[-1], axis=1)

test_label = test_data[test_data.columns[-1]]
test_data = test_data.drop(test_data.columns[-1], axis=1)

In [6]:
mean_std_dict = {col: {'mean':df[col].mean(), 'std': df[col].std()} for col in train_data.columns}

def sl_normalize(x):
    return (x - mean_std_dict['SepalLength']['mean'])/mean_std_dict['SepalLength']['std']

def sw_normalize(x):
    return (x - mean_std_dict['SepalWidth']['mean'])/mean_std_dict['SepalWidth']['std']

def pl_normalize(x):
    return (x - mean_std_dict['PetalLength']['mean'])/mean_std_dict['PetalLength']['std']

def pw_normalize(x):
    return (x - mean_std_dict['PetalWidth']['mean'])/mean_std_dict['PetalWidth']['std']

In [7]:
my_feature_cols = []
my_feature_cols.append(tf.feature_column.numeric_column(key=CSV_COL_NAMES[0], normalizer_fn=sl_normalize))
my_feature_cols.append(tf.feature_column.numeric_column(key=CSV_COL_NAMES[1], normalizer_fn=sw_normalize))
my_feature_cols.append(tf.feature_column.numeric_column(key=CSV_COL_NAMES[2], normalizer_fn=pl_normalize))
my_feature_cols.append(tf.feature_column.numeric_column(key=CSV_COL_NAMES[3], normalizer_fn=pw_normalize))

In [8]:
class Model(object):

    def __init__(self, feature_cols):
        super(Model, self).__init__()
        self.feature_cols = feature_cols
    
    def __call__(self, input):
        net = tf.feature_column.input_layer(feature_columns=self.feature_cols, features=input)
        net = tf.layers.dense(inputs=net, units=10, activation=tf.nn.relu, name='layer_1')
        net = tf.layers.dense(inputs=net, units=10, activation=tf.nn.relu, name='layer_2')
        net = tf.layers.dense(inputs=net, units=3, activation=None, name='logits')
        return net

In [9]:
def model_fn(features, labels, mode):

    model = Model(my_feature_cols)
    global_steps = tf.train.get_global_step()

    logits = model(features)
    logits = tf.cast(logits, tf.float64)
    pred_logits = tf.argmax(logits, axis=1, output_type=tf.int64)
    probs = tf.nn.softmax(logits)

    predictions = {
        'pred_logits': pred_logits,
        'probabilities': probs
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(predictions=predictions, mode=mode)

    with tf.name_scope('loss'):
        error = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits, scope='loss')
        tf.summary.scalar('loss', error)
    
    with tf.name_scope('accuracy'):
        accuracy = tf.metrics.accuracy(labels=labels, predictions=pred_logits, name='acc')
        tf.summary.scalar('accuracy',accuracy[1])
    
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, loss=error, eval_metric_ops={
            'accuracy/accuracy':accuracy
        }, evaluation_hooks=None)
    
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(error, global_step=global_steps)
    train_hooks_list = []
    train_tensor_log ={'accuracy': accuracy[1], 'loss':error, 'global_steps':global_steps}
    train_hooks_list.append(tf.train.LoggingTensorHook(tensors=train_tensor_log, every_n_iter=100))

    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode, loss=error, train_op=optimizer, training_hooks=train_hooks_list)

In [10]:
test_label.unique().tolist()

['Iris-virginica', 'Iris-versicolor', 'Iris-setosa']

In [11]:
def labels_nor(x):
    tmp = { v:i for i,v in enumerate(df['Species'].unique().tolist())}
    return x.apply(lambda x : tmp[x])

def get_input_fn(features, labels, batch_size=32, shuffle=True, num_epoch=1000):
    
    def input_fn():
        if labels is not None:
            dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels_nor(labels)))
        else:
            dataset = tf.data.Dataset.from_tensor_slices(dict(features))
        if shuffle:
            dataset = dataset.shuffle(1000)
        dataset = dataset.batch(batch_size).repeat(num_epoch)
        return dataset
    return input_fn

train_input_fn = get_input_fn(train_data, train_label)
eval_input_fn = get_input_fn(test_data, test_label, shuffle=False)

In [12]:
classifier = tf.estimator.Estimator(model_fn=model_fn, model_dir='./Model')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './Model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000025F03167160>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [13]:
classifier.train(train_input_fn)
metric = classifier.evaluate(eval_input_fn)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorfl

In [14]:
 predict_ = {
     'SepalLength': [5.1, 5.9, 6.9],
     'SepalWidth': [3.3, 3.0, 3.1],
     'PetalLength': [1.7, 4.2, 5.4],
     'PetalWidth': [0.5, 1.5, 2.1]
 }
 df_test = pd.DataFrame(predict_)
 df_test_fn = get_input_fn(df_test, None, shuffle=False, num_epoch=1)

In [15]:
preds = classifier.predict(df_test_fn)

In [16]:
list(preds)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./Model\model.ckpt-4000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


[{'pred_logits': 2,
  'probabilities': array([9.69825594e-06, 6.15165768e-05, 9.99928785e-01])},
 {'pred_logits': 1,
  'probabilities': array([7.75048770e-04, 9.99191315e-01, 3.36365912e-05])},
 {'pred_logits': 0,
  'probabilities': array([9.99954626e-01, 4.40758918e-05, 1.29762657e-06])}]