In [4]:
import pandas as pd
import tensorflow as tf

# Locations of the iris training and test CSV files.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Column names applied when reading the CSVs; 'Species' is the default
# label column used by load_data().
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'Species']

# Species names, looked up by predicted class id when reporting predictions.
# (Fixed the 'Sentosa' misspelling so output matches the expected labels.)
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

def maybe_download():
    """Fetch the iris training and test CSVs and return their local paths.

    Each file is requested through ``tf.keras.utils.get_file`` using the last
    path segment of its URL as the file name.

    Returns:
        A ``(train_path, test_path)`` tuple of the paths reported by
        ``get_file``.
    """
    local_paths = tuple(
        tf.keras.utils.get_file(url.rsplit('/', 1)[-1], url)
        for url in (TRAIN_URL, TEST_URL)
    )
    train_path, test_path = local_paths
    return train_path, test_path

def load_data(y_name='Species'):
    """Returns the iris dataset as (train_x, train_y), (test_x, test_y).

    Args:
        y_name: Name of the column to split off as the label. It must be one
            of CSV_COLUMN_NAMES, otherwise ``DataFrame.pop`` raises KeyError.

    Returns:
        Two ``(features, labels)`` pairs, training first and test second.
        ``features`` is the DataFrame with the ``y_name`` column removed and
        ``labels`` is that removed column as a Series.
    """
    train_path, test_path = maybe_download()

    # header=0 combined with names= replaces each file's own first row with
    # CSV_COLUMN_NAMES.
    train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
    # pop() removes the label column in place, so `train` is features-only
    # afterwards.
    train_y = train.pop(y_name)
    train_x = train

    # Single pre-formatted argument: prints the same text on Python 2 and 3
    # (the bare `print a, b` statement is a SyntaxError on Python 3).
    print('{} {}'.format(type(train_x), type(train_y)))

    test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
    test_y = test.pop(y_name)
    test_x = test

    return (train_x, train_y), (test_x, test_y)

# Load both splits once, then unpack each into its features and labels.
iris_train, iris_test = load_data()
train_feature, train_label = iris_train
test_feature, test_label = iris_test


<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>


In [5]:
# One numeric feature column per input feature, keyed by the DataFrame
# column name.
my_feature_columns = [
    tf.feature_column.numeric_column(key=key)
    for key in train_feature.keys()
]

# Parenthesised single-argument form prints identically on Python 2 and 3
# (the bare `print x` statement is a SyntaxError on Python 3).
print(my_feature_columns)

[_NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


In [6]:
# DNN classifier over the numeric feature columns: hidden_units lists the
# size of each hidden layer, n_classes the number of output classes.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7efbf86d8910>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/tmp/tmpvfRvi3', '_save_summary_steps': 100}


In [7]:
def train_input_fn(features, labels, batch_size):
    """Build the input pipeline used for training.

    Args:
        features: Mapping-like object convertible with ``dict()`` (for
            example a DataFrame) from feature name to values.
        labels: Labels aligned with ``features``.
        batch_size: Number of examples per batch.

    Returns:
        The ``get_next()`` result of a one-shot iterator over the shuffled,
        repeated and batched dataset.
    """
    # Pair the feature dict with the labels so each slice is one example.
    examples = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle (argument 1000), repeat with no count, then batch.
    examples = examples.shuffle(1000)
    examples = examples.repeat()
    examples = examples.batch(batch_size)

    # Hand back the read end of the pipeline.
    iterator = examples.make_one_shot_iterator()
    return iterator.get_next()

In [8]:
def eval_input_fn(features, labels, batch_size):
    """Build the input pipeline used for evaluation or prediction.

    Args:
        features: Mapping-like object convertible with ``dict()`` from
            feature name to values.
        labels: Labels aligned with ``features``, or None to build a
            features-only dataset.
        batch_size: Number of examples per batch; must not be None.

    Returns:
        The ``get_next()`` result of a one-shot iterator over the batched
        dataset.

    Raises:
        AssertionError: If ``batch_size`` is None.
    """
    feature_dict = dict(features)

    # Without labels the dataset carries the features alone.
    source = feature_dict if labels is None else (feature_dict, labels)
    examples = tf.data.Dataset.from_tensor_slices(source)

    # Batch the examples.
    assert batch_size is not None, "batch_size must not be None"
    batched = examples.batch(batch_size)

    # Hand back the read end of the pipeline.
    iterator = batched.make_one_shot_iterator()
    return iterator.get_next()

In [13]:
# Load the splits under the names captured by the input-function lambdas below.
(train_x, train_y), (test_x, test_y) = load_data()

# Train the Model.
classifier.train(
    input_fn=lambda: train_input_fn(train_x, train_y, batch_size=100),
    steps=1000,
)

# Evaluate the model.
eval_result = classifier.evaluate(
    input_fn=lambda: eval_input_fn(test_x, test_y, batch_size=100),
)

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'>
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpvfRvi3/model.ckpt.
INFO:tensorflow:loss = 154.52, step = 1
INFO:tensorflow:global_step/sec: 806.067
INFO:tensorflow:loss = 26.7912, step = 101 (0.125 sec)
INFO:tensorflow:global_step/sec: 980.738
INFO:tensorflow:loss = 11.192, step = 201 (0.102 sec)
INFO:tensorflow:global_step/sec: 1004.54
INFO:tensorflow:loss = 9.43229, step = 301 (0.100 sec)
INFO:tensorflow:global_step/sec: 997.316
INFO:tensorflow:loss = 7.61455, step = 401 (0.100 sec)
INFO:tensorflow:global_step/sec: 996.731
INFO:tensorflow:loss = 5.74416, step = 501 (0.100 sec)
INFO:tensorflow:global_step/sec: 942.96
INFO:tensorflow:loss = 6.26401, step = 601 (0.105 sec)
INFO:tensorflow:global_step/sec: 1003.9
INFO:tensorflow:loss = 7.42368, step = 701 (0.100 sec)
INFO:tensorflow:global_step/sec: 880.259
INFO:tensorflow:loss = 10.2062, step = 801 (0.113 sec)
INFO

In [16]:
    # Generate predictions from the model
    expected = ['Setosa', 'Versicolor', 'Virginica']
    predict_x = {
        'SepalLength': [5.1, 5.9, 6.9],
        'SepalWidth': [3.3, 3.0, 3.1],
        'PetalLength': [1.7, 4.2, 5.4],
        'PetalWidth': [0.5, 1.5, 2.1],
    }

    # labels=None makes eval_input_fn build a features-only dataset.
    predictions = classifier.predict(
        input_fn=lambda: eval_input_fn(predict_x, labels=None, batch_size=100))

    # The template does not change between iterations, so build it once.
    template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'

    for pred_dict, expec in zip(predictions, expected):
        # The predicted class id indexes both the probabilities and SPECIES.
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        # Parenthesised single-argument form prints identically on Python 2
        # and 3 (the bare `print x` statement is a SyntaxError on Python 3).
        print(template.format(SPECIES[class_id], 100 * probability, expec))

INFO:tensorflow:Restoring parameters from /tmp/tmpvfRvi3/model.ckpt-1000

Prediction is "Sentosa" (99.9%), expected "Setosa"

Prediction is "Versicolor" (99.6%), expected "Versicolor"

Prediction is "Virginica" (94.8%), expected "Virginica"
