In [1]:
# common packages
import tensorflow as tf
import numpy as np
import sys
import os

# displaying images
from matplotlib.pyplot import imshow
%matplotlib inline

# download data
from six.moves.urllib.request import urlopen


# Helper to make the output consistent
SEED = 42
def reset_graph(seed=SEED):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()


# set log level to supress messages, unless an error
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Check if using GPU
if not tf.test.gpu_device_name():
    print('No GPU found')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

Python: (3, 5, 4, 'final', 0)
TensorFlow: 1.4.0
Default GPU Device: /device:GPU:0


## Obtain and load data

In [2]:
## Download data paths
ROOT_DATA = "../ROOT_DATA/"
DATA_DIR = "IRIS"

IRIS_TRAINING_PATH = os.path.join(ROOT_DATA, DATA_DIR, "iris_training.csv")
IRIS_TRAINING_URL = "http://download.tensorflow.org/data/iris_training.csv"

IRIS_TEST_PATH = os.path.join(ROOT_DATA, DATA_DIR, "iris_test.csv")
IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

In [3]:
# download data
## training
if not os.path.exists(IRIS_TRAINING_PATH):
    raw = urlopen(IRIS_TRAINING_URL).read()
    with open(IRIS_TRAINING_PATH, "wb") as f:
        f.write(raw)
    print(IRIS_TRAINING_PATH, "path written")
else:
    print(IRIS_TRAINING_PATH, "path exists")

## test
if not os.path.exists(IRIS_TEST_PATH):
    raw = urlopen(IRIS_TEST_URL).read()
    with open(IRIS_TEST_PATH, "wb") as f:
        f.write(raw)
    print(IRIS_TEST_PATH, "path written")
else:
    print(IRIS_TEST_PATH, "path exists")

../ROOT_DATA/IRIS/iris_training.csv path exists
../ROOT_DATA/IRIS/iris_test.csv path exists


In [4]:
# Load dataset
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
                  filename=IRIS_TRAINING_PATH,
                  target_dtype=np.int,
                  features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
              filename=IRIS_TEST_PATH,
              target_dtype=np.int,
              features_dtype=np.float32)

## Classifier

### Dataset information

In [5]:
# print some dataset information
print("training shape:", training_set.data.shape)
print("test shape:", test_set.data.shape)

# quick check
assert training_set.data.shape[1] == test_set.data.shape[1], \
    "Rut row raggy, features don't match ({} vs {})".format(training_set.data.shape[1],
                                                            test_set.data.shape[0])
if training_set.data.shape[1] == test_set.data.shape[1]:
    NUM_FEATURES = training_set.data.shape[1]
    print("num features = {}".format(NUM_FEATURES))
    
print("training target information: {} targets, {} classes".format(\
    len(training_set.target), len(set(training_set.target))))
print("test target information: {} targets, {} classes".format(\
    len(test_set.target), len(set(test_set.target))))

training shape: (120, 4)
test shape: (30, 4)
num features = 4
training target information: 120 targets, 3 classes
test target information: 30 targets, 3 classes


In [6]:
# create feature column
# in this example training_set.data.shape[1] is 4
feature_columns = [tf.feature_column.numeric_column("x", shape=[NUM_FEATURES])]
if len(set(training_set.target)) == len(set(test_set.target)):
    NUM_CLASSES = len(set(training_set.target))
    print("Number of classes = {}".format(NUM_CLASSES))
else:
    print("number of classes in training and test set don't match")
    NUM_CLASSES = len(set(training_set.target))
    print("WARNING: num classes has been set to {} to match training set".format(NUM_CLASSES))

Number of classes = 3


### Build Basic DNN Classifier

In [7]:
# DNN classifier will be projected to 20, then filtered down to num classes
# hidden units = [input: NUM_FEATURES] -> 20, 16, 12, 8 -> [out: NUM_CLASSES]
classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                        hidden_units=[10, 20, 10],
                                        n_classes=NUM_CLASSES,
                                        model_dir="/tmp/iris_model")

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fc5105ba8d0>, '_save_summary_steps': 100, '_tf_random_seed': None, '_keep_checkpoint_max': 5, '_num_worker_replicas': 1, '_save_checkpoints_steps': None, '_service': None, '_task_id': 0, '_num_ps_replicas': 0, '_master': '', '_model_dir': '/tmp/iris_model', '_is_chief': True, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_task_type': 'worker', '_keep_checkpoint_every_n_hours': 10000}


In [8]:
# create input function
# "x" will accept our input data
# y will accept our input labels
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": np.array(training_set.data)},
    y=np.array(training_set.target),
    num_epochs=None,
    shuffle=True)

### Train our classifier

In [9]:
classifier.train(input_fn=train_input_fn, steps=2500)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/iris_model/model.ckpt.
INFO:tensorflow:step = 1, loss = 201.049
INFO:tensorflow:global_step/sec: 375.226
INFO:tensorflow:step = 101, loss = 16.4933 (0.267 sec)
INFO:tensorflow:global_step/sec: 457.454
INFO:tensorflow:step = 201, loss = 8.19692 (0.220 sec)
INFO:tensorflow:global_step/sec: 402.874
INFO:tensorflow:step = 301, loss = 6.31408 (0.249 sec)
INFO:tensorflow:global_step/sec: 416.92
INFO:tensorflow:step = 401, loss = 9.12889 (0.238 sec)
INFO:tensorflow:global_step/sec: 396.901
INFO:tensorflow:step = 501, loss = 7.02289 (0.254 sec)
INFO:tensorflow:global_step/sec: 301.315
INFO:tensorflow:step = 601, loss = 22.6839 (0.331 sec)
INFO:tensorflow:global_step/sec: 397.084
INFO:tensorflow:step = 701, loss = 4.06732 (0.250 sec)
INFO:tensorflow:global_step/sec: 416.189
INFO:tensorflow:step = 801, loss = 7.456 (0.240 sec)
INFO:tensorflow:global_step/sec: 392.124
INFO:tensorflow:step = 901, loss = 

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7fc5105ad400>

### Evaluate our classifier

In [10]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": np.array(test_set.data)},
      y=np.array(test_set.target),
      num_epochs=1,
      shuffle=False)

In [15]:
test_metrics = classifier.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Starting evaluation at 2017-11-06-15:36:55
INFO:tensorflow:Restoring parameters from /tmp/iris_model/model.ckpt-2500
INFO:tensorflow:Finished evaluation at 2017-11-06-15:36:55
INFO:tensorflow:Saving dict for global step 2500: accuracy = 0.966667, average_loss = 0.0492171, global_step = 2500, loss = 1.47651


In [19]:
print("Test accuracy: {:.4f}%".format(test_metrics["accuracy"]*100))

Test accuracy: 96.6667%
