In [1]:
# Import all the necessary modules, and define where to download and store the dataset
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
# urllib.urlopen() was removed in Python 3, so urllib.request.urlopen() is used instead.
# Python 2 urllib reference: http://docs.python.jp/2/library/urllib.html
# import urllib
import urllib.request

import tensorflow as tf
import numpy as np

# Local file name and download URL of the Iris training set (CSV).
IRIS_TRAINING = "iris_training.csv"
IRIS_TRAINING_URL = "http://download.tensorflow.org/data/iris_training.csv"

# Local file name and download URL of the Iris test set (CSV).
IRIS_TEST = "iris_test.csv"
IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

In [2]:
# Download the training and test sets if they are not already stored locally.
#
# urlopen(...).read() returns bytes on Python 3, so each file is written in
# binary mode ('wb'). Writing the bytes to a text-mode file raises TypeError
# and leaves an empty file behind, which then makes the os.path.exists()
# check skip the download on the next run. That failure is why the files
# previously had to be downloaded and placed by hand.
for local_path, url in ((IRIS_TRAINING, IRIS_TRAINING_URL),
                        (IRIS_TEST, IRIS_TEST_URL)):
  if not os.path.exists(local_path):
    # Fetch the whole payload before opening the output file, so a failed
    # request never creates a partial or empty local file.
    with urllib.request.urlopen(url) as response:
      raw = response.read()
    with open(local_path, 'wb') as f:
      f.write(raw)

In [3]:
# Load datasets.
# load_csv_with_header returns a named tuple holding the features and the
# targets, accessible as training_set.data and training_set.target.
#
# The builtin int is used as the target dtype: np.int was only an alias for
# it, deprecated in NumPy 1.20 and removed in NumPy 1.24, so int yields the
# same dtype and keeps working on newer NumPy releases.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING,
    target_dtype=int,
    features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST,
    target_dtype=int,
    features_dtype=np.float32)

In [4]:
# Every feature is real-valued. The input has four dimensions, so one
# numeric column with shape=[4] is declared to use all of them.
iris_measurements = tf.feature_column.numeric_column("x", shape=[4])
feature_columns = [iris_measurements]

# DNN with three hidden layers of 10, 20 and 10 units.
# n_classes=3 because the samples are classified into the three classes 0-2.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir="/tmp/iris_model",
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_session_config': None, '_model_dir': '/tmp/iris_model', '_keep_checkpoint_max': 5, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000, '_tf_random_seed': 1, '_save_checkpoints_steps': None}


In [5]:
# Build the input function that feeds the training data to the estimator.
# The features are passed as a dict keyed by "x", the labels as a plain array.
train_features = {"x": np.array(training_set.data)}
train_labels = np.array(training_set.target)

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=train_features,
    y=train_labels,
    num_epochs=None,
    shuffle=True,
)

In [6]:
# Train model.
# Runs 4000 training steps fed by train_input_fn. The call is the last
# expression of the cell, so the notebook displays its return value.
# NOTE(review): the recorded log shows parameters being restored from an
# existing checkpoint in /tmp/iris_model (model.ckpt-8000), i.e. this run
# continued an earlier training session instead of starting from scratch.
# Presumably the model directory must be cleared to get a fresh run - confirm.
classifier.train(input_fn=train_input_fn, steps=4000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/iris_model\model.ckpt-8000
INFO:tensorflow:Saving checkpoints for 8001 into /tmp/iris_model\model.ckpt.
INFO:tensorflow:loss = 2.94745, step = 8001
INFO:tensorflow:global_step/sec: 576.492
INFO:tensorflow:loss = 7.95504, step = 8101 (0.171 sec)
INFO:tensorflow:global_step/sec: 623.856
INFO:tensorflow:loss = 5.37395, step = 8201 (0.161 sec)
INFO:tensorflow:global_step/sec: 617.53
INFO:tensorflow:loss = 5.41774, step = 8301 (0.161 sec)
INFO:tensorflow:global_step/sec: 619.649
INFO:tensorflow:loss = 2.54411, step = 8401 (0.162 sec)
INFO:tensorflow:global_step/sec: 625.289
INFO:tensorflow:loss = 4.61811, step = 8501 (0.159 sec)
INFO:tensorflow:global_step/sec: 588.377
INFO:tensorflow:loss = 4.12768, step = 8601 (0.170 sec)
INFO:tensorflow:global_step/sec: 655.854
INFO:tensorflow:loss = 7.08055, step = 8701 (0.153 sec)
INFO:tensorflow:global_step/sec: 683.111
INFO:tensorflow:loss = 3.93271, step = 880

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1ede9bda390>

In [7]:
# Build the input function for the held-out test data: a single pass over
# the data (num_epochs=1), with shuffling disabled.
test_features = {"x": np.array(test_set.data)}
test_labels = np.array(test_set.target)

test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=test_features,
    y=test_labels,
    num_epochs=1,
    shuffle=False,
)

# Evaluate accuracy.
# evaluate() scores the trained classifier and returns a dict of metrics;
# the value of interest is pulled out by its key.
eval_metrics = classifier.evaluate(input_fn=test_input_fn)
accuracy_score = eval_metrics["accuracy"]

print("\nTest Accuracy: {0:f}\n".format(accuracy_score))

INFO:tensorflow:Starting evaluation at 2017-09-04-15:02:19
INFO:tensorflow:Restoring parameters from /tmp/iris_model\model.ckpt-12000
INFO:tensorflow:Finished evaluation at 2017-09-04-15:02:19
INFO:tensorflow:Saving dict for global step 12000: accuracy = 0.966667, average_loss = 0.12649, global_step = 12000, loss = 3.79469

Test Accuracy: 0.966667



In [14]:
# Classify two new flower samples.
# Besides the existing data, other measurements (assuming they were
# obtained) can be classified as well.
new_samples = np.array(
    [
        [6.4, 3.2, 4.5, 1.5],
        [5.8, 3.1, 5.0, 1.7],
    ],
    dtype=np.float32,
)

predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": new_samples},
    num_epochs=1,
    shuffle=False,
)

# predict() returns a generator of dicts, so it is materialized into a list
# before the raw prediction dicts are printed.
predictions = list(classifier.predict(input_fn=predict_input_fn))
print(predictions)

# Collect the "classes" entry of every prediction as the final result.
predicted_classes = []
for prediction in predictions:
    predicted_classes.append(prediction["classes"])

summary_template = "New Samples, Class Predictions:    {}\n"
print(summary_template.format(predicted_classes))

INFO:tensorflow:Restoring parameters from /tmp/iris_model\model.ckpt-8000
[{'probabilities': array([  4.54885640e-06,   9.99990225e-01,   5.21195898e-06], dtype=float32), 'classes': array([b'1'], dtype=object), 'logits': array([-5.67307234,  6.62755251, -5.53699255], dtype=float32), 'class_ids': array([1], dtype=int64)}, {'probabilities': array([  4.44002990e-06,   7.10272849e-01,   2.89722741e-01], dtype=float32), 'classes': array([b'1'], dtype=object), 'logits': array([-9.86227322,  2.12046981,  1.22374511], dtype=float32), 'class_ids': array([1], dtype=int64)}]
New Samples, Class Predictions:    [array([b'1'], dtype=object), array([b'1'], dtype=object)]

