## Using TensorFlow to deal with iris data

### 1 Create a simple graph and run it:  $f(x,y) = x^2\cdot y + y + 2$

In [1]:
import tensorflow as tf

x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y + y + 2

In [2]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
sess.run(f)

42

In [3]:
sess.close()

In [4]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
result

42

### 2 Classify iris data using Logistic Regression by TensorFlow

#### 2.1 Normal Equation: $\theta = (X^T\cdot X)^{-1}\cdot X^T\cdot y$

Note that label is a 1D array, but we need to reshape it to a column vector to compute theta.

In [5]:
import numpy as np
from sklearn.datasets import load_iris

dataset = load_iris()
feature = dataset.data
m, n = feature.data.shape
label = dataset.target

In [6]:
feature_add_bias = np.c_[np.ones((m, 1)), feature]
X = tf.constant(feature_add_bias, dtype=tf.float32, name="X")
y = tf.constant(label.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)
error = tf.matmul(X, theta) - y
mse = tf.reduce_mean(tf.square(error), name="mse")

with tf.Session() as sess:
    theta_result = theta.eval()
    mse_result = mse.eval()
    
mse_result

0.046385087

In [7]:
theta_result

array([[ 0.19220488],
       [-0.10975894],
       [-0.04425819],
       [ 0.22700629],
       [ 0.609898  ]], dtype=float32)

#### 2.2 Using Batch Gradient Descent to fit iris data
Gradient: $ \nabla_{\theta}MSE(\theta) = \frac{2}{m}X^T\cdot(X\cdot\theta - y) $ <br>
Gradient Descent step: $ \theta^{(next step)} = \theta - \eta\cdot \nabla_{\theta}MSE(\theta) $

In [8]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_feature = scaler.fit_transform(feature)

In [9]:
np.random.seed(42)

X = tf.constant(feature_add_bias, dtype=tf.float32, name="X")
y = tf.constant(label.reshape(-1, 1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0, seed=42), name="theta")
error = tf.matmul(X, theta) - y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), tf.matmul(X, theta) - y)
training = tf.assign(theta, theta - 0.01 * gradients)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        sess.run(training)
    best_mse = mse.eval()
    theta_result = theta.eval()
    
best_mse

0.05250996

In [10]:
theta_result

array([[ 0.6631392 ],
       [-0.30217153],
       [ 0.03044995],
       [ 0.44131878],
       [ 0.29573858]], dtype=float32)

#### 2.3 Using TensorBoard for Data Visualization

In [11]:
from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [12]:
mse_summary = tf.summary.scalar("MSE", mse)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

In [13]:
with tf.Session() as sess:
    sess.run(init)
    for iter in range(1000):
        if iter % 10 == 0:
            summary = sess.run(mse_summary)
            file_writer.add_summary(summary, iter)
        sess.run(training)
    best_mse = mse.eval()
    theta_result = theta.eval()

best_mse

0.05250996

#### 2.4 Retrain the Gradient Descent by optimazer

In [14]:
learning_rate = 0.001
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(mse)

In [15]:
with tf.Session() as sess:
    sess.run(init)
    for i in range(1000):
        sess.run(training)
    best_mse = mse.eval()
    theta_result = theta.eval()

best_mse

0.05250996

### 3 Classify iris data by using Deep Neural Network with TensorFlow

See [dnn_iris.py](dnn_iris.py)

In [16]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(feature, label, 
                                                    random_state=42, test_size=0.2)

In [17]:
# Defaults to a tf.float32 scalar.
SepalLength = tf.feature_column.numeric_column(key="SepalLength")
SepalWidth = tf.feature_column.numeric_column(key="SepalWidth")
PetalLength = tf.feature_column.numeric_column(key="PetalLength")
PetalWidth = tf.feature_column.numeric_column(key="PetalWidth")
columns = [SepalLength, SepalWidth, PetalLength, PetalWidth]

In [18]:
train_features = {"SepalLength": np.array(X_train[:, 0]),
                  "SepalWidth": np.array(X_train[:, 1]),
                  "PetalLength": np.array(X_train[:, 2]),
                  "PetalWidth": np.array(X_train[:, 3])}
train_labels = np.array(y_train)

In [19]:
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=dict(train_features),
    y=train_labels,
    num_epochs=None,
    shuffle=True)

In [20]:
train_input_fn

<function tensorflow.python.estimator.inputs.numpy_io.numpy_input_fn.<locals>.input_fn()>

In [21]:
dataset = tf.data.Dataset.from_tensor_slices((dict(train_features), y_train))
dataset = dataset.shuffle(1000).repeat().batch(100)
input_fn = dataset.make_one_shot_iterator().get_next()

In [22]:
estimator = tf.estimator.DNNClassifier(
    feature_columns=columns,
    hidden_units=[1024, 512, 256],
    n_classes=3
)

INFO:tensorflow:Using default config.




INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\a\\AppData\\Local\\Temp\\tmpf2zmwrpx', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002DD1C2AD6D8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [23]:
estimator.train(input_fn=train_input_fn, steps=1000)

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.




INFO:tensorflow:Saving checkpoints for 1 into C:\Users\a\AppData\Local\Temp\tmpf2zmwrpx\model.ckpt.


INFO:tensorflow:loss = 138.8454, step = 1


INFO:tensorflow:global_step/sec: 27.5466


INFO:tensorflow:loss = 15.863358, step = 101 (3.633 sec)


INFO:tensorflow:global_step/sec: 30.1187


INFO:tensorflow:loss = 8.606751, step = 201 (3.320 sec)


INFO:tensorflow:global_step/sec: 29.2295


INFO:tensorflow:loss = 11.907811, step = 301 (3.421 sec)


INFO:tensorflow:global_step/sec: 28.5779


INFO:tensorflow:loss = 3.164533, step = 401 (3.499 sec)


INFO:tensorflow:global_step/sec: 28.1912


INFO:tensorflow:loss = 3.4470944, step = 501 (3.547 sec)


INFO:tensorflow:global_step/sec: 29.0343


INFO:tensorflow:loss = 10.6713915, step = 601 (3.444 sec)


INFO:tensorflow:global_step/sec: 30.4304


INFO:tensorflow:loss = 8.8376465, step = 701 (3.286 sec)


INFO:tensorflow:global_step/sec: 30.6825


INFO:tensorflow:loss = 8.993654, step = 801 (3.259 sec)


INFO:tensorflow:global_step/sec: 30.8148


INFO:tensorflow:loss = 3.3819172, step = 901 (3.244 sec)


INFO:tensorflow:Saving checkpoints for 1000 into C:\Users\a\AppData\Local\Temp\tmpf2zmwrpx\model.ckpt.


INFO:tensorflow:Loss for final step: 8.268209.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x2dd1c29fe48>

In [26]:
test_features = {"SepalLength": np.array(X_test[:, 0]),
                  "SepalWidth": np.array(X_test[:, 1]),
                  "PetalLength": np.array(X_test[:, 2]),
                  "PetalWidth": np.array(X_test[:, 3])}
test_labels = np.array(y_test)
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=test_features,
    y=test_labels,
    num_epochs=None,
    shuffle=True)

In [27]:
eval = estimator.evaluate(test_input_fn, steps=10)
eval["accuracy"]

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2018-05-05-21:54:45


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from C:\Users\a\AppData\Local\Temp\tmpf2zmwrpx\model.ckpt-1000


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Evaluation [1/10]


INFO:tensorflow:Evaluation [2/10]


INFO:tensorflow:Evaluation [3/10]


INFO:tensorflow:Evaluation [4/10]


INFO:tensorflow:Evaluation [5/10]


INFO:tensorflow:Evaluation [6/10]


INFO:tensorflow:Evaluation [7/10]


INFO:tensorflow:Evaluation [8/10]


INFO:tensorflow:Evaluation [9/10]


INFO:tensorflow:Evaluation [10/10]




INFO:tensorflow:Finished evaluation at 2018-05-05-21:54:46


INFO:tensorflow:Saving dict for global step 1000: accuracy = 1.0, average_loss = 0.06023919, global_step = 1000, loss = 7.710616


1.0