In [2]:
# Implementation of a simple MLP network with one hidden layer. Tested on the iris data set.
# Requires: numpy, sklearn>=0.18.1, tensorflow>=1.0

# NOTE: In order to make the code simple, we rewrite x * W_1 + b_1 = x' * W_1'
# where x' = [x | 1] and W_1' is the matrix W_1 appended with a new row with elements b_1's.
# Similarly, for h * W_2 + b_2
import tensorflow as tf
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [3]:
RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)


def init_weights(shape):
    """ Weight initialization """
    weights = tf.random_normal(shape, stddev=0.1)
    return tf.Variable(weights)

def forwardprop(X, w_1, w_2):
    """
    Forward-propagation.
    IMPORTANT: yhat is not softmax since TensorFlow's softmax_cross_entropy_with_logits() does that internally.
    """
    h    = tf.nn.sigmoid(tf.matmul(X, w_1))  # The \sigma function
    yhat = tf.matmul(h, w_2)  # The \varphi function
    return yhat



In [4]:
""" Read the iris data set and split them into training and test sets """
iris   = datasets.load_iris()
data   = iris["data"]
target = iris["target"]
# Prepend the column of 1s for bias
N, M  = data.shape
all_X = np.ones((N, M + 1))
all_X[:, 1:] = data

# Convert into one-hot vectors
num_labels = len(np.unique(target))
all_Y = np.eye(num_labels)[target]  # One liner trick!
train_X, test_X, train_y, test_y = train_test_split(all_X, all_Y, test_size=0.33, random_state=RANDOM_SEED)
print(train_X)
print("-----------------------------")
print(test_X)
print("-----------------------------")
print(train_y)
print("-----------------------------")
print(test_y)
print("-----------------------------")

# Layer's sizes
x_size = train_X.shape[1]   # Number of input nodes: 4 features and 1 bias
h_size = 256                # Number of hidden nodes
y_size = train_y.shape[1]   # Number of outcomes (3 iris flowers)

# Symbols
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])

# Weight initializations
w_1 = init_weights((x_size, h_size))
w_2 = init_weights((h_size, y_size))

# Forward propagation
yhat    = forwardprop(X, w_1, w_2)
predict = tf.argmax(yhat, axis=1)

# Backward propagation
cost    = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=yhat))
updates = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

# Run SGD
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

for epoch in range(100):
    # Train with each example
    for i in range(len(train_X)):
        sess.run(updates, feed_dict={X: train_X[i: i + 1], y: train_y[i: i + 1]})

    train_accuracy = np.mean(np.argmax(train_y, axis=1) ==
                             sess.run(predict, feed_dict={X: train_X, y: train_y}))
    test_accuracy  = np.mean(np.argmax(test_y, axis=1) ==
                             sess.run(predict, feed_dict={X: test_X, y: test_y}))

    print("Epoch = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
          % (epoch + 1, 100. * train_accuracy, 100. * test_accuracy))

sess.close()

[[ 1.   5.7  2.9  4.2  1.3]
 [ 1.   7.6  3.   6.6  2.1]
 [ 1.   5.6  3.   4.5  1.5]
 [ 1.   5.1  3.5  1.4  0.2]
 [ 1.   7.7  2.8  6.7  2. ]
 [ 1.   5.8  2.7  4.1  1. ]
 [ 1.   5.2  3.4  1.4  0.2]
 [ 1.   5.   3.5  1.3  0.3]
 [ 1.   5.1  3.8  1.9  0.4]
 [ 1.   5.   2.   3.5  1. ]
 [ 1.   6.3  2.7  4.9  1.8]
 [ 1.   4.8  3.4  1.9  0.2]
 [ 1.   5.   3.   1.6  0.2]
 [ 1.   5.1  3.3  1.7  0.5]
 [ 1.   5.6  2.7  4.2  1.3]
 [ 1.   5.1  3.4  1.5  0.2]
 [ 1.   5.7  3.   4.2  1.2]
 [ 1.   7.7  3.8  6.7  2.2]
 [ 1.   4.6  3.2  1.4  0.2]
 [ 1.   6.2  2.9  4.3  1.3]
 [ 1.   5.7  2.5  5.   2. ]
 [ 1.   5.5  4.2  1.4  0.2]
 [ 1.   6.   3.   4.8  1.8]
 [ 1.   5.8  2.7  5.1  1.9]
 [ 1.   6.   2.2  4.   1. ]
 [ 1.   5.4  3.   4.5  1.5]
 [ 1.   6.2  3.4  5.4  2.3]
 [ 1.   5.5  2.3  4.   1.3]
 [ 1.   5.4  3.9  1.7  0.4]
 [ 1.   5.   2.3  3.3  1. ]
 [ 1.   6.4  2.7  5.3  1.9]
 [ 1.   5.   3.3  1.4  0.2]
 [ 1.   5.   3.2  1.2  0.2]
 [ 1.   5.5  2.4  3.8  1.1]
 [ 1.   6.7  3.   5.   1.7]
 [ 1.   4.9  3.1  1.

Epoch = 30, train accuracy = 93.00%, test accuracy = 92.00%
Epoch = 31, train accuracy = 93.00%, test accuracy = 94.00%
Epoch = 32, train accuracy = 94.00%, test accuracy = 94.00%
Epoch = 33, train accuracy = 94.00%, test accuracy = 94.00%
Epoch = 34, train accuracy = 94.00%, test accuracy = 94.00%
Epoch = 35, train accuracy = 94.00%, test accuracy = 94.00%
Epoch = 36, train accuracy = 94.00%, test accuracy = 94.00%
Epoch = 37, train accuracy = 94.00%, test accuracy = 94.00%
Epoch = 38, train accuracy = 94.00%, test accuracy = 96.00%
Epoch = 39, train accuracy = 94.00%, test accuracy = 96.00%
Epoch = 40, train accuracy = 94.00%, test accuracy = 96.00%
Epoch = 41, train accuracy = 94.00%, test accuracy = 98.00%
Epoch = 42, train accuracy = 95.00%, test accuracy = 98.00%
Epoch = 43, train accuracy = 95.00%, test accuracy = 98.00%
Epoch = 44, train accuracy = 95.00%, test accuracy = 98.00%
Epoch = 45, train accuracy = 95.00%, test accuracy = 98.00%
Epoch = 46, train accuracy = 95.00%, tes