In [11]:
import tensorflow as tf
from functools import partial
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import StandardScaler

# load MNIST
df_train = pd.read_csv("../datasets/mnist_train.csv")
X_train_noScale = df_train.iloc[:].drop("5", axis=1).values
y_train = df_train.iloc[:]["5"].values.reshape(-1,1)
df_test = pd.read_csv("../datasets/mnist_test.csv")
X_test = df_test.drop("7", axis=1).values
y_test = df_test["7"].values.reshape(-1,1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_noScale)
X_test = scaler.transform(X_test)

# initialize
tf.reset_default_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10
learning_rate = 0.01
n_epochs = 200
batch_size = 50
dropout_rate = 0.5
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
training = tf.placeholder_with_default(False, shape=(), name="training")
my_batch_norm_layer = partial(tf.layers.batch_normalization, training=training, momentum=0.9)

# layers
with tf.name_scope("dnn"):
    X_drop = tf.layers.dropout(X, dropout_rate, training=training)
    with tf.name_scope("hidden1"):
        hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1")
        hidden1_drop = tf.layers.dropout(hidden1, dropout_rate, training=training)
        bn1 = my_batch_norm_layer(hidden1_drop)
        bn1_act = tf.nn.elu(bn1)
    with tf.name_scope("hidden2"):
        hidden2 = tf.layers.dense(bn1_act, n_hidden2, name="hidden2")
        hidden2_drop = tf.layers.dropout(hidden2, dropout_rate, training=training)
        bn2 = my_batch_norm_layer(hidden2_drop)
        bn2_act = tf.nn.elu(bn2)
    with tf.name_scope("outputs"):
        logits_before_bn = tf.layers.dense(bn2_act, n_outputs, name="outputs")
        logits = my_batch_norm_layer(logits_before_bn)

# loss function
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(tf.reshape(y,[-1]), tf.int32), logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
# train
with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, 
                                           momentum=0.9, use_nesterov=True)
    training_op = optimizer.minimize(loss)
    
# evaluation
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, tf.cast(tf.reshape(y,[-1]), tf.int32), 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
logdir = "../tf_logs/run-{}/".format(now)
file_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())
accuracy_summary = tf.summary.scalar("accuracy", accuracy)

# run
init = tf.global_variables_initializer()
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        shuffled_index = np.random.permutation(len(X_train))
        X_train, y_train = X_train[shuffled_index], y_train[shuffled_index]
        X_train_noScale = X_train_noScale[shuffled_index]
        for i in range(len(X_train) // batch_size):
            X_batch = X_train[batch_size * i : batch_size * (i+1)]
            y_batch = y_train[batch_size * i : batch_size * (i+1)]
            sess.run([training_op, extra_update_ops], 
                     feed_dict={training: True, X: X_batch, y: y_batch})
        accuracy_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        file_writer.add_summary(accuracy_summary.eval(feed_dict={X: X_test, y: y_test}), epoch)
        print(epoch, "Test accuracy:", accuracy_test)
    Z = logits.eval(feed_dict={X: X_train[:]})
    y_pred = np.argmax(Z, axis=1)
    print(y_pred)
file_writer.flush()

# monitor
cl_a, cl_b = 3, 5
X_aa = X_train_noScale[(y_train.ravel() == cl_a) & (y_pred == cl_a)]
X_ab = X_train_noScale[(y_train.ravel() == cl_a) & (y_pred == cl_b)]
X_ba = X_train_noScale[(y_train.ravel() == cl_b) & (y_pred == cl_a)]
X_bb = X_train_noScale[(y_train.ravel() == cl_b) & (y_pred == cl_b)]
def plot_digits(instances, images_per_row=10, **options):
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size,size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    row_images = []
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    for row in range(n_rows):
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    plt.imshow(image, cmap=matplotlib.cm.binary, **options)
    plt.axis("off")
plt.figure(figsize=(8,8))
plt.subplot(221);
plot_digits(X_aa[:25], images_per_row=5)
plt.subplot(222);
plot_digits(X_ab[:25], images_per_row=5)
plt.subplot(223);
plot_digits(X_ba[:25], images_per_row=5)
plt.subplot(224);
plot_digits(X_bb[:25], images_per_row=5)
plt.savefig("../plots/ex_11_01.pdf")



0 Test accuracy: 0.909691
1 Test accuracy: 0.912191
2 Test accuracy: 0.919592
3 Test accuracy: 0.925393
4 Test accuracy: 0.928893
5 Test accuracy: 0.930593
6 Test accuracy: 0.932793
7 Test accuracy: 0.934893
8 Test accuracy: 0.938194
9 Test accuracy: 0.940694
10 Test accuracy: 0.940194
11 Test accuracy: 0.940894
12 Test accuracy: 0.940094
13 Test accuracy: 0.944494
14 Test accuracy: 0.945495
15 Test accuracy: 0.945695
16 Test accuracy: 0.947895
17 Test accuracy: 0.950895
18 Test accuracy: 0.949895
19 Test accuracy: 0.950795
20 Test accuracy: 0.949395
21 Test accuracy: 0.948195
22 Test accuracy: 0.953295
23 Test accuracy: 0.952695
24 Test accuracy: 0.953495
25 Test accuracy: 0.953195
26 Test accuracy: 0.953495
27 Test accuracy: 0.955696
28 Test accuracy: 0.956696
29 Test accuracy: 0.958096
30 Test accuracy: 0.956796
31 Test accuracy: 0.955296
32 Test accuracy: 0.952995
33 Test accuracy: 0.956596
34 Test accuracy: 0.956196
35 Test accuracy: 0.958296
36 Test accuracy: 0.957296
37 Test acc