In [1]:
import tensorflow as tf
import numpy as np

# Some functions for deep neural network
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
    
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch
        
def max_norm_regularizer(threshold, axes=1, name="max_norm",
                         collection="max_norm"):
    def max_norm(weights):
        clipped = tf.clip_by_norm(weights, clip_norm=threshold, axes=axes)
        clip_weights = tf.assign(weights, clipped, name=name)
        tf.add_to_collection(collection, clip_weights)
        return None 
    return max_norm

#Fetching Mnist Data
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

## Dropout NN 3 Layers(ReLU) 1024

In [2]:
import os
n_inputs = 28*28  # MNIST
n_hidden1 = 1024
n_hidden2 = 1024
n_hidden3 = 1024
n_outputs = 10

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')
dropout_rate = 0.5  # == 1 - keep_prob
X_drop = tf.layers.dropout(X, 0.8, training=training)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1",
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(tf.layers.dropout(hidden1, dropout_rate, training=training), n_hidden2, name="hidden2",
                              activation=tf.nn.relu)
    hidden3 = tf.layers.dense(tf.layers.dropout(hidden2, dropout_rate, training=training), n_hidden3, name="hidden3",
                              activation=tf.nn.relu)
    logits = tf.layers.dense(tf.layers.dropout(hidden3, dropout_rate, training=training), n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)
logdir = log_dir("mnist_dnn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
m, n = X_train.shape
n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/mnist_relu.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./mnist_relu"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Previous train was interrupted, will start at epoch {}".format(start_epoch))
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation set accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("termination")
                    break

Epoch: 0 	Validation set accuracy: 91.660% 	Loss: 0.30961
Epoch: 5 	Validation set accuracy: 95.980% 	Loss: 0.14728
Epoch: 10 	Validation set accuracy: 97.360% 	Loss: 0.09577
Epoch: 15 	Validation set accuracy: 97.660% 	Loss: 0.08058
Epoch: 20 	Validation set accuracy: 97.940% 	Loss: 0.07182
Epoch: 25 	Validation set accuracy: 98.020% 	Loss: 0.06575
Epoch: 30 	Validation set accuracy: 98.000% 	Loss: 0.06732
Epoch: 35 	Validation set accuracy: 98.040% 	Loss: 0.06660
Epoch: 40 	Validation set accuracy: 98.120% 	Loss: 0.06518
Epoch: 45 	Validation set accuracy: 98.180% 	Loss: 0.06644
Epoch: 50 	Validation set accuracy: 98.140% 	Loss: 0.06653
Epoch: 55 	Validation set accuracy: 98.060% 	Loss: 0.06866
Epoch: 60 	Validation set accuracy: 98.080% 	Loss: 0.07063
Epoch: 65 	Validation set accuracy: 98.020% 	Loss: 0.07107
Epoch: 70 	Validation set accuracy: 98.100% 	Loss: 0.07233
Epoch: 75 	Validation set accuracy: 98.120% 	Loss: 0.07203
Epoch: 80 	Validation set accuracy: 98.100% 	Loss: 0.07323

## Dropout NN 3 Layers(Logistic) 1024

In [3]:
n_inputs = 28*28
n_hidden1 = 1024
n_hidden2 = 1024
n_hidden3 = 1024
n_outputs = 10

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

# Set the dropout rate
dropout_rate = 0.5  
X_drop = tf.layers.dropout(X, 0.8, training=training)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1",
                              activation=tf.nn.sigmoid)
    hidden2 = tf.layers.dense(tf.layers.dropout(hidden1, dropout_rate, training=training), n_hidden2, name="hidden2",
                              activation=tf.nn.sigmoid)
    hidden3 = tf.layers.dense(tf.layers.dropout(hidden2, dropout_rate, training=training), n_hidden3, name="hidden3",
                              activation=tf.nn.sigmoid)
    logits = tf.layers.dense(tf.layers.dropout(hidden3, dropout_rate, training=training), n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)
logdir = log_dir("mnist_dnn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
m, n = X_train.shape
n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/mnist_logistic.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./mnist_logistic"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Previous train was interrupted, will start at epoch {}".format(start_epoch))
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation set accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("termination")
                    break

Epoch: 0 	Validation set accuracy: 11.260% 	Loss: 2.30426
Epoch: 5 	Validation set accuracy: 54.460% 	Loss: 1.51349
Epoch: 10 	Validation set accuracy: 82.520% 	Loss: 0.62264
Epoch: 15 	Validation set accuracy: 86.900% 	Loss: 0.44661
Epoch: 20 	Validation set accuracy: 89.080% 	Loss: 0.38989
Epoch: 25 	Validation set accuracy: 89.820% 	Loss: 0.35892
Epoch: 30 	Validation set accuracy: 90.660% 	Loss: 0.33679
Epoch: 35 	Validation set accuracy: 90.960% 	Loss: 0.32228
Epoch: 40 	Validation set accuracy: 91.260% 	Loss: 0.31592
Epoch: 45 	Validation set accuracy: 91.720% 	Loss: 0.30399
Epoch: 50 	Validation set accuracy: 91.980% 	Loss: 0.29383
Epoch: 55 	Validation set accuracy: 91.940% 	Loss: 0.28481
Epoch: 60 	Validation set accuracy: 92.180% 	Loss: 0.27952
Epoch: 65 	Validation set accuracy: 92.260% 	Loss: 0.27179
Epoch: 70 	Validation set accuracy: 92.360% 	Loss: 0.26453
Epoch: 75 	Validation set accuracy: 92.600% 	Loss: 0.25924
Epoch: 80 	Validation set accuracy: 92.500% 	Loss: 0.25503

## Dropout NN 3 Layers(ReLU) with max-norm constraint 1024 units

In [4]:
import os
n_inputs = 28*28  # MNIST
n_hidden1 = 1024
n_hidden2 = 1024
n_hidden3 = 1024
n_outputs = 10

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

# Implement dropout
dropout_rate = 0.5  # == 1 - keep_prob
X_drop = tf.layers.dropout(X, 0.8, training=training)

max_norm_reg = max_norm_regularizer(threshold = 1.0)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    hidden2 = tf.layers.dense(tf.layers.dropout(hidden1, dropout_rate, training=training), n_hidden2, name="hidden2",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    hidden3 = tf.layers.dense(tf.layers.dropout(hidden2, dropout_rate, training=training), n_hidden3, name="hidden3",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    logits = tf.layers.dense(tf.layers.dropout(hidden3, dropout_rate, training=training), n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)
logdir = log_dir("mnist_dnn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
m, n = X_train.shape
n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/mnist_relu_norm.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./mnist_relu_norm"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Previous train was interrupted, will start at epoch {}".format(start_epoch))
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation set accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("termination")
                    break

Epoch: 0 	Validation set accuracy: 91.580% 	Loss: 0.31120
Epoch: 5 	Validation set accuracy: 95.960% 	Loss: 0.14384
Epoch: 10 	Validation set accuracy: 97.520% 	Loss: 0.09419
Epoch: 15 	Validation set accuracy: 97.700% 	Loss: 0.07958
Epoch: 20 	Validation set accuracy: 97.960% 	Loss: 0.06950
Epoch: 25 	Validation set accuracy: 98.040% 	Loss: 0.06496
Epoch: 30 	Validation set accuracy: 98.140% 	Loss: 0.06622
Epoch: 35 	Validation set accuracy: 98.140% 	Loss: 0.06571
Epoch: 40 	Validation set accuracy: 98.240% 	Loss: 0.06448
Epoch: 45 	Validation set accuracy: 98.260% 	Loss: 0.06591
Epoch: 50 	Validation set accuracy: 98.260% 	Loss: 0.06580
Epoch: 55 	Validation set accuracy: 98.380% 	Loss: 0.06711
Epoch: 60 	Validation set accuracy: 98.360% 	Loss: 0.06883
Epoch: 65 	Validation set accuracy: 98.340% 	Loss: 0.06975
Epoch: 70 	Validation set accuracy: 98.380% 	Loss: 0.07062
Epoch: 75 	Validation set accuracy: 98.320% 	Loss: 0.07063
Epoch: 80 	Validation set accuracy: 98.420% 	Loss: 0.07167

## Dropout NN 3 Layers(ReLU) with max-norm constraint 2048 units

In [5]:
import os
n_inputs = 28*28  
n_hidden1 = 2048
n_hidden2 = 2048
n_hidden3 = 2048
n_outputs = 10

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

# Implement dropout
dropout_rate = 0.5  # == 1 - keep_prob
X_drop = tf.layers.dropout(X, 0.8, training=training)

max_norm_reg = max_norm_regularizer(threshold = 1.0)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    hidden2 = tf.layers.dense(tf.layers.dropout(hidden1, dropout_rate, training=training), n_hidden2, name="hidden2",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    hidden3 = tf.layers.dense(tf.layers.dropout(hidden2, dropout_rate, training=training), n_hidden3, name="hidden3",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    logits = tf.layers.dense(tf.layers.dropout(hidden3, dropout_rate, training=training), n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)
logdir = log_dir("mnist_dnn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
m, n = X_train.shape
n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/mnist_relu_norm2048.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./mnist_relu_norm2048"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Previous train was interrupted, will start at epoch {}".format(start_epoch))
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation set accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("termination")
                    break

Epoch: 0 	Validation set accuracy: 91.960% 	Loss: 0.30055
Epoch: 5 	Validation set accuracy: 96.240% 	Loss: 0.14103
Epoch: 10 	Validation set accuracy: 97.500% 	Loss: 0.09193
Epoch: 15 	Validation set accuracy: 97.760% 	Loss: 0.07634
Epoch: 20 	Validation set accuracy: 98.120% 	Loss: 0.06699
Epoch: 25 	Validation set accuracy: 98.100% 	Loss: 0.06271
Epoch: 30 	Validation set accuracy: 98.120% 	Loss: 0.06459
Epoch: 35 	Validation set accuracy: 98.320% 	Loss: 0.06318
Epoch: 40 	Validation set accuracy: 98.280% 	Loss: 0.06130
Epoch: 45 	Validation set accuracy: 98.300% 	Loss: 0.06202
Epoch: 50 	Validation set accuracy: 98.420% 	Loss: 0.06210
Epoch: 55 	Validation set accuracy: 98.380% 	Loss: 0.06355
Epoch: 60 	Validation set accuracy: 98.380% 	Loss: 0.06450
Epoch: 65 	Validation set accuracy: 98.300% 	Loss: 0.06554
Epoch: 70 	Validation set accuracy: 98.300% 	Loss: 0.06635
Epoch: 75 	Validation set accuracy: 98.360% 	Loss: 0.06572
Epoch: 80 	Validation set accuracy: 98.340% 	Loss: 0.06655

## Dropout NN 2 Layers(ReLU) with max-norm constraint 4096 units

In [6]:
import os
n_inputs = 28*28  
n_hidden1 = 4096
n_hidden2 = 4096
n_outputs = 10

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

# Implement dropout
dropout_rate = 0.5  # == 1 - keep_prob
X_drop = tf.layers.dropout(X, 0.8, training=training)

max_norm_reg = max_norm_regularizer(threshold = 1.0)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    hidden2 = tf.layers.dense(tf.layers.dropout(hidden1, dropout_rate, training=training), n_hidden2, name="hidden2",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    logits = tf.layers.dense(tf.layers.dropout(hidden2, dropout_rate, training=training), n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)
logdir = log_dir("mnist_dnn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
m, n = X_train.shape
n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/mnist_relu_norm4096.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./mnist_relu_norm4096"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Previous train was interrupted, will start at epoch {}".format(start_epoch))
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation set accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("termination")
                    break

Epoch: 0 	Validation set accuracy: 91.440% 	Loss: 0.32339
Epoch: 5 	Validation set accuracy: 95.520% 	Loss: 0.16703
Epoch: 10 	Validation set accuracy: 96.900% 	Loss: 0.11745
Epoch: 15 	Validation set accuracy: 97.500% 	Loss: 0.09419
Epoch: 20 	Validation set accuracy: 97.800% 	Loss: 0.08185
Epoch: 25 	Validation set accuracy: 97.920% 	Loss: 0.07378
Epoch: 30 	Validation set accuracy: 98.100% 	Loss: 0.07026
Epoch: 35 	Validation set accuracy: 98.140% 	Loss: 0.06700
Epoch: 40 	Validation set accuracy: 98.180% 	Loss: 0.06405
Epoch: 45 	Validation set accuracy: 98.280% 	Loss: 0.06229
Epoch: 50 	Validation set accuracy: 98.340% 	Loss: 0.06153
Epoch: 55 	Validation set accuracy: 98.240% 	Loss: 0.06114
Epoch: 60 	Validation set accuracy: 98.280% 	Loss: 0.06186
Epoch: 65 	Validation set accuracy: 98.360% 	Loss: 0.06145
Epoch: 70 	Validation set accuracy: 98.300% 	Loss: 0.06154
Epoch: 75 	Validation set accuracy: 98.380% 	Loss: 0.06072
Epoch: 80 	Validation set accuracy: 98.420% 	Loss: 0.06114

## Dropout NN 2 Layers(ReLU) with max-norm constraint 8192 units

In [7]:
import os
n_inputs = 28*28  
n_hidden1 = 8192
n_hidden2 = 8192
n_outputs = 10

reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')

# Implement dropout
dropout_rate = 0.5  # == 1 - keep_prob
X_drop = tf.layers.dropout(X, 0.8, training=training)

max_norm_reg = max_norm_regularizer(threshold = 1.0)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, n_hidden1, name="hidden1",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    hidden2 = tf.layers.dense(tf.layers.dropout(hidden1, dropout_rate, training=training), n_hidden2, name="hidden2",
                              activation=tf.nn.relu, kernel_regularizer=max_norm_reg)
    logits = tf.layers.dense(tf.layers.dropout(hidden2, dropout_rate, training=training), n_outputs, name="outputs")
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

from datetime import datetime

def log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)
logdir = log_dir("mnist_dnn")
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
m, n = X_train.shape
n_epochs = 10001
batch_size = 50
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "/tmp/mnist_relu_norm8192.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./mnist_relu_norm8192"

best_loss = np.infty
epochs_without_progress = 0
max_epochs_without_progress = 50

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, "rb") as f:
            start_epoch = int(f.read())
        print("Previous train was interrupted, will start at epoch {}".format(start_epoch))
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        sess.run(init)

    for epoch in range(start_epoch, n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val, loss_val, accuracy_summary_str, loss_summary_str = sess.run([accuracy, loss, accuracy_summary, loss_summary], feed_dict={X: X_valid, y: y_valid})
        file_writer.add_summary(accuracy_summary_str, epoch)
        file_writer.add_summary(loss_summary_str, epoch)
        if epoch % 5 == 0:
            print("Epoch:", epoch,
                  "\tValidation set accuracy: {:.3f}%".format(accuracy_val * 100),
                  "\tLoss: {:.5f}".format(loss_val))
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
            if loss_val < best_loss:
                saver.save(sess, final_model_path)
                best_loss = loss_val
            else:
                epochs_without_progress += 5
                if epochs_without_progress > max_epochs_without_progress:
                    print("termination")
                    break

Epoch: 0 	Validation set accuracy: 91.640% 	Loss: 0.31973
Epoch: 5 	Validation set accuracy: 95.520% 	Loss: 0.16414
Epoch: 10 	Validation set accuracy: 97.040% 	Loss: 0.11532
Epoch: 15 	Validation set accuracy: 97.540% 	Loss: 0.09256
Epoch: 20 	Validation set accuracy: 97.860% 	Loss: 0.08076
Epoch: 25 	Validation set accuracy: 98.000% 	Loss: 0.07212
Epoch: 30 	Validation set accuracy: 98.040% 	Loss: 0.06872
Epoch: 35 	Validation set accuracy: 98.100% 	Loss: 0.06528
Epoch: 40 	Validation set accuracy: 98.280% 	Loss: 0.06236
Epoch: 45 	Validation set accuracy: 98.280% 	Loss: 0.06020
Epoch: 50 	Validation set accuracy: 98.300% 	Loss: 0.05943
Epoch: 55 	Validation set accuracy: 98.300% 	Loss: 0.05909
Epoch: 60 	Validation set accuracy: 98.360% 	Loss: 0.05978
Epoch: 65 	Validation set accuracy: 98.240% 	Loss: 0.05911
Epoch: 70 	Validation set accuracy: 98.360% 	Loss: 0.05897
Epoch: 75 	Validation set accuracy: 98.420% 	Loss: 0.05832
Epoch: 80 	Validation set accuracy: 98.340% 	Loss: 0.05867

In [8]:
tf.reset_default_graph()