In [2]:
# NOTE: this is a custom cell that contains the common imports I personally 
# use these may/may not be necessary for the following examples

# DL framework
import tensorflow as tf

from datetime import datetime

# common packages
import numpy as np
import os # handling file i/o
import sys
import math
import time # timing epochs
import random

# for ordered dict when building layer components
import collections

# plotting pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib import colors # making colors consistent
from mpl_toolkits.axes_grid1 import make_axes_locatable # colorbar helper


# from imageio import imread # read image from disk
# + data augmentation
from scipy import ndimage
from scipy import misc


import pickle # manually saving best params
from sklearn.utils import shuffle # shuffling data batches
from tqdm import tqdm # display training progress bar

# const
SEED = 42

# Helper to make the output consistent
def reset_graph(seed=SEED):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# helper to create dirs if they don't already exist
def maybe_create_dir(dir_path):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        print("{} created".format(dir_path))
    else:
        print("{} already exists".format(dir_path))
    
def make_standard_dirs(saver=True, best_params=True, tf_logs=True):
    # `saver/` will hold tf saver files
    maybe_create_dir("saver")
    # `best_params/` will hold a serialized version of the best params
    # I like to keep this as a backup in case I run into issues with
    # the saver files
    maybe_create_dir("best_params")
    # `tf_logs/` will hold the logs that will be visable in tensorboard
    maybe_create_dir("tf_logs")

    
# set tf log level to supress messages, unless an error
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Important Version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Check if using GPU
if not tf.test.gpu_device_name():
    print('No GPU')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
    
reset_graph()

Python: (3, 5, 4, 'final', 0)
TensorFlow: 1.4.0
Default GPU Device: /device:GPU:0


In [3]:
make_standard_dirs()

saver already exists
best_params already exists
tf_logs already exists


In [4]:
### Clean all logs
## WARNING! You likely don't want to do this (but if you do, this is a convenient call)
# !rm -r -f ./tf_logs/*

In [5]:
# these two functions (get_model_params and restore_model_params) are 
# ad[a|o]pted from; 
# https://github.com/ageron/handson-ml/blob/master/11_deep_learning.ipynb
def get_model_params():
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {global_vars.op.name: value for global_vars, value in 
            zip(global_vars, tf.get_default_session().run(global_vars))}

def restore_model_params(model_params, g, sess):
    gvar_names = list(model_params.keys())
    assign_ops = {gvar_name: g.get_operation_by_name(gvar_name + "/Assign")
                  for gvar_name in gvar_names}
    init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
    feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
    sess.run(assign_ops, feed_dict=feed_dict)

# these two functions are used to manually save the best
# model params to disk
def save_obj(obj, name):
    with open('best_params/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open('best_params/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [6]:
ROOT_DATA = "../../ROOT_DATA/"
DATA_DIR = "mnist_data"
BEST_PARAMS_PATH = "best_params"

MNIST_TRAINING_PATH = os.path.join(ROOT_DATA, DATA_DIR)
# ensure we have the correct directory
for _, _, files in os.walk(MNIST_TRAINING_PATH):
    files = sorted(files)
    for filename in files:
        print(filename)

t10k-images-idx3-ubyte.gz
t10k-labels-idx1-ubyte.gz
train-images-idx3-ubyte.gz
train-labels-idx1-ubyte.gz


In [7]:
from tensorflow.examples.tutorials.mnist import input_data
MNIST = input_data.read_data_sets(MNIST_TRAINING_PATH, one_hot=True)

Extracting ../../ROOT_DATA/mnist_data/train-images-idx3-ubyte.gz
Extracting ../../ROOT_DATA/mnist_data/train-labels-idx1-ubyte.gz
Extracting ../../ROOT_DATA/mnist_data/t10k-images-idx3-ubyte.gz
Extracting ../../ROOT_DATA/mnist_data/t10k-labels-idx1-ubyte.gz


In [8]:
def create_hyper_params():
    hypms = {}
    hypms['n_epochs'] = 60
    hypms['batch_size'] = 512
    hypms['init_lr'] = 1e-3
    hypms['raw_input_size'] = 28*28
    hypms['in_dim'] = [28,28,1] # width,height,channels
    hypms['fc_dropout'] = 0.5
    hypms['n_classes'] = 10
    hypms['filter_dim'] = 3 # default will be 3x3
    return hypms
hypms = create_hyper_params()

In [9]:
# this isn't my favorite way to design the architecture, but defining
# the architecture up front like this will save us from manually
# inputing layer sizes later on.
def create_layer_def():
    global hypms
    lyrdef = {}
    lyrdef['conv_01_depth'] = 64
    lyrdef['conv_02_depth'] = 128
    lyrdef['conv_03_depth'] = 256
    lyrdef['fc_01'] = 1024
    lyrdef['fc_02'] = 56
    lyrdef['output'] = hypms['n_classes']
    return lyrdef
lyrdef = create_layer_def()

In [10]:
def build_graph():
    global hypms # hyper params
    global lyrdef # layer definition params
    g = tf.Graph()
    with g.as_default():
        with tf.name_scope("architecture"):
            with tf.name_scope("model"):
                # inputs
                with tf.name_scope("inputs"):
                    X_raw = tf.placeholder(tf.float32, shape=[None, hypms['raw_input_size']])
                    X = tf.reshape(X_raw, 
                                   shape=[-1, hypms['in_dim'][0],hypms['in_dim'][1],hypms['in_dim'][2]], 
                                   name="data") # Input
                    y = tf.placeholder(tf.float32, shape=[None, hypms['n_classes']], name="labels") # Target

                with tf.name_scope("conv_layers"):
                    with tf.name_scope("01"):
                        with tf.name_scope("conv"):
                            w_01 = tf.Variable(tf.random_normal([hypms['filter_dim'],
                                                                 hypms['filter_dim'],
                                                                 hypms['in_dim'][2],
                                                                 lyrdef['conv_01_depth']]))
                            b_01 = tf.Variable(tf.random_normal([lyrdef['conv_01_depth']]))
                            conv_01 = tf.nn.conv2d(X, w_01, strides=[1,1,1,1], 
                                                   padding='SAME', name="convolution")
                            conv_01 = tf.nn.bias_add(conv_01, b_01, name="add_bias")
                            conv_01_out = tf.nn.elu(conv_01, name="activation")
                        with tf.name_scope("pool"):
                            l_01_out = tf.nn.max_pool(conv_01_out, 
                                                      ksize=[1,2,2,1], strides=[1,2,2,1], 
                                                      padding='SAME', name="max_pool")

                    with tf.name_scope("02"):
                        with tf.name_scope("conv"):
                            w_02 = tf.Variable(tf.random_normal([hypms['filter_dim'],
                                                                 hypms['filter_dim'],
                                                                 lyrdef['conv_01_depth'],
                                                                 lyrdef['conv_02_depth']]))
                            b_02 = tf.Variable(tf.random_normal([lyrdef['conv_02_depth']]))
                            conv_02 = tf.nn.conv2d(l_01_out, w_02, strides=[1,1,1,1],
                                                   padding='SAME', name="convolution")
                            conv_02 = tf.nn.bias_add(conv_02, b_02, name="add_bias")
                            conv_02_out = tf.nn.elu(conv_02, name="activation")
                        with tf.name_scope("pool"):
                            l_02_out = tf.nn.max_pool(conv_02_out, 
                                                      ksize=[1,2,2,1], strides=[1,2,2,1], 
                                                      padding='SAME', name="max_pool")

                    with tf.name_scope("03"):
                        with tf.name_scope("conv"):
                            w_03 = tf.Variable(tf.random_normal([hypms['filter_dim'],
                                                                 hypms['filter_dim'],
                                                                 lyrdef['conv_02_depth'],
                                                                 lyrdef['conv_03_depth']]))
                            b_03 = tf.Variable(tf.random_normal([lyrdef['conv_03_depth']]))
                            conv_03 = tf.nn.conv2d(l_02_out, w_03, strides=[1,1,1,1],
                                                   padding='SAME', name="convolution")
                            conv_03 = tf.nn.bias_add(conv_03, b_03, name="add_bias")
                            conv_03_out = tf.nn.elu(conv_03, name="activation")
                        with tf.name_scope("pool"):
                            l_03_out = tf.nn.max_pool(conv_03_out, 
                                                      ksize=[1,2,2,1], strides=[1,2,2,1], 
                                                      padding='SAME', name="max_pool")


                with tf.name_scope("fully_connected"):
                    # reshape to flatten
                    last_shape = int(np.prod(l_03_out.get_shape()[1:]))
                    flattened = tf.reshape(l_03_out, shape=[-1, last_shape])

                    with tf.name_scope("fc1"):
                        w_fc1 = tf.Variable(tf.random_normal([last_shape, lyrdef['fc_01']]))
                        b_fc1 = tf.Variable(tf.random_normal([lyrdef['fc_01']]))
                        fc1 = tf.matmul(flattened, w_fc1, name="mult_weights")
                        fc1 = tf.add(fc1, b_fc1, name="add_bias")
                        fc1 = tf.nn.elu(fc1, name="activation")

                    with tf.name_scope("fc2"):
                        w_fc2 = tf.Variable(tf.random_normal([lyrdef['fc_01'], lyrdef['fc_02']]))
                        b_fc2 = tf.Variable(tf.random_normal([lyrdef['fc_02']]))
                        fc2 = tf.matmul(fc1, w_fc2, name="mult_weights")
                        fc2 = tf.add(fc2, b_fc2, name="add_bias")
                        fc2 = tf.nn.elu(fc2, name="activation")

                    with tf.name_scope("output"):
                        w_out = tf.Variable(tf.random_normal([lyrdef['fc_02'], lyrdef['output']]))
                        b_out = tf.Variable(tf.random_normal([lyrdef['output']]))
                        out = tf.matmul(fc2, w_out, name="mult_weights")
                        logits = tf.add(out, b_out, name="logits")
                        Y_proba = tf.nn.softmax(logits, name="Y_proba")

            with tf.name_scope("train"):
                xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
                batch_loss = tf.reduce_mean(xentropy)
                optimizer = tf.train.AdamOptimizer(learning_rate=hypms['init_lr'])
                training_op = optimizer.minimize(batch_loss)

            with tf.name_scope("save_session"):
                init_global = tf.global_variables_initializer()
                init_local = tf.local_variables_initializer()
                saver = tf.train.Saver()

            # Ops: training metrics
            with tf.name_scope("metrics"):
                # ================================== performance
                with tf.name_scope("common"):
                    y_true_cls = tf.argmax(y,1)
                    y_pred_cls = tf.argmax(Y_proba,1)
                    correct_prediction = tf.equal(y_pred_cls, y_true_cls, name="correct_predictions")
                    batch_acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
                with tf.name_scope("train_metrics") as scope:
                    train_auc, train_auc_update = tf.metrics.auc(labels=y, predictions=Y_proba)
                    train_acc, train_acc_update = tf.metrics.accuracy(labels=y_true_cls, predictions=y_pred_cls)
                    train_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                    train_met_reset_op = tf.variables_initializer(train_acc_vars, name="train_met_reset_op")
                with tf.name_scope("val_metrics") as scope:
                    val_auc, val_auc_update = tf.metrics.auc(labels=y, predictions=Y_proba)
                    val_acc, val_acc_update = tf.metrics.accuracy(labels=y_true_cls, predictions=y_pred_cls)
                    val_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                    val_met_reset_op = tf.variables_initializer(val_acc_vars, name="val_met_reset_op")
                with tf.name_scope("test_metrics") as scope:
                    test_auc, test_auc_update = tf.metrics.auc(labels=y, predictions=Y_proba)
                    test_acc, test_acc_update = tf.metrics.accuracy(labels=y_true_cls, predictions=y_pred_cls)
                    test_acc_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                    test_acc_reset_op = tf.variables_initializer(test_acc_vars, name="test_met_reset_op")

                # =============================================== loss 
                with tf.name_scope("train_loss_eval") as scope:
                    train_mean_loss, train_mean_loss_update = tf.metrics.mean(batch_loss)
                    train_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                    train_loss_reset_op = tf.variables_initializer(train_loss_vars, name="train_loss_reset_op")
                with tf.name_scope("val_loss_eval") as scope:
                    val_mean_loss, val_mean_loss_update = tf.metrics.mean(batch_loss)
                    val_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                    val_loss_reset_op = tf.variables_initializer(val_loss_vars, name="val_loss_reset_op")
                with tf.name_scope("test_loss_eval")as scope:
                    test_mean_loss, test_mean_loss_update = tf.metrics.mean(batch_loss)
                    test_loss_vars = tf.contrib.framework.get_variables(scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
                    test_loss_reset_op = tf.variables_initializer(test_loss_vars, name="test_loss_rest_op")

            # --- create collections
            for node in (saver, init_global, init_local):
                g.add_to_collection("save_init", node)
            for node in (X_raw, y, training_op):
                g.add_to_collection("main_ops", node)
            for node in (Y_proba, y_true_cls, y_pred_cls, correct_prediction):
                g.add_to_collection("preds", node)
            for node in (train_auc, train_auc_update, train_acc, train_acc_update, train_met_reset_op):
                g.add_to_collection("train_metrics", node)
            for node in (val_auc, val_auc_update, val_acc, val_acc_update, val_met_reset_op):
                g.add_to_collection("val_metrics", node)
            for node in (test_auc, test_auc_update, test_acc, test_acc_update, test_acc_reset_op):
                g.add_to_collection("test_metrics", node)
            for node in (train_mean_loss, train_mean_loss_update, train_loss_reset_op):
                g.add_to_collection("train_loss", node)
            for node in (val_mean_loss, val_mean_loss_update, val_loss_reset_op):
                g.add_to_collection("val_loss", node)
            for node in (test_mean_loss, test_mean_loss_update, test_loss_reset_op):
                g.add_to_collection("test_loss", node)
            g.add_to_collection("logits", logits)

            # ===================================== tensorboard
            with tf.name_scope("tensorboard_writer") as scope:
                epoch_train_loss_scalar = tf.summary.scalar('train_epoch_loss', train_mean_loss)
                epoch_train_acc_scalar = tf.summary.scalar('train_epoch_acc', train_acc)
                epoch_train_auc_scalar = tf.summary.scalar('train_epoch_auc', train_auc)
                epoch_train_write_op = tf.summary.merge([epoch_train_loss_scalar, epoch_train_acc_scalar, epoch_train_auc_scalar], name="epoch_train_write_op")

                # ===== epoch, validation
                epoch_validation_loss_scalar = tf.summary.scalar('validation_epoch_loss', val_mean_loss)
                epoch_validation_acc_scalar = tf.summary.scalar('validation_epoch_acc', val_acc)
                epoch_validation_auc_scalar = tf.summary.scalar('validation_epoch_auc', val_auc)
                epoch_validation_write_op = tf.summary.merge([epoch_validation_loss_scalar, epoch_validation_acc_scalar, epoch_validation_auc_scalar], name="epoch_validation_write_op")

            for node in (epoch_train_write_op, epoch_validation_write_op):
                g.add_to_collection("tensorboard", node)

    return g

In [11]:
def train_graph(g, hypms):
    global BEST_PARAMS_PATH
    saver, init_global, init_local = g.get_collection("save_init")
    X_raw, y, training_op = g.get_collection("main_ops")
    preds, y_true_cls, y_pred_cls, _ = g.get_collection("preds")
    train_auc, train_auc_update, train_acc, train_acc_update, train_met_reset_op = g.get_collection("train_metrics")
    val_auc, val_auc_update, val_acc, val_acc_update, val_met_reset_op = g.get_collection("val_metrics")
    train_mean_loss, train_mean_loss_update, train_loss_reset_op = g.get_collection("train_loss")
    val_mean_loss, val_mean_loss_update, val_loss_reset_op = g.get_collection("val_loss")
    epoch_train_write_op, epoch_validation_write_op = g.get_collection("tensorboard")
    train_writer = tf.summary.FileWriter(os.path.join("tf_logs","train"))
    val_writer = tf.summary.FileWriter(os.path.join("tf_logs","validation"))
    best_val_loss = np.inf
    
    with tf.Session(graph=g) as sess:
        sess.run([init_global, init_local])
        train_writer.add_summary
        for e in tqdm(range(1,hypms['n_epochs']+1)):
            sess.run([val_met_reset_op,val_loss_reset_op,train_met_reset_op,train_loss_reset_op])
            n_batches = int(MNIST.train.num_examples/hypms['batch_size'])
            for i in range(1,n_batches+1):
                data, target = MNIST.train.next_batch(hypms['batch_size'])
                sess.run([training_op, train_auc_update, train_acc_update, train_mean_loss_update], 
                         feed_dict={X_raw:data, y:target})

            # write average for epoch + graph information
            # run options is not working as expected -- my directory link must be broken
            #run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            summary = sess.run(epoch_train_write_op, run_metadata=run_metadata)
            train_writer.add_run_metadata(run_metadata, str(e)) # not sure on this..
            train_writer.add_graph(g)
            train_writer.add_summary(summary, e)
            train_writer.flush()

            # run validation
            n_batches = int(MNIST.validation.num_examples/hypms['batch_size'])
            for i in range(1,n_batches+1):
                Xb, yb = MNIST.validation.next_batch(hypms['batch_size'])
                sess.run([val_auc_update, val_acc_update, val_mean_loss_update], 
                                  feed_dict={X_raw:Xb, y:yb})

            # check for (and save) best validation params here
            cur_loss, cur_acc = sess.run([val_mean_loss, val_acc])
            if cur_loss < best_val_loss:
                best_val_loss = cur_loss
                best_params = get_model_params()
                save_obj(best_params, BEST_PARAMS_PATH)
                print("best params saved: val acc: {:.3f}% val loss: {:.4f}".format(cur_acc*100, cur_loss))
            
            summary = sess.run(epoch_validation_write_op) 
            val_writer.add_summary(summary, e)
            val_writer.flush()

        # close writers
        train_writer.close()
        val_writer.close()
    return sess

In [12]:
reset_graph()
hypms = create_hyper_params()
g = build_graph()
sess = train_graph(g, hypms)

  2%|▏         | 1/60 [00:05<05:15,  5.34s/it]

best params saved: val acc: 88.954% val loss: 93217.7578


  3%|▎         | 2/60 [00:09<04:41,  4.85s/it]

best params saved: val acc: 91.688% val loss: 68134.9531


  5%|▌         | 3/60 [00:14<04:27,  4.70s/it]

best params saved: val acc: 94.293% val loss: 47134.1367


  7%|▋         | 4/60 [00:18<04:19,  4.63s/it]

best params saved: val acc: 94.987% val loss: 35515.1172


  8%|▊         | 5/60 [00:22<04:09,  4.54s/it]

best params saved: val acc: 96.332% val loss: 24741.5586


 15%|█▌        | 9/60 [00:39<03:45,  4.43s/it]

best params saved: val acc: 96.441% val loss: 23092.1953


 20%|██        | 12/60 [00:52<03:29,  4.37s/it]

best params saved: val acc: 97.092% val loss: 19016.3672


 22%|██▏       | 13/60 [00:56<03:24,  4.36s/it]

best params saved: val acc: 97.374% val loss: 18676.9043


 23%|██▎       | 14/60 [01:01<03:20,  4.37s/it]

best params saved: val acc: 97.157% val loss: 18137.5508


 27%|██▋       | 16/60 [01:09<03:11,  4.35s/it]

best params saved: val acc: 97.439% val loss: 17526.5527


 30%|███       | 18/60 [01:18<03:02,  4.35s/it]

best params saved: val acc: 97.352% val loss: 16235.7363


 33%|███▎      | 20/60 [01:26<02:53,  4.34s/it]

best params saved: val acc: 97.765% val loss: 15813.6162


 37%|███▋      | 22/60 [01:35<02:44,  4.33s/it]

best params saved: val acc: 97.765% val loss: 15383.5576


 48%|████▊     | 29/60 [02:04<02:12,  4.28s/it]

best params saved: val acc: 98.090% val loss: 14502.6924


 50%|█████     | 30/60 [02:08<02:08,  4.27s/it]

best params saved: val acc: 98.155% val loss: 13306.4648


 53%|█████▎    | 32/60 [02:16<01:59,  4.27s/it]

best params saved: val acc: 98.025% val loss: 12348.7129


 60%|██████    | 36/60 [02:33<01:42,  4.25s/it]

best params saved: val acc: 98.177% val loss: 12337.7246


 75%|███████▌  | 45/60 [03:09<01:03,  4.22s/it]

best params saved: val acc: 98.351% val loss: 10849.0820


100%|██████████| 60/60 [04:11<00:00,  4.19s/it]

best params saved: val acc: 98.698% val loss: 10149.4072





In [16]:
reset_graph()
hypms = create_hyper_params()
g_eval = build_graph()
best_params = load_obj(BEST_PARAMS_PATH)
with tf.Session(graph=g_eval) as sess:
    saver, init_global, init_local = g_eval.get_collection("save_init")
    X_raw, y, training_op = g_eval.get_collection("main_ops")
    Y_proba, y_true_cls, y_pred_cls, _ = g_eval.get_collection("preds")
    test_auc, test_auc_update, test_acc, test_acc_update, test_acc_reset_op = g_eval.get_collection("test_metrics")
    test_mean_loss, test_mean_loss_update, test_loss_reset_op = g_eval.get_collection("test_loss")
    
    restore_model_params(model_params=best_params, g=g_eval, sess=sess)
    sess.run([test_acc_reset_op, test_loss_reset_op])

    # run test
    n_batches = int(MNIST.test.num_examples/hypms['batch_size'])
    for i in range(1,n_batches+1):
        Xb, yb = MNIST.test.next_batch(hypms['batch_size'])
        sess.run([test_auc_update, test_acc_update, test_mean_loss_update], 
                     feed_dict={X_raw:Xb, y:yb})      
            
    # print
    final_test_acc, final_test_loss, final_test_auc = sess.run([test_acc, test_mean_loss, test_auc])
    print("test auc: {:.3f}% acc: {:.3f}% loss: {:.5f}".format(final_test_auc*100, 
                                                              final_test_acc*100,
                                                              final_test_loss))

test auc: 99.172% acc: 98.509% loss: 10754.31543


In [14]:
# TODO: Add dropout