# Table of contents

 - [Load](#Load)
 - [Simple Network](#Simple-Network)
 - [Deep fully connected network](#Deep-fully-connected-network)
 - [Deep fully connected network with Tensorboard (coming soon)](#Simple-Convolutional-Network-with-Tensorboard)

The preparation of the dataset is done in an other [notebook]()

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import os
import time
from six.moves import cPickle as pickle
import tensorflow as tf

## Load

I tried 2 sizes for the validation set:
 - 60 000 that enables me and my GPU with 4Go of ram to deal with the validation set without using batchs
 - 80 000 where I need to put the validation set into a set of batchs for evaluation

In [2]:
# Load your dataset
data_root = '.\Data\\notmnist\\' # Change me to store data elsewhere
pickle_file = 'notMNIST_valid_60k.pickle'
try:
    pickle_file = os.path.join(data_root, pickle_file)
    with open(pickle_file, 'rb') as f:
        save = pickle.load(f)
        train_dataset = save['train_dataset']
        train_labels = save['train_labels']
        valid_dataset = save['valid_dataset']
        valid_labels = save['valid_labels']
        test_dataset = save['test_dataset']
        test_labels = save['test_labels']
        del save  # hint to help gc free up memory
        print('Training set', train_dataset.shape, train_labels.shape)
        print('Validation set', valid_dataset.shape, valid_labels.shape)
        print('Test set', test_dataset.shape, test_labels.shape)
except Exception as e:
    print('Unable to load data from', pickle_file, ':', e)

Training set (469090, 28, 28) (469090,)
Validation set (60000, 28, 28) (60000,)
Test set (18720, 28, 28) (18720,)


In [3]:
# Data set parameters 
image_size = 28  # Pixel width and height.
n_input = image_size*image_size # MNIST data input
num_labels = 10 # Number of labels

In [4]:
def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
    labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
    return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (469090, 784) (469090, 10)
Validation set (60000, 784) (60000, 10)
Test set (18720, 784) (18720, 10)


In [5]:
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

## Simple Network

Simple neural network with:
 - 1 hidden layer
 - Relu activation for hidden unit
 - Softmax loss
 - L2 regularization
 - Dropout
 - SGB with fixed learning rate

This implementation is not using correctly placeholders but is straightforward and try to give a good insight of the mathematical operations.

In [6]:
# Parameters of the model
batch_size = 128
size_hidden_node = 1024
learning_rate = 0.5
beta_regul = 1e-3
dropout_rate = 1 # Not necessary here
size_train = train_labels.shape[0]

graph = tf.Graph()
with graph.as_default():

    # Constant for valid and test
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    # Placeholder for training
    tf_train_dataset = tf.placeholder(tf.float32,shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    
    # Layer 1 - this is our hidden layer
    weights_layer1 = tf.Variable(tf.truncated_normal([image_size * image_size, size_hidden_node]))
    biases_layer1 = tf.Variable(tf.zeros([size_hidden_node]))
    out_layer1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights_layer1) + biases_layer1)
    out_layer1_dropout = tf.nn.dropout(out_layer1, dropout_rate) 
    
    # Layer 2
    weights_layer2 = tf.Variable(tf.truncated_normal([size_hidden_node, num_labels]))
    biases_layer2 = tf.Variable(tf.zeros([num_labels]))
    out_layer2 = tf.matmul(out_layer1_dropout, weights_layer2) + biases_layer2
    
    # Loss function
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=out_layer2))
    regularization = tf.nn.l2_loss(weights_layer1) + tf.nn.l2_loss(weights_layer2)
    reg_loss = loss + beta_regul * regularization

    # Optimizer
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(reg_loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(out_layer2)
    
    valid_out_layer1 = tf.nn.relu(tf.matmul(tf_valid_dataset, weights_layer1) + biases_layer1)
    valid_out_layer2 = tf.matmul(valid_out_layer1, weights_layer2) + biases_layer2
    valid_prediction = tf.nn.softmax(valid_out_layer2)
    
    test_out_layer1 = tf.nn.relu(tf.matmul(tf_test_dataset, weights_layer1) + biases_layer1)
    test_out_layer2 = tf.matmul(test_out_layer1, weights_layer2) + biases_layer2
    test_prediction = tf.nn.softmax(test_out_layer2)

In [9]:
n_epoch = 10
n_step_one_epoch = int(size_train / batch_size) # Some images will not be used
num_steps = int(n_epoch * n_step_one_epoch)
t1 = time.time()   

num_epoch = 1
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        if ( step % n_step_one_epoch == 0 ):
            print('\n------Epoch n°%d ------\n' % num_epoch)
            num_epoch +=1
        # Simple offset for batch
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Batch creation
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Feed dictionary
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        # Run optimizer, loss and train_prediction with the feeded batch
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 2000 == 0):
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
            print()
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized

------Epoch n°1 ------

Minibatch loss at step 0: 322.194519
Minibatch accuracy: 6.2%
Validation accuracy: 30.3%

Minibatch loss at step 2000: 0.839249
Minibatch accuracy: 83.6%
Validation accuracy: 84.6%


------Epoch n°2 ------

Minibatch loss at step 4000: 0.497808
Minibatch accuracy: 85.2%
Validation accuracy: 87.3%

Minibatch loss at step 6000: 0.551060
Minibatch accuracy: 84.4%
Validation accuracy: 88.1%


------Epoch n°3 ------

Minibatch loss at step 8000: 0.311141
Minibatch accuracy: 89.8%
Validation accuracy: 88.3%

Minibatch loss at step 10000: 0.351788
Minibatch accuracy: 87.5%
Validation accuracy: 88.4%


------Epoch n°4 ------

Minibatch loss at step 12000: 0.327644
Minibatch accuracy: 90.6%
Validation accuracy: 88.4%

Minibatch loss at step 14000: 0.344216
Minibatch accuracy: 88.3%
Validation accuracy: 88.1%


------Epoch n°5 ------

Minibatch loss at step 16000: 0.434843
Minibatch accuracy: 85.2%
Validation accuracy: 88.6%

Minibatch loss at step 18000: 0.3

## Deep fully connected network

Let's build a more advanced fully connected network with re-usable code made of functions

Deep neural network with:
 - 3 hidden layers
 - Relu activation for hidden layers
 - Softmax loss
 - L2 regularization
 - RMSProp with fixed decay learning rate and momentum
 - Dropout

The network is not really tuned, to do it's easier with tensorboard

In [10]:
def weight_variable(shape,stddev,name):
    """Create a weight variable with appropriate initialization."""
    initial = tf.truncated_normal(shape, stddev=stddev, name=name)
    return tf.Variable(initial)

In [11]:
def bias_variable(shape, value, name):
    """Create a bias variable with appropriate initialization."""
    initial = tf.constant(value, shape=shape, name=name)
    return tf.Variable(initial)

In [20]:
def nn_layer(input_tensor, input_dim, output_dim, keep_prob, layer_name, weight_stddev, bias_value ,act=tf.nn.relu):
    """Reusable code for making a simple neural net layer. 
    It does a matrix multiply, bias add, and then an activation function.
    """
    weights = weight_variable([input_dim, output_dim], weight_stddev, 'weight_'+layer_name)
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, weights)
    biases = bias_variable([output_dim], bias_value, 'bias_'+layer_name)
    preactivate = tf.matmul(input_tensor, weights) + biases
    activations = act(preactivate, name='activation')
    dropped = tf.nn.dropout(activations, keep_prob)
    return activations

In [13]:
def loss_function(labels, output_layer, coef_reg, reg='l2', coef_reg2=1e-3):
    """ Loss function with softmax implementing L1,L2 and L1+L2 regularization"""
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=output_layer))
    if (reg == 'l1'):
        regularizer = tf.contrib.layers.l1_regularizer(scale=coef_reg)
    elif (reg == 'l2'):
        regularizer = tf.contrib.layers.l2_regularizer(scale=coef_reg)
    else:
        l1 = regularizer = tf.contrib.layers.l1_regularizer(scale=coef_reg)
        l2 = regularizer = tf.contrib.layers.l2_regularizer(scale=coef_reg2)
        regularizer = tf.contrib.layers.sum_regularizer([l1,l2])
        
    reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_term = tf.contrib.layers.apply_regularization(regularizer, reg_variables)
    reg_loss = cross_entropy + reg_term
    return reg_loss

In [14]:
def feed_dict(dataset, dropout_rate=0.5):
    """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
    if (dataset == 'train'):
        offset = (step * batch_size) % (size_train - batch_size)
        data_batch = train_dataset[offset:(offset + batch_size), :]
        label_batch = train_labels[offset:(offset + batch_size), :]
        keep_prob = dropout_rate
    elif (dataset == 'validation'):
        data_batch = valid_dataset
        label_batch = valid_labels
        keep_prob = 1
    elif (dataset == 'test'):
        data_batch = test_dataset
        label_batch = test_labels
        keep_prob = 1
    return {x: data_batch, y_: label_batch, k: keep_prob}

In [15]:
# Data set parameters 
image_size = 28  # Pixel width and height.
n_input = image_size*image_size # MNIST data input
num_labels = 10 # Number of labels
size_train = train_labels.shape[0]

# Parameters of the model
batch_size = 300
size_hidden_layer = [1024,300,60]
std_weights = [np.sqrt(2.0/size_hidden_layer[0]),np.sqrt(2.0/size_hidden_layer[1]),
               np.sqrt(2.0/size_hidden_layer[2]),0.01]
bias_value = 0.1
learning_rate = 1e-4
regul = 8e-4
decay_rate = 0.95
dropout_rate = 0.5
momentum = 0.5

graph = tf.Graph()
with graph.as_default():
    
    # Count the number of steps taken
    global_step = tf.Variable(0) 
    
    # Placeholder for train, valid and test and the dropout rate
    x = tf.placeholder(tf.float32, shape=[None, image_size * image_size], name='x-input')
    y_ = tf.placeholder(tf.float32, shape=[None, num_labels], name='y-input')
    k = tf.placeholder(tf.float32)
    
    # Define the model
    layer_1 = nn_layer(x, n_input, size_hidden_layer[0], k, 
                       'layer_1', std_weights[0], bias_value, tf.nn.relu)
    layer_2 = nn_layer(layer_1, size_hidden_layer[0], size_hidden_layer[1], k, 
                       'layer_2', std_weights[1], bias_value, tf.nn.relu)
    layer_3 = nn_layer(layer_2, size_hidden_layer[1], size_hidden_layer[2], k, 
                       'layer_3', std_weights[2], bias_value, tf.nn.relu)
    y = nn_layer(layer_3, size_hidden_layer[2], num_labels, k, 
                 'layer_4', std_weights[3], bias_value, tf.identity)       
    
    # Loss function - Cross entropy + regularization   
    reg_loss = loss_function(y_, y, regul, reg='l2')

    # Optimizer    
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=decay_rate, momentum=momentum
                                         ).minimize(reg_loss, global_step=global_step)
    
    # Predictions for the train, valid, and test.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    accuracy = tf.cast(accuracy, tf.float32)

In [19]:
n_epoch = 40
n_step_one_epoch = int(size_train / batch_size)
num_steps = int(n_epoch * n_step_one_epoch)
interval_step_Train = 1000
interval_step_Valid = 1000
num_epoch = 1

try:
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            if ( step % n_step_one_epoch == 0 ):
                print('\n------Epoch n°%d ------\n' % num_epoch)
                num_epoch +=1
            _, l, acc = session.run([optimizer, reg_loss, accuracy], feed_dict=feed_dict('train',dropout_rate))
            # Run optimizer, loss and train_prediction with the feeded batch 
            if (step % interval_step_Train == 0):
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.2f%% " % (100*acc))
            if (step % interval_step_Valid == 0):
                print('Validation accuracy: %.2f%%' % (100*accuracy.eval(feed_dict=feed_dict('validation'))))
                print()
        print('Test accuracy: %.2f%%' % (100*accuracy.eval(feed_dict=feed_dict('test'))))
except Exception as e:
    print('An error occur in step', step, ':', e)

Initialized

------Epoch n°1 ------

Minibatch loss at step 0: 3.604520
Minibatch accuracy: 12.00% 
Validation accuracy: 10.38%

Minibatch loss at step 1000: 1.203882
Minibatch accuracy: 88.00% 
Validation accuracy: 86.89%


------Epoch n°2 ------

Minibatch loss at step 2000: 0.792411
Minibatch accuracy: 88.33% 
Validation accuracy: 88.48%

Minibatch loss at step 3000: 0.605802
Minibatch accuracy: 89.67% 
Validation accuracy: 89.12%


------Epoch n°3 ------

Minibatch loss at step 4000: 0.615607
Minibatch accuracy: 89.00% 
Validation accuracy: 89.46%


------Epoch n°4 ------

Minibatch loss at step 5000: 0.499206
Minibatch accuracy: 90.67% 
Validation accuracy: 89.93%

Minibatch loss at step 6000: 0.380488
Minibatch accuracy: 92.33% 
Validation accuracy: 89.91%


------Epoch n°5 ------

Minibatch loss at step 7000: 0.455447
Minibatch accuracy: 91.00% 
Validation accuracy: 90.25%


------Epoch n°6 ------

Minibatch loss at step 8000: 0.403935
Minibatch accuracy: 91.67% 
Validation accu

## Deep fully connected network with Tensorboard