# Table of contents

 - [Data Preparation](#Data-Preparation)
 - [Simple Convolution Network](#Simple-Convolution-Network)
 - [Advanced Convolution network](#Advanced-Convolution-network)
 - [Advanced Convolution network with Tensorboard (coming soon)](#Advanced-Convolution-network)

In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import time

## Data Preparation

### Loading data

In [2]:
# Directory and file name of the pickled notMNIST data set.
# Change data_root to load the data from elsewhere.
# Every backslash is escaped explicitly: the previous literal relied on the
# unrecognised escape '\D' being kept verbatim, which recent Python versions
# warn about. The resulting string is unchanged.
data_root = '.\\Data\\notmnist\\'
pickle_file = 'notMNIST_valid_60k.pickle'

# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
with open(data_root + pickle_file, 'rb') as f:
    save = pickle.load(f)

# The pickle holds a dict with one images array and one labels array per split.
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (469090, 28, 28) (469090,)
Validation set (60000, 28, 28) (60000,)
Test set (18720, 28, 28) (18720,)


### Reformatting Data

In [3]:
image_size = 28   # images are image_size x image_size pixels
num_labels = 10   # number of classes, i.e. width of the one-hot label vectors
num_channels = 1  # grayscale

import numpy as np


def reformat(dataset, labels):
    """Convert raw images and integer labels into network-ready arrays.

    Args:
        dataset: Array of images; reshaped to
            (-1, image_size, image_size, num_channels).
        labels: 1-D array of integer class ids.

    Returns:
        A tuple (images, one_hot), both float32: the reshaped images and a
        (len(labels), num_labels) matrix with a 1.0 in the column matching
        each label.
    """
    target_shape = (-1, image_size, image_size, num_channels)
    images = dataset.reshape(target_shape).astype(np.float32)

    # Broadcasting a column of labels against the row of class ids yields a
    # boolean matrix that is True only where the class id equals the label.
    class_ids = np.arange(num_labels)
    one_hot = (class_ids == labels[:, np.newaxis]).astype(np.float32)
    return images, one_hot
# Convert each split in place: images become 4-D float32 arrays and labels
# become one-hot float32 matrices (see reformat).
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the new shapes of every split.
print('Training set {} {}'.format(train_dataset.shape, train_labels.shape))
print('Validation set {} {}'.format(valid_dataset.shape, valid_labels.shape))
print('Test set {} {}'.format(test_dataset.shape, test_labels.shape))

Training set (469090, 28, 28, 1) (469090, 10)
Validation set (60000, 28, 28, 1) (60000, 10)
Test set (18720, 28, 28, 1) (18720, 10)


In [4]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max matches between the inputs.

    Args:
        predictions: 2-D array of per-class scores, one row per sample.
        labels: 2-D array of the same layout (e.g. one-hot labels).

    Returns:
        100 * (number of rows where both arg-maxes agree) / number of rows.
    """
    predicted_class = np.argmax(predictions, 1)
    expected_class = np.argmax(labels, 1)
    n_correct = np.sum(predicted_class == expected_class)
    return 100.0 * n_correct / predictions.shape[0]

## Simple Convolution Network

In [5]:
def W_var(shape, stddev, name):
    """Create a weight variable initialised from a truncated normal.

    Args:
        shape: Shape of the variable.
        stddev: Standard deviation of the truncated normal distribution used
            for the initial values.
        name: Name given to the created variable.

    Returns:
        A `tf.Variable` holding the weights.
    """
    # The name is attached to the Variable itself. It used to be given to the
    # initial-value op only, which left the variable with an auto-generated
    # name in the graph.
    initial = tf.truncated_normal(shape, stddev=stddev)
    return tf.Variable(initial, name=name)

In [6]:
def b_var(shape, value, name):
    """Create a bias variable filled with a constant value.

    Args:
        shape: Shape of the variable.
        value: Constant used for every element of the initial value.
        name: Name given to the created variable.

    Returns:
        A `tf.Variable` holding the biases.
    """
    # The name is attached to the Variable itself. It used to be given to the
    # constant initial-value op only, which left the variable with an
    # auto-generated name in the graph.
    initial = tf.constant(value, shape=shape)
    return tf.Variable(initial, name=name)

In [7]:
def conv2d(x, W, b, strides=1):
    """Apply a 2-D convolution with 'SAME' padding, add the bias, then ReLU.

    Args:
        x: Input tensor.
        W: Convolution filter.
        b: Bias added to the convolution output.
        strides: Stride used along both middle dimensions of the input.

    Returns:
        The ReLU-activated result.
    """
    stride_spec = [1, strides, strides, 1]
    pre_activation = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME') + b
    return tf.nn.relu(pre_activation)

In [8]:
def maxpool2d(x, k=2):
    """Max-pool `x` with a k x k window, stride k and 'SAME' padding.

    Args:
        x: Input tensor.
        k: Window size and stride along both middle dimensions.

    Returns:
        The pooled tensor.
    """
    window = [1, k, k, 1]
    # Window and stride are equal, so neighbouring pooling windows do not overlap.
    return tf.nn.max_pool(x, ksize=window, strides=list(window), padding='SAME')

In [9]:
def loss_function(labels, output_layer, coef_reg, reg='l2', coef_reg2=1e-3):
    """Mean softmax cross-entropy plus an L1, L2 or L1+L2 penalty.

    The penalty is applied to every entry of the
    `tf.GraphKeys.REGULARIZATION_LOSSES` collection.

    Args:
        labels: Target distribution for each sample (e.g. one-hot labels).
        output_layer: Logits produced by the network.
        coef_reg: Scale of the L1 penalty ('l1' and combined modes) or of the
            L2 penalty ('l2' mode).
        reg: 'l1' or 'l2'; any other value selects the combined L1+L2 penalty.
        coef_reg2: Scale of the L2 penalty in the combined mode only.

    Returns:
        Scalar tensor: cross-entropy plus the regularization term.
    """
    per_sample_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=output_layer)
    data_loss = tf.reduce_mean(per_sample_loss)

    layers = tf.contrib.layers
    if reg == 'l1':
        penalty = layers.l1_regularizer(scale=coef_reg)
    elif reg == 'l2':
        penalty = layers.l2_regularizer(scale=coef_reg)
    else:
        # Combined mode: L1 scaled by coef_reg plus L2 scaled by coef_reg2.
        l1_part = layers.l1_regularizer(scale=coef_reg)
        l2_part = layers.l2_regularizer(scale=coef_reg2)
        penalty = layers.sum_regularizer([l1_part, l2_part])

    # The collection is used here as the list of tensors to penalise.
    penalised = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    penalty_term = layers.apply_regularization(penalty, penalised)
    return data_loss + penalty_term

In [10]:
def batch_accuracy(tf_data, data, label, batch_size, prediction, sess=None):
    """Compute accuracy (in percent) by evaluating `prediction` batch by batch.

    Only full batches are evaluated, because each batch is fed to `tf_data`
    in slices of exactly `batch_size` samples. Samples in a trailing partial
    batch are skipped, and the accuracy is computed over the evaluated samples
    only. Previously the skipped samples stayed in the denominator and were
    therefore counted as misclassified.

    Args:
        tf_data: Placeholder (feed key) that receives each batch of `data`.
        data: Array of samples, indexed along its first axis.
        label: Array of labels aligned with `data`; compared through arg-max
            along axis 1.
        batch_size: Number of samples fed per evaluation.
        prediction: Tensor to evaluate; its arg-max along axis 1 is the
            predicted class.
        sess: Session used to run `prediction`. Defaults to the notebook-global
            `session`, which only exists inside the
            `with tf.Session(...) as session:` block.

    Returns:
        Percentage of correctly classified samples among those evaluated,
        or 0.0 when `data` holds fewer than `batch_size` samples.
    """
    if sess is None:
        # Backward-compatible fallback on the global created by the training cell.
        sess = session

    n_batches = data.shape[0] // batch_size
    n_evaluated = n_batches * batch_size
    if n_evaluated == 0:
        return 0.0

    correct = 0
    for step in range(n_batches):
        start = step * batch_size
        stop = start + batch_size
        predictions = sess.run(prediction, feed_dict={tf_data: data[start:stop]})
        correct += np.sum(np.argmax(predictions, 1) == np.argmax(label[start:stop], 1))
    return 100.0 * correct / float(n_evaluated)

In [25]:
# --- Data set description ---
image_size = 28      # images are image_size x image_size pixels
num_channels = 1     # grayscale
num_labels = 10
size_train = train_labels.shape[0]

# --- Architecture ---
num_conv_layer = 2
fully_connected = 1024
patch_size = [5, 5]
depth = [32, 64]

# Per-layer initialisation values, indexed in the order conv1, conv2, fc1, fc2.
stdev = [0.1, 0.1, np.sqrt(2.0 / fully_connected), 0.1]
bias = [0.1] * 4

# --- Training / evaluation ---
batch_size = 250
batch_size_eval = 250
coef_reg = 1e-3
keep_prob = 0.5

# Build the whole model (placeholders, variables, network, loss, optimizer and
# prediction ops) inside a dedicated graph.
graph = tf.Graph()
with graph.as_default():
    
    # Placeholders
    # Validation and test data get their own fixed-size placeholders so they can
    # be evaluated batch by batch (the full sets caused GPU memory problems).
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels),
                                      name='train_dataset')
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels),
                                     name='train_label')

    tf_valid_dataset = tf.placeholder(tf.float32, shape=(batch_size_eval, image_size, image_size, num_channels),
                                      name='valid_dataset')
    tf_test_dataset = tf.placeholder(tf.float32, shape=(batch_size_eval, image_size, image_size, num_channels),
                                     name='test_dataset')
    
    # Weights
    # W_fc1's input size is the flattened output of the last pooling layer:
    # the side length is image_size // 2**num_conv_layer (one k=2 max-pool per
    # convolution layer) and there are depth[1] channels.
    W_conv1 = W_var(shape = [patch_size[0], patch_size[0], num_channels, depth[0]],stddev= stdev[0], name = 'W_conv1' )                      
    W_conv2 = W_var(shape = [patch_size[1], patch_size[1], depth[0], depth[1]],stddev= stdev[1], name = 'W_conv2' )
    W_fc1 = W_var(shape = [(image_size // 2**num_conv_layer) * (image_size // 2**num_conv_layer) * depth[1],fully_connected],stddev= stdev[2], name = 'W_fc1')
    W_fc2 = W_var(shape = [fully_connected, num_labels],stddev= stdev[3], name = 'W_fc2' )
    
    # Register the weights of the 2 fully connected layers as the tensors to
    # regularize: loss_function reads this collection and applies the
    # regularizer to its entries. The convolution weights are not registered.
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, W_fc1)
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, W_fc2) 
    
    # Biases                       
    b_conv1 = b_var(shape=[depth[0]], value = bias[0], name= 'b_conv1')                       
    b_conv2 = b_var(shape=[depth[1]], value = bias[1], name= 'b_conv2')
    b_fc1 = b_var(shape=[fully_connected], value = bias[2], name= 'b_fc1')
    b_fc2 = b_var(shape=[num_labels], value = bias[3], name= 'b_fc2')
        
    # Parameters grouped by layer so the same variables can be shared by the
    # training, validation and test copies of the network.
    weights = {   
    'cv1' : W_conv1,
    'cv2' : W_conv2,        
    'fc1' : W_fc1,
    'fc2' : W_fc2
    }
                             
    biases = {   
    'cv1' : b_conv1,
    'cv2' : b_conv2,        
    'fc1' : b_fc1,
    'fc2' : b_fc2
    }    
    
    def conv_net(x_in, weights, biases, dropout):
        """Build the network on `x_in` and return the logits.

        Layers: conv+ReLU -> max-pool -> conv+ReLU -> max-pool -> flatten ->
        fully connected + ReLU -> dropout -> fully connected output.

        Args:
            x_in: Batch of input images.
            weights: Dict of weight variables with keys 'cv1', 'cv2', 'fc1', 'fc2'.
            biases: Dict of bias variables with the same keys.
            dropout: Value passed as the second argument of tf.nn.dropout
                (callers pass keep_prob for training and 1 for evaluation).

        Returns:
            The output of the last fully connected layer (no softmax applied).
        """
        
        # Convolution Layer 1
        conv1 = conv2d(x_in, weights['cv1'], biases['cv1'])
        # Max Pooling 1 (down-sampling by k)
        pool1 = maxpool2d(conv1, k=2)
                           
        # Convolution Layer 2
        conv2 = conv2d(pool1, weights['cv2'], biases['cv2'])
        # Max Pooling 2 (down-sampling by k)
        pool2 = maxpool2d(conv2, k=2)
        
        # Reshape for fully connected layer: flatten every dimension but the first
        shape = pool2.get_shape().as_list()
        reshape = tf.reshape(pool2, [-1, shape[1] * shape[2] * shape[3]])
        
        # Fully connected layer 1
        fc1 = tf.nn.relu(tf.matmul(reshape,  weights['fc1']) + biases['fc1'])
        fc1 = tf.nn.dropout(fc1, dropout)
        
        # Fully connected layer 2 - Output
        y_out = tf.matmul(fc1, weights['fc2']) + biases['fc2']
        return y_out                           
    
    # Training logits (built with dropout = keep_prob)
    pred = conv_net(tf_train_dataset, weights, biases, keep_prob)
    
    # Loss: cross-entropy plus L2 penalty on the registered weights
    reg_loss = loss_function(tf_train_labels, pred, coef_reg, reg='l2')
    
    # Optimizer.
    optimizer = tf.train.AdamOptimizer(1e-4).minimize(reg_loss)
    
    # Predictions for the training, validation, and test data.
    # NOTE: train_prediction is derived from `pred`, so it is computed with
    # dropout = keep_prob; the validation and test copies use dropout = 1.
    train_prediction = tf.nn.softmax(pred)
    valid_prediction = tf.nn.softmax(conv_net(tf_valid_dataset, weights, biases, dropout = 1))
    test_prediction = tf.nn.softmax(conv_net(tf_test_dataset, weights, biases, dropout = 1))

In [26]:
# Training schedule: one epoch is the number of full batches in the training set.
n_epoch = 12
n_step_one_epoch = size_train // batch_size
num_steps = int(n_epoch * n_step_one_epoch)
t1 = time.time()

num_epoch = 1
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        if step % n_step_one_epoch == 0:
            print('\n------Epoch n°%d ------\n' % num_epoch)
            num_epoch += 1

        # Slide a window of batch_size samples over the training set, wrapping
        # around so that the slice always holds a full batch.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        session.run(optimizer, feed_dict=feed_dict)

        if step % 500 == 0:
            l, predictions = session.run([reg_loss, train_prediction], feed_dict=feed_dict)
            print('Minibatch loss at step %d: %f' % (step, l))
            # Use the NumPy helper rather than building tf.equal / tf.argmax /
            # tf.reduce_mean ops here: creating ops inside the loop added new
            # nodes to the graph at every report.
            print('Minibatch accuracy: %.2f%%' % accuracy(predictions, batch_labels))
            # Validation set is evaluated batch by batch
            print('Validation accuracy: %.2f%%' % batch_accuracy(
                tf_data=tf_valid_dataset, data=valid_dataset, label=valid_labels,
                batch_size=batch_size_eval, prediction=valid_prediction))
            print()

    print('Test accuracy: %.1f%%' % batch_accuracy(
        tf_data=tf_test_dataset, data=test_dataset, label=test_labels,
        batch_size=batch_size_eval, prediction=test_prediction))
t2 = time.time()
print("Time: %0.2fs" % (t2 - t1))

Initialized

------Epoch n°1 ------

Minibatch loss at step 0: 7.048244
Minibatch accuracy: 12.00%
Validation accuracy: 12.16%

Minibatch loss at step 500: 2.541504
Minibatch accuracy: 83.20%
Validation accuracy: 86.31%

Minibatch loss at step 1000: 2.145658
Minibatch accuracy: 87.60%
Validation accuracy: 88.15%

Minibatch loss at step 1500: 1.831149
Minibatch accuracy: 89.60%
Validation accuracy: 89.10%


------Epoch n°2 ------

Minibatch loss at step 2000: 1.584659
Minibatch accuracy: 89.20%
Validation accuracy: 89.68%

Minibatch loss at step 2500: 1.381074
Minibatch accuracy: 90.00%
Validation accuracy: 90.07%

Minibatch loss at step 3000: 1.155787
Minibatch accuracy: 91.20%
Validation accuracy: 90.46%

Minibatch loss at step 3500: 1.045421
Minibatch accuracy: 90.00%
Validation accuracy: 90.72%


------Epoch n°3 ------

Minibatch loss at step 4000: 0.975914
Minibatch accuracy: 88.00%
Validation accuracy: 90.90%

Minibatch loss at step 4500: 0.706401
Minibatch accuracy: 94.40%
Valida

## Advanced Convolution network