In [2]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

import matplotlib.pyplot as plt

import os
import sys

from IPython.display import display, Image

from scipy import ndimage

import random

In [3]:
%matplotlib inline

## Load dataset

In [29]:
pickle_file = 'SVHN_multi_crop.pickle'

# NOTE: unpickling can execute arbitrary code -- only load pickle files you
# produced yourself or otherwise trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull the three splits out of the pickled dict, then drop the dict itself.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

# Report the array shapes of every split as a quick sanity check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (27401, 32, 32, 1) (27401, 6)
Validation set (6000, 32, 32, 1) (6000, 6)
Test set (13068, 32, 32, 1) (13068, 6)


## Metrics

In [18]:
def accuracy_single(predictions, labels):
    """Calculate character-level accuracy, in percent.

    Args:
        predictions: array indexed as (digit position, sample, class); the
            arg-max over the last axis is taken as the predicted class.
        labels: 2-D array with one row per sample. Column 0 holds the sequence
            length and columns 1..5 hold the per-position class labels.

    Returns:
        100 * (correct characters within each sample's length) / (sum of all
        lengths), or 0.0 when the lengths sum to zero (e.g. an empty batch),
        where the plain ratio would be NaN.
    """
    lengths = labels[:, 0]
    total_chars = np.sum(lengths)
    if total_chars == 0:
        return 0.0

    hits = np.argmax(predictions, 2).T == labels[:, 1:6]
    # Only the first `length` positions of each row count; broadcasting the
    # position index against the lengths builds that mask without a Python loop
    # (and also works when the label array has a float dtype).
    counted = np.arange(hits.shape[1]) < lengths[:, None]
    return 100.0 * np.sum(hits & counted) / total_chars

In [19]:
def accuracy_multi(predictions, labels):
    """Calculate sequence-level accuracy, in percent.

    A sample counts as correct only when the predicted class matches the label
    at every one of the label columns 1..5.

    Args:
        predictions: array indexed as (digit position, sample, class); the
            arg-max over the last axis is taken as the predicted class.
        labels: 2-D array with one row per sample; columns 1..5 hold the
            per-position class labels (column 0 is not used here).

    Returns:
        100 * (fully correct samples) / (number of samples), or 0.0 when there
        are no samples, where the plain ratio would be NaN.
    """
    count = predictions.shape[1]
    if count == 0:
        return 0.0
    hits = np.argmax(predictions, 2).T == labels[:, 1:6]
    fully_correct = np.all(hits, axis=1)
    return 100.0 * np.sum(fully_correct) / count

## Model

### First Model

In [45]:
# Hyper-parameters of the first model.
batch_size = 64
patch_size = 5  # conv kernel size
depth1 = 16
depth2 = 32
depth3 = 64
num_hidden = 64  # width of the flattened conv3 output (1 * 1 * depth3)

image_size = 32
num_labels = 11  # NOTE(review): presumably digits 0-9 plus a "no digit" class -- confirm
num_channels = 1 # grayscale

graph = tf.Graph()

with graph.as_default():

    def weight_variable(shape):
        """Return a variable of `shape` drawn from a truncated normal (stddev 0.1).

        Not used by this cell (the weights below use Xavier initialisation); it
        is kept so that any other code referring to it keeps working.
        """
        initial = tf.truncated_normal(shape, stddev = 0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        """Return a bias variable of `shape` with every entry set to 1.0."""
        initial = tf.constant(1.0, shape = shape)
        return tf.Variable(initial)

    def conv2d(data, weight):
        """Stride-1 2-D convolution with 'VALID' padding (shrinks each side by patch_size - 1)."""
        # strides [1, x_movement, y_movement, 1]
        return tf.nn.conv2d(data, weight, strides = [1, 1, 1, 1], padding = 'VALID')

    def max_pooling(data):
        """2x2 max pooling with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(data, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

    def get_digit_wb(position):
        """Create the (weights, biases) of the classifier for digit `position` (1..5).

        Variables are named 'WS<position>' / 'BS<position>'.
        """
        weights = tf.get_variable('WS%d' % position, shape=[num_hidden, num_labels],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(1.0, shape=[num_labels]), name='BS%d' % position)
        return weights, biases

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    # Column 0 is the sequence length, columns 1..5 are the per-digit labels
    # (see the metric functions and the loss below).
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # L2 regularisation strength; `loss` depends on it, so it must be fed
    # whenever `loss` or `optimizer` is run.
    beta_regul = tf.placeholder(tf.float32)

    # Variables
    # conv1 layer 1
    layer1_weights = tf.get_variable('W1', shape=[patch_size, patch_size, num_channels, depth1],
                                     initializer=tf.contrib.layers.xavier_initializer_conv2d())
    layer1_biases = bias_variable([depth1]) # 16
    # conv2 layer 2
    layer2_weights = tf.get_variable('W2', shape=[patch_size, patch_size, depth1, depth2],
                                     initializer=tf.contrib.layers.xavier_initializer_conv2d())
    layer2_biases = bias_variable([depth2]) # 32
    # conv3 layer 3
    layer3_weights = tf.get_variable('W3', shape=[patch_size, patch_size, depth2, depth3],
                                     initializer=tf.contrib.layers.xavier_initializer())
    layer3_biases = bias_variable([depth3]) # 64

    # One softmax classifier per digit position.
    s1_w, s1_b = get_digit_wb(1)
    s2_w, s2_b = get_digit_wb(2)
    s3_w, s3_b = get_digit_wb(3)
    s4_w, s4_b = get_digit_wb(4)
    s5_w, s5_b = get_digit_wb(5)

    sw = [s1_w, s2_w, s3_w, s4_w, s5_w]

    global_step = tf.Variable(0)  # count the number of steps taken.

    def model(dataset, keep_prob):
        """Build the forward pass and return the five per-digit logit tensors.

        Args:
            dataset: image tensor of shape (batch, image_size, image_size, num_channels).
            keep_prob: dropout keep probability applied to the flattened conv
                features; pass 1.0 to disable dropout (evaluation).

        Returns:
            A list of five tensors, one per digit position, each of shape
            (batch, num_labels).
        """
        # conv1 layer 1
        hidden1 = tf.nn.relu(conv2d(dataset, layer1_weights) + layer1_biases) # 28 * 28 * depth1
        pool1 = max_pooling(hidden1) # 14 * 14 * depth1
        # conv2 layer 2
        hidden2 = tf.nn.relu(conv2d(pool1, layer2_weights) + layer2_biases) # 10 * 10 * depth2
        pool2 = max_pooling(hidden2) # 5 * 5 * depth2
        # conv3 layer 3 (no pooling: the VALID convolution already reduces 5x5 to 1x1)
        hidden3 = tf.nn.relu(conv2d(pool2, layer3_weights) + layer3_biases) # 1 * 1 * depth3

        shape = hidden3.get_shape().as_list()
        hidden3_flat = tf.reshape(hidden3, [shape[0], shape[1] * shape[2] * shape[3]])

        hidden3_drop = tf.nn.dropout(hidden3_flat, keep_prob)

        heads = [(s1_w, s1_b), (s2_w, s2_b), (s3_w, s3_b), (s4_w, s4_b), (s5_w, s5_b)]
        return [tf.matmul(hidden3_drop, weights) + biases for weights, biases in heads]

    # Training computation. The training tower is built exactly once.
    logits = model(tf_train_dataset, 0.9375)

    # Cross-entropy of each digit classifier plus L2 on that classifier's weights.
    loss_per_digit = [tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits[i],
                            tf_train_labels[:,i+1]
                        )) + beta_regul * tf.nn.l2_loss(sw[i])
                       for i in range(5)]

    loss = tf.add_n(loss_per_digit)

    # Optimizer.
    learning_rate = tf.train.exponential_decay(0.05, global_step, 1000, 0.70, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    def prediction_softmax(dataset, shape=None):
        """Return the stacked per-digit softmax outputs for `dataset`.

        The network is built once with dropout disabled (keep_prob 1.0) and the
        five softmax tensors are packed along a new leading axis, giving a
        tensor indexed as (digit position, sample, class).

        `shape` is unused; it is accepted only so existing calls keep working.
        """
        return tf.pack([tf.nn.softmax(digit_logits)
                        for digit_logits in model(dataset, 1.0)])

    # Predictions for the training, validation, and test data.
    train_prediction = prediction_softmax(tf_train_dataset, 1.0)
    valid_prediction = prediction_softmax(tf_valid_dataset, 1.0)
    test_prediction = prediction_softmax(tf_test_dataset, 1.0)


In [46]:
num_steps = 20001
# L2 strength fed to the `beta_regul` placeholder. `loss` depends on that
# placeholder, so leaving it out of the feed makes session.run fail.
# NOTE(review): the value behind the recorded output is not known -- tune as needed.
beta_regul_value = 1e-3

with tf.Session(graph=graph) as session:

    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array (or scalar) to feed to it.
        feed_dict = {
            tf_train_dataset: batch_data,
            tf_train_labels: batch_labels,
            beta_regul: beta_regul_value,
        }
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        # Report progress every 500 steps.
        if (step % 500 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy_multi(valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy_multi(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 16.209852
Minibatch accuracy: 0.0%
Validation accuracy: 1.0%
Minibatch loss at step 500: 5.981581
Minibatch accuracy: 1.6%
Validation accuracy: 3.3%
Minibatch loss at step 1000: 5.648309
Minibatch accuracy: 6.2%
Validation accuracy: 4.0%
Minibatch loss at step 1500: 4.760007
Minibatch accuracy: 9.4%
Validation accuracy: 8.3%
Minibatch loss at step 2000: 4.746642
Minibatch accuracy: 9.4%
Validation accuracy: 12.1%
Minibatch loss at step 2500: 4.075912
Minibatch accuracy: 10.9%
Validation accuracy: 13.9%
Minibatch loss at step 3000: 4.167899
Minibatch accuracy: 18.8%
Validation accuracy: 14.8%
Minibatch loss at step 3500: 4.798819
Minibatch accuracy: 12.5%
Validation accuracy: 15.6%
Minibatch loss at step 4000: 3.603654
Minibatch accuracy: 25.0%
Validation accuracy: 17.2%
Minibatch loss at step 4500: 4.381269
Minibatch accuracy: 14.1%
Validation accuracy: 18.0%
Minibatch loss at step 5000: 3.754035
Minibatch accuracy: 23.4%
Validation accuracy: 19.3%

## Base Model

In [132]:
# Hyper-parameters of the base model.
batch_size = 64
patch_size = 5 # conv kernel size
depth1 = 16
depth2 = 32
depth3 = 64
num_hidden = 1024  # width of the fully connected layer

image_size = 32
num_labels = 11  # NOTE(review): presumably digits 0-9 plus a "no digit" class -- confirm
num_channels = 1 # grayscale

graph = tf.Graph()

with graph.as_default():

    def get_weight_variable(name, shape):
        """Create (or fetch) the variable `name` of `shape` with Xavier initialisation."""
        return tf.get_variable(name, shape=shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())

    def bias_variable(shape):
        """Return a bias variable of `shape` with every entry set to 1.0."""
        initial = tf.constant(1.0, shape = shape)
        return tf.Variable(initial)

    def conv2d(data, weight):
        """Stride-1 2-D convolution with 'SAME' padding (spatial size is preserved)."""
        # strides [1, x_movement, y_movement, 1]
        return tf.nn.conv2d(data, weight, strides = [1, 1, 1, 1], padding = 'SAME')

    def max_pooling(data):
        """2x2 max pooling with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(data, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

    def get_label_wb(weight_name):
        """Create the (weights, biases) pair of one per-digit softmax classifier."""
        weights = tf.get_variable(weight_name, shape=[num_hidden, num_labels],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        return weights, biases

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    # Column 0 is the sequence length, columns 1..5 are the per-digit labels
    # (see the metric functions and the loss below).
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # L2 regularisation strength; `loss` depends on it, so it must be fed
    # whenever `loss` or `optimizer` is run.
    beta_regul = tf.placeholder(tf.float32)

    # Variables
    # conv1 layer 1
    layer1_weights = get_weight_variable('CNN_W1', [patch_size, patch_size, num_channels, depth1])
    layer1_biases = bias_variable([depth1]) # 16
    # conv2 layer 2
    layer2_weights = get_weight_variable('CNN_W2', [patch_size, patch_size, depth1, depth2])
    layer2_biases = bias_variable([depth2]) # 32
    # conv3 layer 3
    layer3_weights = get_weight_variable('CNN_W3', [patch_size, patch_size, depth2, depth3])
    layer3_biases = bias_variable([depth3]) # 64

    # func1 layer 4
    # Three 2x2 poolings shrink each side by 8. The parentheses matter:
    # without them the expression is only right by operator-precedence luck.
    flat_size = (image_size // 8) * (image_size // 8) * depth3
    layer4_weights = get_weight_variable('FC_W1', [flat_size, num_hidden])
    layer4_biases = bias_variable([num_hidden])

    # One softmax classifier per digit position.
    s1_w, s1_b = get_label_wb('S1_W')
    s2_w, s2_b = get_label_wb('S2_W')
    s3_w, s3_b = get_label_wb('S3_W')
    s4_w, s4_b = get_label_wb('S4_W')
    s5_w, s5_b = get_label_wb('S5_W')

    sw = [s1_w, s2_w, s3_w, s4_w, s5_w]

    global_step = tf.Variable(0)  # count the number of steps taken.

    def model(dataset, keep_prob):
        """Build the forward pass and return the five per-digit logit tensors.

        Args:
            dataset: image tensor of shape (batch, image_size, image_size, num_channels).
            keep_prob: dropout keep probability applied after the fully
                connected layer; pass 1.0 to disable dropout (evaluation).

        Returns:
            A list of five tensors, one per digit position, each of shape
            (batch, num_labels).
        """
        # conv1 layer 1
        hidden1 = tf.nn.relu(conv2d(dataset, layer1_weights) + layer1_biases) # 32 * 32 * depth1
        pool1 = max_pooling(hidden1) # 16 * 16 * depth1
        # conv2 layer 2
        hidden2 = tf.nn.relu(conv2d(pool1, layer2_weights) + layer2_biases) # 16 * 16 * depth2
        pool2 = max_pooling(hidden2) # 8 * 8 * depth2
        # conv3 layer 3
        hidden3 = tf.nn.relu(conv2d(pool2, layer3_weights) + layer3_biases) # 8 * 8 * depth3
        pool3 = max_pooling(hidden3) # 4 * 4 * depth3

        shape = pool3.get_shape().as_list()
        pool3_flat = tf.reshape(pool3, [shape[0], shape[1] * shape[2] * shape[3]]) # 1024

        # func1 layer 4
        hidden4 = tf.nn.relu(tf.matmul(pool3_flat, layer4_weights) + layer4_biases)
        hidden4_drop = tf.nn.dropout(hidden4, keep_prob)

        heads = [(s1_w, s1_b), (s2_w, s2_b), (s3_w, s3_b), (s4_w, s4_b), (s5_w, s5_b)]
        return [tf.matmul(hidden4_drop, weights) + biases for weights, biases in heads]

    # Training computation.
    logits = model(tf_train_dataset, 0.9375)

    # Cross-entropy of each digit classifier plus L2 on that classifier's weights.
    loss_per_digit = [tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits[i],
                            tf_train_labels[:,i+1]
                        )) + beta_regul * tf.nn.l2_loss(sw[i])
                       for i in range(5)]

    loss = tf.add_n(loss_per_digit)

    # Optimizer.
    learning_rate = tf.train.exponential_decay(0.001, global_step, 1000, 0.90, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    def prediction_softmax(dataset, shape=None):
        """Return the stacked per-digit softmax outputs for `dataset`.

        The network is built once with dropout disabled (keep_prob 1.0) and the
        five softmax tensors are packed along a new leading axis, giving a
        tensor indexed as (digit position, sample, class).

        `shape` is unused; it is accepted only so existing calls keep working.
        """
        return tf.pack([tf.nn.softmax(digit_logits)
                        for digit_logits in model(dataset, 1.0)])

    # Predictions for the training, validation, and test data.
    train_prediction = prediction_softmax(tf_train_dataset, 1.0)
    valid_prediction = prediction_softmax(tf_valid_dataset, 1.0)
    test_prediction = prediction_softmax(tf_test_dataset, 1.0)
    

In [137]:
num_steps = 20001
# L2 strength fed to the `beta_regul` placeholder. `loss` depends on that
# placeholder, so leaving it out of the feed makes session.run fail.
# NOTE(review): the value behind the recorded output is not known -- tune as needed.
beta_regul_value = 1e-3

with tf.Session(graph=graph) as session:

    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array (or scalar) to feed to it.
        feed_dict = {
            tf_train_dataset: batch_data,
            tf_train_labels: batch_labels,
            beta_regul: beta_regul_value,
        }
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 500 == 0):
            # Run the validation forward pass once and reuse it for both metrics.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Likewise evaluate the test set a single time.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

Initialized
Minibatch loss at step 0: 19.218676
Minibatch single digit accuracy: 5.4%
Minibatch image accuracy: 0.0%
Validation single digit accuracy: 18.3%
Validation image accuracy: 1.3%
Minibatch loss at step 500: 4.821128
Minibatch single digit accuracy: 32.4%
Minibatch image accuracy: 12.5%
Validation single digit accuracy: 37.5%
Validation image accuracy: 16.7%
Minibatch loss at step 1000: 2.653456
Minibatch single digit accuracy: 60.8%
Minibatch image accuracy: 46.9%
Validation single digit accuracy: 65.2%
Validation image accuracy: 49.5%
Minibatch loss at step 1500: 1.684686
Minibatch single digit accuracy: 80.0%
Minibatch image accuracy: 70.3%
Validation single digit accuracy: 73.1%
Validation image accuracy: 58.9%
Minibatch loss at step 2000: 1.625933
Minibatch single digit accuracy: 77.5%
Minibatch image accuracy: 64.1%
Validation single digit accuracy: 75.5%
Validation image accuracy: 62.5%
Minibatch loss at step 2500: 0.746641
Minibatch single digit accuracy: 91.2%
Minibat

KeyboardInterrupt: 

### Model v1 - Change Dropout

In [143]:
# Hyper-parameters of model v1 (base model with a stronger dropout).
batch_size = 64
patch_size = 5 # conv kernel size
depth1 = 16
depth2 = 32
depth3 = 64
num_hidden = 1024  # width of the fully connected layer

image_size = 32
num_labels = 11  # NOTE(review): presumably digits 0-9 plus a "no digit" class -- confirm
num_channels = 1 # grayscale

graph = tf.Graph()

with graph.as_default():

    def get_weight_variable(name, shape):
        """Create (or fetch) the variable `name` of `shape` with Xavier initialisation."""
        return tf.get_variable(name, shape=shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())

    def bias_variable(shape):
        """Return a bias variable of `shape` with every entry set to 1.0."""
        initial = tf.constant(1.0, shape = shape)
        return tf.Variable(initial)

    def conv2d(data, weight):
        """Stride-1 2-D convolution with 'SAME' padding (spatial size is preserved)."""
        # strides [1, x_movement, y_movement, 1]
        return tf.nn.conv2d(data, weight, strides = [1, 1, 1, 1], padding = 'SAME')

    def max_pooling(data):
        """2x2 max pooling with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(data, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

    def get_label_wb(weight_name):
        """Create the (weights, biases) pair of one per-digit softmax classifier."""
        weights = tf.get_variable(weight_name, shape=[num_hidden, num_labels],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        return weights, biases

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    # Column 0 is the sequence length, columns 1..5 are the per-digit labels
    # (see the metric functions and the loss below).
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # L2 regularisation strength; `loss` depends on it, so it must be fed
    # whenever `loss` or `optimizer` is run.
    beta_regul = tf.placeholder(tf.float32)

    # Variables
    # conv1 layer 1
    layer1_weights = get_weight_variable('CNN_W1', [patch_size, patch_size, num_channels, depth1])
    layer1_biases = bias_variable([depth1]) # 16
    # conv2 layer 2
    layer2_weights = get_weight_variable('CNN_W2', [patch_size, patch_size, depth1, depth2])
    layer2_biases = bias_variable([depth2]) # 32
    # conv3 layer 3
    layer3_weights = get_weight_variable('CNN_W3', [patch_size, patch_size, depth2, depth3])
    layer3_biases = bias_variable([depth3]) # 64

    # func1 layer 4
    # Three 2x2 poolings shrink each side by 8. The parentheses matter:
    # without them the expression is only right by operator-precedence luck.
    flat_size = (image_size // 8) * (image_size // 8) * depth3
    layer4_weights = get_weight_variable('FC_W1', [flat_size, num_hidden])
    layer4_biases = bias_variable([num_hidden])

    # One softmax classifier per digit position.
    s1_w, s1_b = get_label_wb('S1_W')
    s2_w, s2_b = get_label_wb('S2_W')
    s3_w, s3_b = get_label_wb('S3_W')
    s4_w, s4_b = get_label_wb('S4_W')
    s5_w, s5_b = get_label_wb('S5_W')

    sw = [s1_w, s2_w, s3_w, s4_w, s5_w]

    global_step = tf.Variable(0)  # count the number of steps taken.

    def model(dataset, keep_prob):
        """Build the forward pass and return the five per-digit logit tensors.

        Args:
            dataset: image tensor of shape (batch, image_size, image_size, num_channels).
            keep_prob: dropout keep probability applied after the fully
                connected layer; pass 1.0 to disable dropout (evaluation).

        Returns:
            A list of five tensors, one per digit position, each of shape
            (batch, num_labels).
        """
        # conv1 layer 1
        hidden1 = tf.nn.relu(conv2d(dataset, layer1_weights) + layer1_biases) # 32 * 32 * depth1
        pool1 = max_pooling(hidden1) # 16 * 16 * depth1
        # conv2 layer 2
        hidden2 = tf.nn.relu(conv2d(pool1, layer2_weights) + layer2_biases) # 16 * 16 * depth2
        pool2 = max_pooling(hidden2) # 8 * 8 * depth2
        # conv3 layer 3
        hidden3 = tf.nn.relu(conv2d(pool2, layer3_weights) + layer3_biases) # 8 * 8 * depth3
        pool3 = max_pooling(hidden3) # 4 * 4 * depth3

        shape = pool3.get_shape().as_list()
        pool3_flat = tf.reshape(pool3, [shape[0], shape[1] * shape[2] * shape[3]]) # 1024

        # func1 layer 4
        hidden4 = tf.nn.relu(tf.matmul(pool3_flat, layer4_weights) + layer4_biases)
        hidden4_drop = tf.nn.dropout(hidden4, keep_prob)

        heads = [(s1_w, s1_b), (s2_w, s2_b), (s3_w, s3_b), (s4_w, s4_b), (s5_w, s5_b)]
        return [tf.matmul(hidden4_drop, weights) + biases for weights, biases in heads]

    # Training computation (keep probability 0.5 is the change this variant tests).
    logits = model(tf_train_dataset, 0.5)

    # Cross-entropy of each digit classifier plus L2 on that classifier's weights.
    loss_per_digit = [tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits[i],
                            tf_train_labels[:,i+1]
                        )) + beta_regul * tf.nn.l2_loss(sw[i])
                       for i in range(5)]

    loss = tf.add_n(loss_per_digit)

    # Optimizer.
    learning_rate = tf.train.exponential_decay(0.001, global_step, 1000, 0.80, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    def prediction_softmax(dataset, shape=None):
        """Return the stacked per-digit softmax outputs for `dataset`.

        The network is built once with dropout disabled (keep_prob 1.0) and the
        five softmax tensors are packed along a new leading axis, giving a
        tensor indexed as (digit position, sample, class).

        `shape` is unused; it is accepted only so existing calls keep working.
        """
        return tf.pack([tf.nn.softmax(digit_logits)
                        for digit_logits in model(dataset, 1.0)])

    # Predictions for the training, validation, and test data.
    train_prediction = prediction_softmax(tf_train_dataset, 1.0)
    valid_prediction = prediction_softmax(tf_valid_dataset, 1.0)
    test_prediction = prediction_softmax(tf_test_dataset, 1.0)

    # Save Model
    saver = tf.train.Saver()

In [144]:
num_steps = 15001
# L2 strength fed to the `beta_regul` placeholder. `loss` depends on that
# placeholder, so leaving it out of the feed makes session.run fail.
# NOTE(review): the value behind the recorded output is not known -- tune as needed.
beta_regul_value = 1e-3

with tf.Session(graph=graph) as session:

    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array (or scalar) to feed to it.
        feed_dict = {
            tf_train_dataset: batch_data,
            tf_train_labels: batch_labels,
            beta_regul: beta_regul_value,
        }
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 500 == 0):
            # Run the validation forward pass once and reuse it for both metrics.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Likewise evaluate the test set a single time.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

Initialized
Minibatch loss at step 0: 30.106844
Minibatch single digit accuracy: 4.1%
Minibatch image accuracy: 0.0%
Validation single digit accuracy: 18.3%
Validation image accuracy: 1.3%
Minibatch loss at step 500: 5.182123
Minibatch single digit accuracy: 27.6%
Minibatch image accuracy: 9.4%
Validation single digit accuracy: 33.1%
Validation image accuracy: 11.3%
Minibatch loss at step 1000: 2.731842
Minibatch single digit accuracy: 67.1%
Minibatch image accuracy: 53.1%
Validation single digit accuracy: 64.2%
Validation image accuracy: 49.1%
Minibatch loss at step 1500: 1.559345
Minibatch single digit accuracy: 80.7%
Minibatch image accuracy: 67.2%
Validation single digit accuracy: 73.2%
Validation image accuracy: 60.0%
Minibatch loss at step 2000: 1.872497
Minibatch single digit accuracy: 76.8%
Minibatch image accuracy: 70.3%
Validation single digit accuracy: 75.7%
Validation image accuracy: 63.6%
Minibatch loss at step 2500: 1.323040
Minibatch single digit accuracy: 87.6%
Minibatc

### Model v2 - LRN (Local Response Normalization)

In [147]:
# Hyper-parameters of model v2 (v1 plus local response normalization).
batch_size = 64
patch_size = 5 # conv kernel size
depth1 = 16
depth2 = 32
depth3 = 64
num_hidden = 1024  # width of the fully connected layer

image_size = 32
num_labels = 11  # NOTE(review): presumably digits 0-9 plus a "no digit" class -- confirm
num_channels = 1 # grayscale

graph = tf.Graph()

with graph.as_default():

    def get_weight_variable(name, shape):
        """Create (or fetch) the variable `name` of `shape` with Xavier initialisation."""
        return tf.get_variable(name, shape=shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())

    def bias_variable(shape):
        """Return a bias variable of `shape` with every entry set to 1.0."""
        initial = tf.constant(1.0, shape = shape)
        return tf.Variable(initial)

    def conv2d(data, weight):
        """Stride-1 2-D convolution with 'SAME' padding (spatial size is preserved)."""
        # strides [1, x_movement, y_movement, 1]
        return tf.nn.conv2d(data, weight, strides = [1, 1, 1, 1], padding = 'SAME')

    def max_pooling(data):
        """2x2 max pooling with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(data, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

    def get_label_wb(weight_name):
        """Create the (weights, biases) pair of one per-digit softmax classifier."""
        weights = tf.get_variable(weight_name, shape=[num_hidden, num_labels],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        return weights, biases

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    # Column 0 is the sequence length, columns 1..5 are the per-digit labels
    # (see the metric functions and the loss below).
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # L2 regularisation strength; `loss` depends on it, so it must be fed
    # whenever `loss` or `optimizer` is run.
    beta_regul = tf.placeholder(tf.float32)

    # Variables
    # conv1 layer 1
    layer1_weights = get_weight_variable('CNN_W1', [patch_size, patch_size, num_channels, depth1])
    layer1_biases = bias_variable([depth1]) # 16
    # conv2 layer 2
    layer2_weights = get_weight_variable('CNN_W2', [patch_size, patch_size, depth1, depth2])
    layer2_biases = bias_variable([depth2]) # 32
    # conv3 layer 3
    layer3_weights = get_weight_variable('CNN_W3', [patch_size, patch_size, depth2, depth3])
    layer3_biases = bias_variable([depth3]) # 64

    # func1 layer 4
    # Three 2x2 poolings shrink each side by 8. The parentheses matter:
    # without them the expression is only right by operator-precedence luck.
    flat_size = (image_size // 8) * (image_size // 8) * depth3
    layer4_weights = get_weight_variable('FC_W1', [flat_size, num_hidden])
    layer4_biases = bias_variable([num_hidden])

    # One softmax classifier per digit position.
    s1_w, s1_b = get_label_wb('S1_W')
    s2_w, s2_b = get_label_wb('S2_W')
    s3_w, s3_b = get_label_wb('S3_W')
    s4_w, s4_b = get_label_wb('S4_W')
    s5_w, s5_b = get_label_wb('S5_W')

    sw = [s1_w, s2_w, s3_w, s4_w, s5_w]

    global_step = tf.Variable(0)  # count the number of steps taken.

    def model(dataset, keep_prob):
        """Build the forward pass and return the five per-digit logit tensors.

        Each convolution is followed by ReLU, local response normalization and
        2x2 max pooling.

        Args:
            dataset: image tensor of shape (batch, image_size, image_size, num_channels).
            keep_prob: dropout keep probability applied after the fully
                connected layer; pass 1.0 to disable dropout (evaluation).

        Returns:
            A list of five tensors, one per digit position, each of shape
            (batch, num_labels).
        """
        # conv1 layer 1
        hidden1 = tf.nn.relu(conv2d(dataset, layer1_weights) + layer1_biases) # 32 * 32 * depth1
        hidden1 = tf.nn.local_response_normalization(hidden1)
        pool1 = max_pooling(hidden1) # 16 * 16 * depth1
        # conv2 layer 2
        hidden2 = tf.nn.relu(conv2d(pool1, layer2_weights) + layer2_biases) # 16 * 16 * depth2
        hidden2 = tf.nn.local_response_normalization(hidden2)
        pool2 = max_pooling(hidden2) # 8 * 8 * depth2
        # conv3 layer 3
        hidden3 = tf.nn.relu(conv2d(pool2, layer3_weights) + layer3_biases) # 8 * 8 * depth3
        hidden3 = tf.nn.local_response_normalization(hidden3)
        pool3 = max_pooling(hidden3) # 4 * 4 * depth3

        shape = pool3.get_shape().as_list()
        pool3_flat = tf.reshape(pool3, [shape[0], shape[1] * shape[2] * shape[3]]) # 1024

        # func1 layer 4
        hidden4 = tf.nn.relu(tf.matmul(pool3_flat, layer4_weights) + layer4_biases)
        hidden4_drop = tf.nn.dropout(hidden4, keep_prob)

        heads = [(s1_w, s1_b), (s2_w, s2_b), (s3_w, s3_b), (s4_w, s4_b), (s5_w, s5_b)]
        return [tf.matmul(hidden4_drop, weights) + biases for weights, biases in heads]

    # Training computation.
    logits = model(tf_train_dataset, 0.5)

    # Cross-entropy of each digit classifier plus L2 on that classifier's weights.
    loss_per_digit = [tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits[i],
                            tf_train_labels[:,i+1]
                        )) + beta_regul * tf.nn.l2_loss(sw[i])
                       for i in range(5)]

    loss = tf.add_n(loss_per_digit)

    # Optimizer.
    learning_rate = tf.train.exponential_decay(0.001, global_step, 1000, 0.80, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    def prediction_softmax(dataset, shape=None):
        """Return the stacked per-digit softmax outputs for `dataset`.

        The network is built once with dropout disabled (keep_prob 1.0) and the
        five softmax tensors are packed along a new leading axis, giving a
        tensor indexed as (digit position, sample, class).

        `shape` is unused; it is accepted only so existing calls keep working.
        """
        return tf.pack([tf.nn.softmax(digit_logits)
                        for digit_logits in model(dataset, 1.0)])

    # Predictions for the training, validation, and test data.
    train_prediction = prediction_softmax(tf_train_dataset, 1.0)
    valid_prediction = prediction_softmax(tf_valid_dataset, 1.0)
    test_prediction = prediction_softmax(tf_test_dataset, 1.0)

    # Save Model
    saver = tf.train.Saver()

In [148]:
num_steps = 15001
# L2 strength fed to the `beta_regul` placeholder. `loss` depends on that
# placeholder, so leaving it out of the feed makes session.run fail.
# NOTE(review): the value behind the recorded output is not known -- tune as needed.
beta_regul_value = 1e-3

with tf.Session(graph=graph) as session:

    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array (or scalar) to feed to it.
        feed_dict = {
            tf_train_dataset: batch_data,
            tf_train_labels: batch_labels,
            beta_regul: beta_regul_value,
        }
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 500 == 0):
            # Run the validation forward pass once and reuse it for both metrics.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Likewise evaluate the test set a single time.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

Initialized
Minibatch loss at step 0: 13.347601
Minibatch single digit accuracy: 9.5%
Minibatch image accuracy: 0.0%
Validation single digit accuracy: 19.5%
Validation image accuracy: 3.6%
Minibatch loss at step 500: 5.714209
Minibatch single digit accuracy: 16.6%
Minibatch image accuracy: 3.1%
Validation single digit accuracy: 20.8%
Validation image accuracy: 3.7%
Minibatch loss at step 1000: 3.024511
Minibatch single digit accuracy: 60.8%
Minibatch image accuracy: 45.3%
Validation single digit accuracy: 59.9%
Validation image accuracy: 42.4%
Minibatch loss at step 1500: 1.887626
Minibatch single digit accuracy: 79.3%
Minibatch image accuracy: 68.8%
Validation single digit accuracy: 71.9%
Validation image accuracy: 58.0%
Minibatch loss at step 2000: 2.053926
Minibatch single digit accuracy: 78.2%
Minibatch image accuracy: 65.6%
Validation single digit accuracy: 76.5%
Validation image accuracy: 63.9%
Minibatch loss at step 2500: 1.215583
Minibatch single digit accuracy: 87.6%
Minibatch

### Model v3 - add dropout to all hidden layers

In [165]:
batch_size = 64
patch_size = 5 # conv kernel size
depth1 = 16
depth2 = 32
depth3 = 64
num_hidden = 1024

image_size = 32
num_labels = 11  # classes per digit position -- presumably 10 digits plus a "blank" class; TODO confirm
num_channels = 1 # grayscale

graph = tf.Graph()

with graph.as_default():

    def get_weight_variable(name, shape):
        """Create the variable `name` with the given shape, Xavier (conv2d) initialised."""
        return tf.get_variable(name, shape=shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())

    def bias_variable(shape):
        """Create a bias variable of the given shape with every entry set to 1.0."""
        initial = tf.constant(1.0, shape = shape)
        return tf.Variable(initial)

    def conv2d(data, weight):
        """Convolve `data` with `weight` using stride 1 and 'SAME' padding."""
        # strides [1, x_movement, y_movement, 1]
        return tf.nn.conv2d(data, weight, strides = [1, 1, 1, 1], padding = 'SAME')

    def max_pooling(data):
        """Apply 2x2 max pooling with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(data, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

    def get_label_wb(weight_name):
        """Create the (weights, biases) pair of one output head.

        The weights are a [num_hidden, num_labels] Xavier-initialised variable
        named `weight_name`; the biases are an unnamed variable filled with 1.0.
        """
        weights = tf.get_variable(weight_name, shape=[num_hidden, num_labels],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        return weights, biases

    # Input data.
    # The training batch is fed at every step; the validation and test sets
    # are baked into the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # L2 regularisation strength; the loss depends on it, so it must be fed
    # in every session.run() call that evaluates `loss` or `optimizer`.
    beta_regul = tf.placeholder(tf.float32)


    # Variables
    # conv1 layer 1
    layer1_weights = get_weight_variable('CNN_W1', [patch_size, patch_size, num_channels, depth1])
    layer1_biases = bias_variable([depth1]) # 16
    # conv2 layer 2
    layer2_weights = get_weight_variable('CNN_W2', [patch_size, patch_size, depth1, depth2])
    layer2_biases = bias_variable([depth2]) # 32
    # conv3 layer 3
    layer3_weights = get_weight_variable('CNN_W3', [patch_size, patch_size, depth2, depth3])
    layer3_biases = bias_variable([depth3]) # 64

    # func1 layer 4 (fully connected)
    layer4_weights = get_weight_variable('FC_W1',[4 * 4 * depth3, num_hidden])
    layer4_biases = bias_variable([num_hidden])

    # One output head per digit position (matched against label columns 1..5).
    s1_w, s1_b = get_label_wb('S1_W')
    s2_w, s2_b = get_label_wb('S2_W')
    s3_w, s3_b = get_label_wb('S3_W')
    s4_w, s4_b = get_label_wb('S4_W')
    s5_w, s5_b = get_label_wb('S5_W')

    sw = [s1_w, s2_w, s3_w, s4_w, s5_w]

    global_step = tf.Variable(0)  # count the number of steps taken.

    def model(dataset, keep_prob, keep_prob2):
        """Build the forward pass and return the five per-head logits.

        dataset    -- image batch tensor; the flatten step below reads its
                      static shape, so the shape must be fully defined.
        keep_prob  -- dropout keep probability after the fully connected layer.
        keep_prob2 -- dropout keep probability after the 2nd and 3rd pooling layers.

        Returns the list [logits_1, ..., logits_5], one tensor per output head.
        Every call adds a new copy of the network ops to the graph (the
        variables are shared), so call it once per dataset.
        """
        # conv1 layer 1
        hidden1 = tf.nn.relu(conv2d(dataset, layer1_weights) + layer1_biases) # 32 * 32 * depth1
        hidden1 = tf.nn.local_response_normalization(hidden1)
        pool1 = max_pooling(hidden1) # 16 * 16 * depth1

        # conv2 layer 2
        hidden2 = tf.nn.relu(conv2d(pool1, layer2_weights) + layer2_biases) # 16 * 16 * depth2
        hidden2 = tf.nn.local_response_normalization(hidden2)
        pool2 = max_pooling(hidden2) # 8 * 8 * depth2

        pool2 = tf.nn.dropout(pool2, keep_prob2)

        # conv3 layer 3
        hidden3 = tf.nn.relu(conv2d(pool2, layer3_weights) + layer3_biases) # 8 * 8 * depth3
        hidden3 = tf.nn.local_response_normalization(hidden3)
        pool3 = max_pooling(hidden3) # 4 * 4 * depth3

        pool3 = tf.nn.dropout(pool3, keep_prob2)

        # Flatten everything but the batch dimension.
        shape = pool3.get_shape().as_list()
        pool3_flat = tf.reshape(pool3, [shape[0], shape[1] * shape[2] * shape[3]]) # 4 * 4 * depth3 = 1024

        # func1 layer 4
        hidden4 = tf.nn.relu(tf.matmul(pool3_flat, layer4_weights) + layer4_biases)
        hidden4_drop = tf.nn.dropout(hidden4, keep_prob)

        logits_1 = tf.matmul(hidden4_drop, s1_w) + s1_b
        logits_2 = tf.matmul(hidden4_drop, s2_w) + s2_b
        logits_3 = tf.matmul(hidden4_drop, s3_w) + s3_b
        logits_4 = tf.matmul(hidden4_drop, s4_w) + s4_b
        logits_5 = tf.matmul(hidden4_drop, s5_w) + s5_b

        return [logits_1, logits_2, logits_3, logits_4, logits_5]

    # Training computation (keep 50% after the FC layer, 80% after pool2/pool3).
    logits = model(tf_train_dataset, 0.5, 0.8)

    # Head i is trained against label column i + 1 (column 0 is read as the
    # sequence length by accuracy_single). Each head adds an L2 penalty on its
    # own weight matrix only.
    loss_per_digit = [tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i],
                            labels=tf_train_labels[:,i+1]
                        )) + beta_regul * tf.nn.l2_loss(sw[i])
                       for i in range(5)]

    loss = tf.add_n(loss_per_digit)

    # Optimizer: Adam, learning rate multiplied by 0.90 every 1000 steps.
    learning_rate = tf.train.exponential_decay(0.001, global_step, 1000, 0.90, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    def prediction_softmax(dataset):
        """Return the stacked per-head softmax outputs for `dataset`.

        The inference network is built once with dropout disabled (both keep
        probabilities 1.0) and all five heads read from that single copy.
        The result stacks the five softmax tensors along a new leading axis.
        """
        eval_logits = model(dataset, 1.0, 1.0)
        prediction = tf.pack([tf.nn.softmax(head_logits) for head_logits in eval_logits])
        return prediction

    # Predictions for the training, validation, and test data.
    train_prediction = prediction_softmax(tf_train_dataset)
    valid_prediction = prediction_softmax(tf_valid_dataset)
    test_prediction = prediction_softmax(tf_test_dataset)

    # Save Model
    saver = tf.train.Saver()

In [164]:
num_steps = 20001

# Train the Model v3 graph from freshly initialised variables and report
# minibatch and validation metrics every 500 steps, then the test metrics.
with tf.Session(graph=graph) as session:

    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # The modulo keeps every slice a full batch of `batch_size` rows.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array fed to it.
        # `beta_regul` is a placeholder that the loss depends on, so it has to
        # be fed as well; without it session.run() fails with a missing-feed
        # error. 1e-3 matches the value used by the other training cells.
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, beta_regul : 1e-3}
        _, batch_loss, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            # Run the validation forward pass once and reuse it for both
            # metrics instead of evaluating the whole set twice.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, batch_loss))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Same idea for the test set: one forward pass, two metrics.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

Initialized
Minibatch loss at step 0: 26.544254
Minibatch single digit accuracy: 9.5%
Minibatch image accuracy: 0.0%
Validation single digit accuracy: 18.0%
Validation image accuracy: 1.2%
Minibatch loss at step 500: 6.082138
Minibatch single digit accuracy: 15.2%
Minibatch image accuracy: 3.1%
Validation single digit accuracy: 19.5%
Validation image accuracy: 3.6%
Minibatch loss at step 1000: 4.306312
Minibatch single digit accuracy: 39.9%
Minibatch image accuracy: 18.8%
Validation single digit accuracy: 40.4%
Validation image accuracy: 18.9%
Minibatch loss at step 1500: 2.673647
Minibatch single digit accuracy: 67.9%
Minibatch image accuracy: 51.6%
Validation single digit accuracy: 64.2%
Validation image accuracy: 47.0%
Minibatch loss at step 2000: 2.611204
Minibatch single digit accuracy: 71.8%
Minibatch image accuracy: 56.2%
Validation single digit accuracy: 73.3%
Validation image accuracy: 59.7%
Minibatch loss at step 2500: 1.609107
Minibatch single digit accuracy: 85.4%
Minibatch

### Model v4 - change the dropout rate on the CNN layers

In [18]:
batch_size = 64
patch_size = 5 # conv kernel size
depth1 = 16
depth2 = 32
depth3 = 64
num_hidden = 1024

image_size = 32
num_labels = 11  # classes per digit position -- presumably 10 digits plus a "blank" class; TODO confirm
num_channels = 1 # grayscale

graph = tf.Graph()

with graph.as_default():

    def get_weight_variable(name, shape):
        """Create the variable `name` with the given shape, Xavier (conv2d) initialised."""
        return tf.get_variable(name, shape=shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())

    def bias_variable(shape):
        """Create a bias variable of the given shape with every entry set to 1.0."""
        initial = tf.constant(1.0, shape = shape)
        return tf.Variable(initial)

    def conv2d(data, weight):
        """Convolve `data` with `weight` using stride 1 and 'SAME' padding."""
        # strides [1, x_movement, y_movement, 1]
        return tf.nn.conv2d(data, weight, strides = [1, 1, 1, 1], padding = 'SAME')

    def max_pooling(data):
        """Apply 2x2 max pooling with stride 2 and 'SAME' padding."""
        return tf.nn.max_pool(data, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

    def get_label_wb(weight_name):
        """Create the (weights, biases) pair of one output head.

        The weights are a [num_hidden, num_labels] Xavier-initialised variable
        named `weight_name`; the biases are an unnamed variable filled with 1.0.
        """
        weights = tf.get_variable(weight_name, shape=[num_hidden, num_labels],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        return weights, biases

    # Input data.
    # The training batch is fed at every step; the validation and test sets
    # are baked into the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # L2 regularisation strength; the loss depends on it, so it must be fed
    # in every session.run() call that evaluates `loss` or `optimizer`.
    beta_regul = tf.placeholder(tf.float32)


    # Variables
    # conv1 layer 1
    layer1_weights = get_weight_variable('CNN_W1', [patch_size, patch_size, num_channels, depth1])
    layer1_biases = bias_variable([depth1]) # 16
    # conv2 layer 2
    layer2_weights = get_weight_variable('CNN_W2', [patch_size, patch_size, depth1, depth2])
    layer2_biases = bias_variable([depth2]) # 32
    # conv3 layer 3
    layer3_weights = get_weight_variable('CNN_W3', [patch_size, patch_size, depth2, depth3])
    layer3_biases = bias_variable([depth3]) # 64

    # func1 layer 4 (fully connected)
    layer4_weights = get_weight_variable('FC_W1',[4 * 4 * depth3, num_hidden])
    layer4_biases = bias_variable([num_hidden])

    # One output head per digit position (matched against label columns 1..5).
    s1_w, s1_b = get_label_wb('S1_W')
    s2_w, s2_b = get_label_wb('S2_W')
    s3_w, s3_b = get_label_wb('S3_W')
    s4_w, s4_b = get_label_wb('S4_W')
    s5_w, s5_b = get_label_wb('S5_W')

    sw = [s1_w, s2_w, s3_w, s4_w, s5_w]

    global_step = tf.Variable(0)  # count the number of steps taken.

    def model(dataset, keep_prob, keep_prob2):
        """Build the forward pass and return the five per-head logits.

        dataset    -- image batch tensor; the flatten step below reads its
                      static shape, so the shape must be fully defined.
        keep_prob  -- dropout keep probability after the fully connected layer.
        keep_prob2 -- dropout keep probability after the 2nd and 3rd pooling layers.

        Returns the list [logits_1, ..., logits_5], one tensor per output head.
        Every call adds a new copy of the network ops to the graph (the
        variables are shared), so call it once per dataset.
        """
        # conv1 layer 1
        hidden1 = tf.nn.relu(conv2d(dataset, layer1_weights) + layer1_biases) # 32 * 32 * depth1
        hidden1 = tf.nn.local_response_normalization(hidden1)
        pool1 = max_pooling(hidden1) # 16 * 16 * depth1

        # conv2 layer 2
        hidden2 = tf.nn.relu(conv2d(pool1, layer2_weights) + layer2_biases) # 16 * 16 * depth2
        hidden2 = tf.nn.local_response_normalization(hidden2)
        pool2 = max_pooling(hidden2) # 8 * 8 * depth2

        pool2 = tf.nn.dropout(pool2, keep_prob2)

        # conv3 layer 3
        hidden3 = tf.nn.relu(conv2d(pool2, layer3_weights) + layer3_biases) # 8 * 8 * depth3
        hidden3 = tf.nn.local_response_normalization(hidden3)
        pool3 = max_pooling(hidden3) # 4 * 4 * depth3

        pool3 = tf.nn.dropout(pool3, keep_prob2)

        # Flatten everything but the batch dimension.
        shape = pool3.get_shape().as_list()
        pool3_flat = tf.reshape(pool3, [shape[0], shape[1] * shape[2] * shape[3]]) # 4 * 4 * depth3 = 1024

        # func1 layer 4
        hidden4 = tf.nn.relu(tf.matmul(pool3_flat, layer4_weights) + layer4_biases)
        hidden4_drop = tf.nn.dropout(hidden4, keep_prob)

        logits_1 = tf.matmul(hidden4_drop, s1_w) + s1_b
        logits_2 = tf.matmul(hidden4_drop, s2_w) + s2_b
        logits_3 = tf.matmul(hidden4_drop, s3_w) + s3_b
        logits_4 = tf.matmul(hidden4_drop, s4_w) + s4_b
        logits_5 = tf.matmul(hidden4_drop, s5_w) + s5_b

        return [logits_1, logits_2, logits_3, logits_4, logits_5]

    # Training computation (keep 50% after the FC layer and after pool2/pool3).
    logits = model(tf_train_dataset, 0.5, 0.5)

    # Head i is trained against label column i + 1 (column 0 is read as the
    # sequence length by accuracy_single). Each head adds an L2 penalty on its
    # own weight matrix only.
    loss_per_digit = [tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i],
                            labels=tf_train_labels[:,i+1]
                        )) + beta_regul * tf.nn.l2_loss(sw[i])
                       for i in range(5)]

    loss = tf.add_n(loss_per_digit)

    # Optimizer: Adam, learning rate multiplied by 0.90 every 1000 steps.
    learning_rate = tf.train.exponential_decay(0.001, global_step, 1000, 0.90, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    def prediction_softmax(dataset):
        """Return the stacked per-head softmax outputs for `dataset`.

        The inference network is built once with dropout disabled (both keep
        probabilities 1.0) and all five heads read from that single copy.
        The result stacks the five softmax tensors along a new leading axis.
        """
        eval_logits = model(dataset, 1.0, 1.0)
        prediction = tf.pack([tf.nn.softmax(head_logits) for head_logits in eval_logits])
        return prediction

    # Predictions for the training, validation, and test data.
    train_prediction = prediction_softmax(tf_train_dataset)
    valid_prediction = prediction_softmax(tf_valid_dataset)
    test_prediction = prediction_softmax(tf_test_dataset)

    # Save Model
    saver = tf.train.Saver()

In [17]:
num_steps = 20001

# Train the Model v4 graph from freshly initialised variables, report metrics
# every 500 steps, print the test metrics and write the checkpoint "3cnn.ckpt".
with tf.Session(graph=graph) as session:

    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # The modulo keeps every slice a full batch of `batch_size` rows.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array fed to it;
        # beta_regul is the L2 regularisation strength.
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, beta_regul : 1e-3}
        _, batch_loss, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            # Run the validation forward pass once and reuse it for both
            # metrics instead of evaluating the whole set twice.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, batch_loss))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Same idea for the test set: one forward pass, two metrics.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

    save_path = saver.save(session, "3cnn.ckpt")
    print('Model saved in file: {}'.format(save_path))

Initialized
Minibatch loss at step 0: 17.567846
Minibatch single digit accuracy: 12.2%
Minibatch image accuracy: 0.0%
Validation single digit accuracy: 19.5%
Validation image accuracy: 3.6%
Minibatch loss at step 500: 6.044502
Minibatch single digit accuracy: 15.2%
Minibatch image accuracy: 3.1%
Validation single digit accuracy: 19.5%
Validation image accuracy: 3.6%
Minibatch loss at step 1000: 5.449240
Minibatch single digit accuracy: 21.7%
Minibatch image accuracy: 7.8%
Validation single digit accuracy: 19.3%
Validation image accuracy: 3.6%
Minibatch loss at step 1500: 4.672151
Minibatch single digit accuracy: 37.1%
Minibatch image accuracy: 12.5%
Validation single digit accuracy: 33.6%
Validation image accuracy: 12.7%
Minibatch loss at step 2000: 4.028084
Minibatch single digit accuracy: 54.9%
Minibatch image accuracy: 42.2%
Validation single digit accuracy: 56.8%
Validation image accuracy: 40.1%
Minibatch loss at step 2500: 2.689013
Minibatch single digit accuracy: 70.8%
Minibatch 

In [19]:
num_steps = 10001

# Continue training Model v4 from the checkpoint "3cnn.ckpt", report metrics
# every 500 steps, print the test metrics and overwrite the checkpoint.
with tf.Session(graph=graph) as session:

    # Variable values come from the checkpoint, so no initializer is run here.
    saver.restore(session, "3cnn.ckpt")
    print("Model restored!")

    print('Initialized')
    for step in range(num_steps):
        # The modulo keeps every slice a full batch of `batch_size` rows.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, beta_regul : 1e-3}
        _, batch_loss, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            # Run the validation forward pass once and reuse it for both
            # metrics instead of evaluating the whole set twice.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, batch_loss))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Same idea for the test set: one forward pass, two metrics.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

    save_path = saver.save(session, "3cnn.ckpt")
    print('Model saved in file: {}'.format(save_path))

Model restored!
Initialized
Minibatch loss at step 0: 1.707028
Minibatch single digit accuracy: 85.0%
Minibatch image accuracy: 78.1%
Validation single digit accuracy: 82.7%
Validation image accuracy: 73.3%
Minibatch loss at step 500: 1.836549
Minibatch single digit accuracy: 85.5%
Minibatch image accuracy: 79.7%
Validation single digit accuracy: 83.2%
Validation image accuracy: 73.8%
Minibatch loss at step 1000: 1.508532
Minibatch single digit accuracy: 86.7%
Minibatch image accuracy: 82.8%
Validation single digit accuracy: 83.1%
Validation image accuracy: 73.8%
Minibatch loss at step 1500: 1.532207
Minibatch single digit accuracy: 90.0%
Minibatch image accuracy: 85.9%
Validation single digit accuracy: 83.0%
Validation image accuracy: 73.7%
Minibatch loss at step 2000: 1.768131
Minibatch single digit accuracy: 86.6%
Minibatch image accuracy: 78.1%
Validation single digit accuracy: 83.2%
Validation image accuracy: 74.0%
Minibatch loss at step 2500: 1.279365
Minibatch single digit accur

### Add one conv layer (abandoned: ran out of RAM)

In [36]:
batch_size = 64
patch_size = 5 # conv kernel size (layers 1 and 2)
patch_size2 = 5 # conv kernel size (layer 3)
depth1 = 16
depth2 = 32
depth3 = 128  # previously tried: 48
num_hidden = 64

image_size = 32
num_labels = 11  # classes per digit position -- presumably 10 digits plus a "blank" class; TODO confirm
num_channels = 1 # grayscale

graph = tf.Graph()

with graph.as_default():

    def get_weight_variable(name, shape):
        """Create the variable `name` with the given shape, Xavier (conv2d) initialised."""
        return tf.get_variable(name, shape=shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())

    def bias_variable(shape):
        """Create a bias variable of the given shape with every entry set to 1.0."""
        initial = tf.constant(1.0, shape = shape)
        return tf.Variable(initial)

    def conv2d(data, weight):
        """Convolve `data` with `weight` using stride 1 and 'VALID' padding.

        With no padding, each spatial dimension shrinks by kernel_size - 1.
        """
        # strides [1, x_movement, y_movement, 1]
        return tf.nn.conv2d(data, weight, strides = [1, 1, 1, 1], padding = 'VALID')

    def max_pooling(data, strides):
        """Apply 2x2 max pooling with the given `strides` and 'SAME' padding."""
        return tf.nn.max_pool(data, ksize = [1, 2, 2, 1], strides = strides, padding = 'SAME')

    def get_label_wb(weight_name):
        """Create the (weights, biases) pair of one output head.

        The weights are a [num_hidden, num_labels] Xavier-initialised variable
        named `weight_name`; the biases are an unnamed variable filled with 1.0.
        """
        weights = tf.get_variable(weight_name, shape=[num_hidden, num_labels],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        return weights, biases

    # Input data.
    # The training batch is fed at every step; the validation and test sets
    # are baked into the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    # L2 regularisation strength; the loss depends on it, so it must be fed
    # in every session.run() call that evaluates `loss` or `optimizer`.
    beta_regul = tf.placeholder(tf.float32)


    # Variables
    # conv1 layer 1
    layer1_weights = get_weight_variable('CNN_W1', [patch_size, patch_size, num_channels, depth1])
    layer1_biases = bias_variable([depth1]) # 16
    # conv2 layer 2
    layer2_weights = get_weight_variable('CNN_W2', [patch_size, patch_size, depth1, depth2])
    layer2_biases = bias_variable([depth2]) # 32
    # conv3 layer 3
    layer3_weights = get_weight_variable('CNN_W3', [patch_size2, patch_size2, depth2, depth3])
    layer3_biases = bias_variable([depth3]) # 128
    # NOTE: a fourth conv layer ('CNN_W4') was drafted here but is disabled;
    # it referenced a `depth4` that is not defined in this cell.

    # func1 layer 5 (fully connected)
    # With 32x32 inputs and 5x5 kernels the last pooling output is
    # 3 * 3 * depth3 (= 1152 for depth3 = 128); see the shapes in model().
    layer5_weights = get_weight_variable('FC_W1',[3 * 3 * depth3, num_hidden])
    layer5_biases = bias_variable([num_hidden])

    # One output head per digit position (matched against label columns 1..5).
    s1_w, s1_b = get_label_wb('S1_W')
    s2_w, s2_b = get_label_wb('S2_W')
    s3_w, s3_b = get_label_wb('S3_W')
    s4_w, s4_b = get_label_wb('S4_W')
    s5_w, s5_b = get_label_wb('S5_W')

    sw = [s1_w, s2_w, s3_w, s4_w, s5_w]

    global_step = tf.Variable(0)  # count the number of steps taken.

    def model(dataset, keep_prob, keep_prob2):
        """Build the forward pass and return the five per-head logits.

        dataset    -- image batch tensor; the flatten step below reads its
                      static shape, so the shape must be fully defined.
        keep_prob  -- dropout keep probability after the fully connected layer.
        keep_prob2 -- dropout keep probability after the 2nd pooling layer.

        Returns the list [logits_1, ..., logits_5], one tensor per output head.
        Every call adds a new copy of the network ops to the graph (the
        variables are shared), so call it once per dataset.
        The shape comments assume 32x32 inputs and 5x5 kernels.
        """
        # conv1 layer 1
        hidden1 = tf.nn.relu(conv2d(dataset, layer1_weights) + layer1_biases) # 28 * 28 * depth1
        hidden1 = tf.nn.local_response_normalization(hidden1)
        pool1 = max_pooling(hidden1, [1, 2, 2, 1]) # 14 * 14 * depth1

        # conv2 layer 2
        hidden2 = tf.nn.relu(conv2d(pool1, layer2_weights) + layer2_biases) # 10 * 10 * depth2
        hidden2 = tf.nn.local_response_normalization(hidden2)
        pool2 = max_pooling(hidden2, [1, 1, 1, 1]) # stride 1: still 10 * 10 * depth2
        pool2 = tf.nn.dropout(pool2, keep_prob2)

        # conv3 layer 3
        hidden3 = tf.nn.relu(conv2d(pool2, layer3_weights) + layer3_biases) # 6 * 6 * depth3
        hidden3 = tf.nn.local_response_normalization(hidden3)
        pool3 = max_pooling(hidden3, [1, 2, 2, 1]) # 3 * 3 * depth3
        # No dropout is applied after pool3 in this variant, and the drafted
        # fourth conv layer is disabled.

        # Flatten everything but the batch dimension.
        shape = pool3.get_shape().as_list()
        pool3_flat = tf.reshape(pool3, [shape[0], shape[1] * shape[2] * shape[3]]) # 3 * 3 * depth3 = 1152

        # func1 layer 5
        hidden5 = tf.nn.relu(tf.matmul(pool3_flat, layer5_weights) + layer5_biases)
        hidden5_drop = tf.nn.dropout(hidden5, keep_prob)

        logits_1 = tf.matmul(hidden5_drop, s1_w) + s1_b
        logits_2 = tf.matmul(hidden5_drop, s2_w) + s2_b
        logits_3 = tf.matmul(hidden5_drop, s3_w) + s3_b
        logits_4 = tf.matmul(hidden5_drop, s4_w) + s4_b
        logits_5 = tf.matmul(hidden5_drop, s5_w) + s5_b

        return [logits_1, logits_2, logits_3, logits_4, logits_5]

    # Training computation (keep 50% after the FC layer, 80% after pool2).
    logits = model(tf_train_dataset, 0.5, 0.8)

    # Head i is trained against label column i + 1 (column 0 is read as the
    # sequence length by accuracy_single). Each head adds an L2 penalty on its
    # own weight matrix only.
    loss_per_digit = [tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i],
                            labels=tf_train_labels[:,i+1]
                        )) + beta_regul * tf.nn.l2_loss(sw[i])
                       for i in range(5)]

    loss = tf.add_n(loss_per_digit)

    # Optimizer: Adam, learning rate multiplied by 0.90 every 1000 steps.
    learning_rate = tf.train.exponential_decay(0.001, global_step, 1000, 0.90, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    def prediction_softmax(dataset):
        """Return the stacked per-head softmax outputs for `dataset`.

        The inference network is built once with dropout disabled (both keep
        probabilities 1.0) and all five heads read from that single copy;
        building it once per head would add five copies of the network over
        the full validation and test sets to the graph.
        The result stacks the five softmax tensors along a new leading axis.
        """
        eval_logits = model(dataset, 1.0, 1.0)
        prediction = tf.pack([tf.nn.softmax(head_logits) for head_logits in eval_logits])
        return prediction

    # Predictions for the training, validation, and test data.
    train_prediction = prediction_softmax(tf_train_dataset)
    valid_prediction = prediction_softmax(tf_valid_dataset)
    test_prediction = prediction_softmax(tf_test_dataset)

    # Save Model
    saver = tf.train.Saver()

In [33]:
num_steps = 10001

# Train the graph above from freshly initialised variables, report metrics
# every 500 steps, print the test metrics and write the checkpoint "5cnn.ckpt".
with tf.Session(graph=graph) as session:

    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # The modulo keeps every slice a full batch of `batch_size` rows.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array fed to it;
        # beta_regul is the L2 regularisation strength.
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, beta_regul : 1e-3}
        _, batch_loss, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            # Run the validation forward pass once and reuse it for both
            # metrics instead of evaluating the whole set twice.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, batch_loss))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Same idea for the test set: one forward pass, two metrics.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

    save_path = saver.save(session, "5cnn.ckpt")
    print('Model saved in file: {}'.format(save_path))

Initialized
Minibatch loss at step 0: 21.580215
Minibatch single digit accuracy: 2.7%
Minibatch image accuracy: 0.0%
Validation single digit accuracy: 6.9%
Validation image accuracy: 0.0%
Minibatch loss at step 500: 6.046764
Minibatch single digit accuracy: 12.4%
Minibatch image accuracy: 3.1%
Validation single digit accuracy: 19.1%
Validation image accuracy: 3.6%
Minibatch loss at step 1000: 5.019938
Minibatch single digit accuracy: 33.6%
Minibatch image accuracy: 10.9%
Validation single digit accuracy: 33.9%
Validation image accuracy: 13.1%
Minibatch loss at step 1500: 4.056178
Minibatch single digit accuracy: 54.3%
Minibatch image accuracy: 34.4%
Validation single digit accuracy: 48.2%
Validation image accuracy: 28.1%
Minibatch loss at step 2000: 3.900603
Minibatch single digit accuracy: 50.7%
Minibatch image accuracy: 34.4%
Validation single digit accuracy: 56.2%
Validation image accuracy: 37.0%
Minibatch loss at step 2500: 3.121350
Minibatch single digit accuracy: 67.2%
Minibatch 

In [38]:
num_steps = 10001

# Continue training from the checkpoint "5cnn.ckpt", report metrics every
# 500 steps, print the test metrics and overwrite the checkpoint.
with tf.Session(graph=graph) as session:

    # Variable values come from the checkpoint, so no initializer is run here.
    saver.restore(session, "5cnn.ckpt")
    print("Model restored!")

    print('Initialized')
    for step in range(num_steps):
        # The modulo keeps every slice a full batch of `batch_size` rows.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, beta_regul : 1e-3}
        _, batch_loss, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            # Run the validation forward pass once and reuse it for both
            # metrics instead of evaluating the whole set twice.
            valid_predictions = valid_prediction.eval()
            print('Minibatch loss at step %d: %f' % (step, batch_loss))
            print('Minibatch single digit accuracy: %.1f%%' % accuracy_single(predictions, batch_labels))
            print('Minibatch image accuracy: %.1f%%' % accuracy_multi(predictions, batch_labels))
            print('Validation single digit accuracy: %.1f%%' % accuracy_single(valid_predictions, valid_labels))
            print('Validation image accuracy: %.1f%%' % accuracy_multi(valid_predictions, valid_labels))

    # Same idea for the test set: one forward pass, two metrics.
    test_predictions = test_prediction.eval()
    print('Test single digit accuracy: %.1f%%' % accuracy_single(test_predictions, test_labels))
    print('Test image accuracy: %.1f%%' % accuracy_multi(test_predictions, test_labels))

    save_path = saver.save(session, "5cnn.ckpt")
    print('Model saved in file: {}'.format(save_path))

Model restored!
Initialized
Minibatch loss at step 0: 1.978281
Minibatch single digit accuracy: 85.0%
Minibatch image accuracy: 78.1%
Validation single digit accuracy: 79.8%
Validation image accuracy: 68.9%
Minibatch loss at step 500: 2.169738
Minibatch single digit accuracy: 86.9%
Minibatch image accuracy: 79.7%
Validation single digit accuracy: 80.0%
Validation image accuracy: 69.2%
Minibatch loss at step 1000: 2.131892
Minibatch single digit accuracy: 83.2%
Minibatch image accuracy: 75.0%
Validation single digit accuracy: 80.0%
Validation image accuracy: 69.5%
Minibatch loss at step 1500: 1.469676
Minibatch single digit accuracy: 86.4%
Minibatch image accuracy: 79.7%
Validation single digit accuracy: 79.9%
Validation image accuracy: 69.7%
Minibatch loss at step 2000: 2.054354
Minibatch single digit accuracy: 85.2%
Minibatch image accuracy: 81.2%
Validation single digit accuracy: 80.4%
Validation image accuracy: 70.3%
Minibatch loss at step 2500: 1.332862
Minibatch single digit accur