Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is make the neural network convolutional.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import time

print("TensorFlow Version:", tf.__version__)

TensorFlow Version: 1.13.1


In [2]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [3]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [4]:
log_dir = './logs/4_convolutions'
#Delete previous logs
if tf.gfile.Exists(log_dir):
    tf.gfile.DeleteRecursively(log_dir)
#Create folder
tf.gfile.MakeDirs(log_dir)

In [9]:
# We can't initialize these variables to 0 - the network will get stuck.
def weight_variable(shape):
    """Create a weight variable with appropriate initialization."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    """Create a bias variable with appropriate initialization."""
    initial = tf.constant(0.1, shape = shape)
    #initial = tf.zeros(shape=shape)
    return tf.Variable(initial)

def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)

        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))

        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

def conv_layer(input_tensor, patch_size, num_channels, depth, layer_name, padMethod='SAME'):
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.variable_scope(layer_name, reuse = tf.AUTO_REUSE) as layer:
        '''VARIABLES'''
        # These are the parameters that we are going to be training. The weight
        # matrix will be initialized using random values following a (truncated)
        # normal distribution. The biases get initialized to zero.

#        print(layer)
        #print(tf.get_variable_scope().original_name_scope)
        
        # This Variable will hold the state of the weights for the layer
#         with tf.name_scope('weights'):
#             weights = weight_variable([patch_size, patch_size, num_channels, depth])
        weights = tf.get_variable(name = 'weights',
                                   shape = [patch_size, patch_size, num_channels, depth],
                                   dtype = tf.float32,
                                   trainable = True,
                                   initializer = tf.initializers.truncated_normal(stddev=0.1))

        #variable_summaries(weights)

#         with tf.name_scope('biases'):
#             biases = bias_variable([depth])
            #variable_summaries(biases)
            
        biases = tf.get_variable(name = 'biases',
                                 shape = [depth],
                                 dtype = tf.float32,
                                 trainable = True,
                                 initializer = tf.constant_initializer(0.1))

    # Convolutional Layer
    with tf.variable_scope(layer.name + '/Convolution/'):
        conv = tf.nn.conv2d(input_tensor, weights, [1, 2, 2, 1], padding = padMethod)

    with tf.variable_scope(layer.name + '/conv_plus_b/'):
        tmp = conv + biases

    with tf.variable_scope(layer.name + '/Activation/'):
        output_tensor = tf.nn.relu(tmp)
        #tf.summary.histogram('output_tensor', output_tensor)

    return output_tensor, weights, biases

def conv_layer_with_MaxPooling(input_tensor, patch_size, num_channels, depth, layer_name, padMethod='SAME'):
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.variable_scope(layer_name, reuse = tf.AUTO_REUSE) as layer:
        '''VARIABLES'''
        # This Variable will hold the state of the weights for the layer
        weights = tf.get_variable(name = 'weights',
                                   shape = [patch_size, patch_size, num_channels, depth],
                                   dtype = tf.float32,
                                   trainable = True,
                                   initializer = tf.initializers.truncated_normal(stddev=0.1))

        biases = tf.get_variable(name = 'biases',
                                 shape = [depth],
                                 dtype = tf.float32,
                                 trainable = True,
                                 initializer = tf.constant_initializer(0.1))

    # Convolutional Layer
    with tf.name_scope(layer.name + '/Convolution/'):
        conv = tf.nn.conv2d(input_tensor, weights, [1, 1, 1, 1], padding = padMethod)

    with tf.name_scope(layer.name + '/conv_plus_b/'):
        tmp = conv + biases

    with tf.name_scope(layer.name + '/Activation/'):
        hidden = tf.nn.relu(tmp)
        #tf.summary.histogram('output_tensor', output_tensor)

    with tf.name_scope(layer.name + '/Max_Pooling/'):
        pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        
    return pool, weights, biases

def conv_layer_with_Pooling(input_tensor, patch_size, num_channels, depth, layer_name, pool = 'max', padMethod='SAME' ):
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.variable_scope(layer_name, reuse = tf.AUTO_REUSE) as layer:
        '''VARIABLES'''
        # This Variable will hold the state of the weights for the layer
        weights = tf.get_variable(name = 'weights',
                                   shape = [patch_size, patch_size, num_channels, depth],
                                   dtype = tf.float32,
                                   trainable = True,
                                   initializer = tf.initializers.truncated_normal(stddev=0.1))

        biases = tf.get_variable(name = 'biases',
                                 shape = [depth],
                                 dtype = tf.float32,
                                 trainable = True,
                                 initializer = tf.constant_initializer(0.1))

    # Convolutional Layer
    with tf.name_scope(layer.name + '/Convolution/'):
        conv = tf.nn.conv2d(input_tensor, weights, [1, 1, 1, 1], padding = padMethod)

    with tf.name_scope(layer.name + '/conv_plus_b/'):
        tmp = conv + biases

    with tf.name_scope(layer.name + '/Activation/'):
        hidden = tf.nn.relu(tmp)
        #tf.summary.histogram('output_tensor', output_tensor)

    if (pool=='max'):
        with tf.name_scope(layer.name + '/Max_Pooling/'):
            pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding = padMethod)
    elif (pool=='avg'):
        with tf.name_scope(layer.name + '/Average_Pooling/'):
            pool = tf.nn.avg_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding = padMethod)
    
    return pool, weights, biases
       
def fully_layer(input_tensor, input_dim, output_dim, layer_name, keep_prob = 1.0):
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.variable_scope(layer_name, reuse = tf.AUTO_REUSE) as layer:
        '''VARIABLES'''
        # These are the parameters that we are going to be training. The weight
        # matrix will be initialized using random values following a (truncated)
        # normal distribution. The biases get initialized to zero.

        # This Variable will hold the state of the weights for the layer
        #with tf.name_scope('weights'):
        # weights = weight_variable([input_dim, output_dim])
            #variable_summaries(weights)
            
        weights = tf.get_variable(name = 'weights',
                                   shape = [input_dim, output_dim],
                                   dtype = tf.float32,
                                   initializer = tf.initializers.truncated_normal(stddev=0.1))

        #with tf.name_scope('biases'):
        #    biases = bias_variable([output_dim])
            #variable_summaries(biases)
            
        biases = tf.get_variable(name = 'biases',
                                   shape = [output_dim],
                                   dtype = tf.float32,
                                   initializer = tf.constant_initializer(0.1))

    # Reshape only when it is required    
    if (input_tensor.shape[1] != weights.shape[1]):
        with tf.name_scope(layer.name + '/reshape/'):
            shape = tf.shape(input_tensor)
            reshape = tf.reshape(input_tensor, [shape[0], shape[1] * shape[2] * shape[3]])
    else:
        reshape = input_tensor
        
    '''TRAINING COMPUTATION'''
    # We multiply the inputs with the weight matrix, and add biases. We compute
    # the softmax and cross-entropy (it's one operation in TensorFlow, because
    # it's very common, and it can be optimized)
    with tf.name_scope(layer.name + '/Wx_plus_b/'):
        logit = tf.matmul(reshape, weights) + biases
        #tf.summary.histogram('logits', logits)

    with tf.name_scope(layer.name + '/Activation/'):
         hidden = tf.nn.relu(logit)
        #tf.summary.histogram('output_tensor', output_tensor)

    if (keep_prob != 1.0):
        with tf.name_scope(layer.name + '/Drop_Out/'):
            output_tensor = tf.nn.dropout(hidden, rate = 1.0 - keep_prob)
    else:
        output_tensor = hidden        
        
    return output_tensor, weights, biases

def layer(input_tensor, input_dim, output_dim, layer_name, act = tf.nn.relu):
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.variable_scope(layer_name, reuse = tf.AUTO_REUSE) as layer:
        '''VARIABLES'''
        # These are the parameters that we are going to be training. The weight
        # matrix will be initialized using random values following a (truncated)
        # normal distribution. The biases get initialized to zero.

        # This Variable will hold the state of the weights for the layer
#         with tf.name_scope('weights'):
#             weights = weight_variable([input_dim, output_dim])
#             #variable_summaries(weights)
    
        weights = tf.get_variable(name = 'weights',
                                   shape = [input_dim, output_dim],
                                   dtype = tf.float32,
                                   initializer = tf.initializers.truncated_normal(stddev=0.1))

#         with tf.name_scope('biases'):
#             biases = bias_variable([output_dim])
#             #variable_summaries(biases)

        biases = tf.get_variable(name = 'biases',
                                   shape = [output_dim],
                                   dtype = tf.float32,
                                   initializer = tf.constant_initializer(0.1))

    '''TRAINING COMPUTATION'''
    # We multiply the inputs with the weight matrix, and add biases. We compute
    # the softmax and cross-entropy (it's one operation in TensorFlow, because
    # it's very common, and it can be optimized)
    with tf.name_scope(layer.name + '/Wx_plus_b/'):
        logits = tf.matmul(input_tensor, weights) + biases
        #tf.summary.histogram('logits', logits)

    return logits, weights, biases 

def create_layer(shapeWeight, depth, layer_name):
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name) as vs:
        '''VARIABLES'''
        # These are the parameters that we are going to be training. The weight
        # matrix will be initialized using random values following a (truncated)
        # normal distribution. The biases get initialized to zero.

        # This Variable will hold the state of the weights for the layer
        with tf.name_scope('weights'):
            weights = weight_variable(shapeWeight)
            #variable_summaries(weights)

        with tf.name_scope('biases'):
            biases = bias_variable([depth])
            #variable_summaries(biases)

        return vs, weights, biases 
    
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])

In [None]:
# Output size calculation
# for padding "same" which is -1
# for padding
output_1 = (28.00 - 5.00 - (-2.00)) / 2.00 + 1.00
print(np.ceil(output_1))
output_2 = (output_1 - 5.00 - (-2.00)) / 2.00 + 1.00
print(np.ceil(output_2))

New function for Image Size: No Pooling

In [6]:
# Create image size function based on input, filter size, padding and stride
# 2 convolutions only
def output_size_no_pool(input_size, filter_size, padding, conv_stride):
    if padding == 'same':
        padding = -1.00
    elif padding == 'valid':
        padding = 0.00
    else:
        return None
    output_1 = float(((input_size - filter_size - 2*padding) / conv_stride) + 1.00)
    output_2 = float(((output_1 - filter_size - 2*padding) / conv_stride) + 1.00)
    return int(np.ceil(output_2))

patch_size = 5
final_image_size = output_size_no_pool(image_size, patch_size, padding='same', conv_stride=2)

Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

tf.placeholder(
    dtype,
    shape=None,
    name=None
)

Inserts a placeholder for a tensor that will be always fed.

In [None]:
batch_size = 16
patch_size = 5
# Depth is the number of output channels 
# On the other hand, num_channels is the number of input channels set at 1 previously
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    # Input data.
    with tf.name_scope('Input'):
        tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
        tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels), name = 'y-input')
        with tf.name_scope('Validation'):
            #tf_valid_dataset = tf.constant(valid_dataset, name = 'x-input')
            tf_valid_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            
        with tf.name_scope('Test'):
            tf_test_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            #tf_test_dataset = tf.constant(test_dataset, name = 'x-input')
    
    '''Model'''
    def model(data):
        with tf.variable_scope('Model', reuse = tf.AUTO_REUSE) as scope:
            #====================================
            # Convolution 1 Layer
            #====================================
            # Input channels: num_channels = 1
            # Output channels: depth = 16
            #layer1_weights = tf.Variable(tf.truncated_normal( [patch_size, patch|_size, num_channels, depth], stddev=0.1))
            #layer1_biases = tf.Variable(tf.zeros([depth]))
            # First Convolutional Layer
            #conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            #hidden = tf.nn.relu(conv + layer1_biases)
            hidden, layer1_weights, layer1_biases = conv_layer(data,
                                                               patch_size,
                                                               num_channels,
                                                               depth,
                                                               "Layer1",
                                                               padMethod = 'SAME')

            #====================================
            # Convolution 2 Layer
            #====================================
            # Input channels: depth = 16
            # Output channels: depth = 16
            #layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
            #layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
            # Second Convolutional Layer
            #conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            #hidden = tf.nn.relu(conv + layer2_biases)
            hidden, layer2_weights, layer2_biases = conv_layer(hidden,
                                                               patch_size,
                                                               depth,
                                                               depth,
                                                               "Layer2",
                                                               padMethod='SAME')

            #================================================
            # Fully Connected Layer (Densely Connected Layer)
            #================================================
            # Use neurons to allow processing of entire image
            final_image_size = output_size_no_pool(image_size, patch_size, padding='same', conv_stride=2)
            #layer3_weights = tf.Variable(tf.truncated_normal([final_image_size * final_image_size * depth, num_hidden], stddev=0.1))
            #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
            # Full Connected Layer
            #shape = hidden.get_shape().as_list()
            #reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
            #hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            hidden, layer3_weights, layer3_biases = fully_layer(hidden,
                                                                final_image_size * final_image_size * depth,
                                                                num_hidden,
                                                                "Layer3")

            #layer3_weights = tf.Variable(tf.truncated_normal(
            #    [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
            #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))

            #================================================
            # Readout layer: Softmax Layer
            #================================================
            #layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
            #layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
            # Readout Layer: Softmax Layer
            #tf.matmul(hidden, layer4_weights) + layer4_biases
            output, layer4_weights, layer4_biases = layer(hidden, num_hidden, num_labels, "Layer4")
        
        return output
  
#         '''Model'''
#         def model(data):
#             # First Convolutional Layer
#             #conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
#             #hidden = tf.nn.relu(conv + layer1_biases)
#             hidden, layer1_weights, layer1_biases = conv_layer(data, patch_size, num_channels, depth, "Layer1", padMethod='SAME'):
    
#             # Second Convolutional Layer
#             #conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
#             #hidden = tf.nn.relu(conv + layer2_biases)
#             hidden, layer2_weights, layer2_biases = conv_layer(hidden, patch_size, num_channels, depth, "Layer2", padMethod='SAME'):
    
#             # Full Connected Layer
#             #shape = hidden.get_shape().as_list()
#             #reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
#             #hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
#             hidden, layer3_weights, layer3_biases = fully_layer(hidden, final_image_size * final_image_size * depth, num_hidden, "Layer3")
    
#             # Readout Layer: Softmax Layer
#             #tf.matmul(hidden, layer4_weights) + layer4_biases
#             output, layer4_weights, layer4_biases = layer(hidden, num_hidden, num_labels, "Layer")
        
#             return output
  
#         '''Training computation'''
#         logits = model(tf_train_dataset)
#         loss = tf.reduce_mean(
#         tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits))
    
#         '''Optimizer'''
#         # Learning rate of 0.05
#         optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    '''Training computation'''
    logits = model(tf_train_dataset)
    with tf.name_scope('Cost_function'):
        with tf.name_scope('cross_entropy'):
            soft = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits)
        loss = tf.reduce_mean(soft)

    #Add to Graph
    tf.summary.scalar("Loss", loss)
        
    '''Optimizer'''
    with tf.name_scope('Optimizer'):
        # Learning rate of 0.05
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    '''Predictions for the training, validation, and test data'''
    with tf.name_scope('Prediction'):
        train_prediction = tf.nn.softmax(logits)

    vlogits = model(tf_valid_dataset)
    with tf.name_scope('Validation'):
        with tf.name_scope('Prediction'):
            valid_prediction = tf.nn.softmax(vlogits)

    tlogits = model(tf_test_dataset)
    with tf.name_scope('Test'):
        with tf.name_scope('Prediction'):
            test_prediction = tf.nn.softmax(tlogits)

In [None]:
num_steps = 30000

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    merged = tf.summary.merge_all()
    write = tf.summary.FileWriter(log_dir + "/convolutional", session.graph)
    print('Initialized')
    t = time.process_time()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        summary, _, l, predictions = session.run([merged, optimizer, loss, train_prediction], feed_dict=feed_dict)
        write.add_summary(summary, step)
        if (step % 5000 == 0):
            print('===========================================')
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.2f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.2f%%' % accuracy(valid_prediction.eval({tf_valid_dataset: valid_dataset}), valid_labels))

    write.close()
    elapsed_time = time.process_time() - t
    print('=====================Finished!======================')
    print('Running time:  %.2fs' % elapsed_time)
    print('Test accuracy: %.2f%%' % accuracy(test_prediction.eval({tf_test_dataset: test_dataset}), test_labels))

In [None]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    # Input data.
    with tf.name_scope('Input'):
        tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
        tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels), name = 'y-input')
        with tf.name_scope('Validation'):
            #tf_valid_dataset = tf.constant(valid_dataset, name = 'x-input')
            tf_valid_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')

        with tf.name_scope('Test'):
            tf_test_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            #tf_test_dataset = tf.constant(test_dataset, name = 'x-input')

    # Variables.
    with tf.name_scope('Train'):
        with tf.name_scope('Model'):
            #====================================
            # Convolution 1 Layer
            #====================================
            # Input channels: num_channels = 1
            # Output channels: depth = 16
            #layer1_weights = tf.Variable(tf.truncated_normal( [patch_size, patch_size, num_channels, depth], stddev=0.1))
            #layer1_biases = tf.Variable(tf.zeros([depth]))
            layer1, layer1_weights, layer1_biases = create_layer([patch_size, patch_size, num_channels, depth],
                                                                 depth,
                                                                 "Layer_1")

            #====================================
            # Convolution 2 Layer
            #====================================
            # Input channels: depth = 16
            # Output channels: depth = 16
            #layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
            #layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
            layer2, layer2_weights, layer2_biases = create_layer([patch_size, patch_size, depth, depth],
                                                                 depth,
                                                                 "Layer_2")

            #================================================
            # Fully Connected Layer (Densely Connected Layer)
            #================================================
            # Use neurons to allow processing of entire image
            final_image_size = output_size_no_pool(image_size, patch_size, padding = 'same', conv_stride = 2)
            #layer3_weights = tf.Variable(tf.truncated_normal([final_image_size * final_image_size * depth, num_hidden], stddev=0.1))
            #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
            layer3, layer3_weights, layer3_biases = create_layer([final_image_size * final_image_size * depth, num_hidden],
                                                                 num_hidden,
                                                                 "Layer_3")

            #layer3_weights = tf.Variable(tf.truncated_normal(
            #    [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
            #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))

            #================================================
            # Readout layer: Softmax Layer
            #================================================
            #layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
            #layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
            layer4, layer4_weights, layer4_biases = create_layer([num_hidden, num_labels],
                                                                 num_labels,
                                                                 "Layer_4")

    '''Model'''
    def model(data):
        # First Convolutional Layer
        with tf.name_scope(layer1):
#             conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
#             hidden = tf.nn.relu(conv + layer1_biases)
            with tf.name_scope('Convolution'):
                conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding = 'SAME')

            with tf.name_scope('conv_plus_b'):
                tmp = conv + layer1_biases

            with tf.name_scope('Activation'):
                hidden = tf.nn.relu(tmp)

        # Second Convolutional Layer
        with tf.name_scope(layer2):
#             #conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
#             #hidden = tf.nn.relu(conv + layer2_biases)
            with tf.name_scope('Convolution'):
                conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding = 'SAME')

            with tf.name_scope('conv_plus_b'):
                tmp = conv + layer2_biases

            with tf.name_scope('Activation'):
                hidden = tf.nn.relu(tmp)

        # Full Connected Layer
        #shape = hidden.get_shape().as_list()
        with tf.name_scope(layer3):
#             shape = tf.shape(hidden)
#             reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
#             hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            shape = tf.shape(hidden)
            with tf.name_scope('reshape'):
                reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])

            with tf.name_scope('Wx_plus_b'):
                logit = tf.matmul(reshape, layer3_weights) + layer3_biases

            with tf.name_scope('Activation'):
                hidden = tf.nn.relu(logit)

        # Readout Layer: Softmax Layer
        with tf.name_scope(layer4):
            with tf.name_scope('Wx_plus_b'):
                output = tf.matmul(hidden, layer4_weights) + layer4_biases

        return output

#     '''Training computation'''
#     logits = model(tf_train_dataset)
#     loss = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits))
    
#     #Add to Graph
#     tf.summary.scalar("Loss", loss)

#     '''Optimizer'''
#     # Learning rate of 0.05
#     optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

#     # Predictions for the training, validation, and test data.
#     train_prediction = tf.nn.softmax(logits)
#     valid_prediction = tf.nn.softmax(model(tf_valid_dataset, 'Validation'))
#     test_prediction = tf.nn.softmax(model(tf_test_dataset, 'Test'))
    
    '''Training computation'''
    logits = model(tf_train_dataset)
    
    with tf.name_scope('Cost_function'):
        with tf.name_scope('cross_entropy'):
            soft = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits)
        loss = tf.reduce_mean(soft)

    '''Optimizer'''
    with tf.name_scope('Optimizer'):
        # Learning rate of 0.05
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    '''Predictions for the training, validation, and test data'''
    with tf.name_scope('Prediction'):
        train_prediction = tf.nn.softmax(logits)
        
    #Add to Graph
    tf.summary.scalar("Loss", loss)

    vlogits = model(tf_valid_dataset)
    with tf.name_scope('Validation'):
        with tf.name_scope('Prediction'):
            valid_prediction = tf.nn.softmax(vlogits)
    
    tlogits = model(tf_test_dataset)
    with tf.name_scope('Test'):
        with tf.name_scope('Prediction'):
            test_prediction = tf.nn.softmax(tlogits)        

In [None]:
num_steps = 30000

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    merged = tf.summary.merge_all()
    write = tf.summary.FileWriter(log_dir + "/convolutional", session.graph)
    print('Initialized')
    t = time.process_time()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        summary, _, l, predictions = session.run([merged, optimizer, loss, train_prediction], feed_dict=feed_dict)
        write.add_summary(summary, step)
        if (step % 5000 == 0):
            print('===========================================')
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.2f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.2f%%' % accuracy(valid_prediction.eval({tf_valid_dataset: valid_dataset}), valid_labels))

    write.close()
    elapsed_time = time.process_time() - t
    print('=====================Finished!======================')
    print('Running time:  %.2fs' % elapsed_time)
    print('Test accuracy: %.2f%%' % accuracy(test_prediction.eval({tf_test_dataset: test_dataset}), test_labels))

---
Problem 1
---------

The convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.

---

Formats
* tf.nn.conv2d(input, filter, strides, padding)
* tf.nn.max_pool(value, ksize, strides, padding)

Part 1: Load Data & Build Computation Graph


In [None]:
batch_size = 16
# Depth is the number of output channels 
# On the other hand, num_channels is the number of input channels set at 1 previously
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

    '''Input data'''
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    '''Variables'''
    # Convolution 1 Layer
    # Input channels: num_channels = 1
    # Output channels: depth = 16
    layer1_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
    layer1_biases = tf.Variable(tf.zeros([depth]))
    
    # Convolution 2 Layer
    # Input channels: depth = 16
    # Output channels: depth = 16
    layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    
    # Fully Connected Layer (Densely Connected Layer)
    # Use neurons to allow processing of entire image
    layer3_weights = tf.Variable(tf.truncated_normal([final_image_size * final_image_size * depth, num_hidden], stddev=0.1))
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    
    # Readout layer: Softmax Layer
    layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    '''Model'''
    def model(data):
        # First Convolutional Layer with Pooling
        conv_1 = tf.nn.conv2d(data, layer1_weights, strides=[1, 1, 1, 1], padding='SAME')
        hidden_1 = tf.nn.relu(conv_1 + layer1_biases)
        pool_1 = tf.nn.max_pool(hidden_1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        
        # Second Convolutional Layer with Pooling
        conv_2 = tf.nn.conv2d(pool_1, layer2_weights, strides=[1, 1, 1, 1], padding='SAME')
        hidden_2 = tf.nn.relu(conv_2 + layer2_biases)
        pool_2 = tf.nn.max_pool(hidden_2, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
        
        # Full Connected Layer
        shape = pool_2.get_shape().as_list()
        reshape = tf.reshape(pool_2, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        
        # Readout Layer: Softmax Layer
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    '''Training computation'''
    logits = model(tf_train_dataset)
    # Original loss function
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits))

    '''Optimizer'''
    # Learning rate of 0.05
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    '''Predictions for the training, validation, and test data'''
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))

Part 2: Run Computation & Iterate

In [None]:
num_steps = 30000

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    t = time.process_time()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 5000 == 0):
            print('===========================================')
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels))
    
    elapsed_time = time.process_time() - t
    print('=====================Finished!======================')        
    print('Running time:  %.2fs' % elapsed_time)
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

In [None]:
batch_size = 16
# Depth is the number of output channels 
# On the other hand, num_channels is the number of input channels set at 1 previously
depth = 16
num_hidden = 64

graph = tf.Graph()
with graph.as_default():
    # Input data.
    with tf.name_scope('Input'):
        tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
        tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels), name = 'y-input')
        with tf.name_scope('Validation'):
            #tf_valid_dataset = tf.constant(valid_dataset, name = 'x-input')
            tf_valid_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            
        with tf.name_scope('Test'):
            tf_test_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            #tf_test_dataset = tf.constant(test_dataset, name = 'x-input')

    # Variables.
#     with tf.name_scope('Train'):
    with tf.name_scope('Model'):
        #====================================
        # Convolution 1 Layer
        #====================================
        # Input channels: num_channels = 1
        # Output channels: depth = 16
        #layer1_weights = tf.Variable(tf.truncated_normal( [patch_size, patch_size, num_channels, depth], stddev=0.1))
        #layer1_biases = tf.Variable(tf.zeros([depth]))
        layer1, layer1_weights, layer1_biases = create_layer([patch_size, patch_size, num_channels, depth],
                                                             depth,
                                                             "Layer_1")

        #====================================
        # Convolution 2 Layer
        #====================================
        # Input channels: depth = 16
        # Output channels: depth = 16
        #layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
        #layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
        layer2, layer2_weights, layer2_biases = create_layer([patch_size, patch_size, depth, depth],
                                                             depth,
                                                             "Layer_2")

        #================================================
        # Fully Connected Layer (Densely Connected Layer)
        #================================================
        # Use neurons to allow processing of entire image
        final_image_size = output_size_no_pool(image_size, patch_size, padding = 'same', conv_stride = 2)
        #layer3_weights = tf.Variable(tf.truncated_normal([final_image_size * final_image_size * depth, num_hidden], stddev=0.1))
        #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
        layer3, layer3_weights, layer3_biases = create_layer([final_image_size * final_image_size * depth, num_hidden],
                                                             num_hidden,
                                                             "Layer_3")

        #layer3_weights = tf.Variable(tf.truncated_normal(
        #    [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
        #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))

        #================================================
        # Readout layer: Softmax Layer
        #================================================
        #layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
        #layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
        layer4, layer4_weights, layer4_biases = create_layer([num_hidden, num_labels],
                                                             num_labels,
                                                             "Layer_4")
        
    #Convolution = tf.get_variable("Convolution", [])
    #convScope = tf.VariableScope(reuse = tf.AUTO_REUSE, name = 'Layer_1')
#     def foo(name):
#         with tf.variable_scope(name, reuse=tf.AUTO_REUSE) as vs:
#             pass
#         return vs.original_name_scope
    
#     with tf.variable_scope(layer1, reuse = True):
#         convScope = foo("Convolution")
    
    # First Convolutional Layer with Pooling
    #with tf.variable_scope(layer1, reuse = True):
    #    with tf.name_scope('Convolution') as convScope:
    #        pass
    
#     print(type(convScope))
    
    '''Model'''
    def model(data):
        # First Convolutional Layer with Pooling
        with tf.name_scope(layer1 + 'Convolution/'):
            conv_1 = tf.nn.conv2d(data, layer1_weights, strides=[1, 1, 1, 1], padding='SAME')

        with tf.name_scope(layer1 + 'Activation/'):
            hidden_1 = tf.nn.relu(conv_1 + layer1_biases)

        with tf.name_scope(layer1 + 'Max_Pooling/'):
            pool_1 = tf.nn.max_pool(hidden_1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

        # Second Convolutional Layer with Pooling
        with tf.name_scope(layer2 + 'Convolution/'):
            conv_2 = tf.nn.conv2d(pool_1, layer2_weights, strides=[1, 1, 1, 1], padding='SAME')
            
        with tf.name_scope(layer2 + 'Activation/'):
            hidden_2 = tf.nn.relu(conv_2 + layer2_biases)
        
        with tf.name_scope(layer2 + 'Max_Pooling/'):
            pool_2 = tf.nn.max_pool(hidden_2, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

        # Fully Connected Layer
        with tf.name_scope(layer3 + 'reshape/'):
            shape = tf.shape(pool_2)
            reshape = tf.reshape(pool_2, [shape[0], shape[1] * shape[2] * shape[3]])
        
        with tf.name_scope(layer3 + 'Wx_plus_b/'):
            logit = tf.matmul(reshape, layer3_weights) + layer3_biases

        with tf.name_scope(layer3 + 'Activation/'):
            hidden = tf.nn.relu(logit)

        # Readout Layer: Softmax Layer
        with tf.name_scope(layer4 + 'Wx_plus_b/'):
            output = tf.matmul(hidden, layer4_weights) + layer4_biases
        
        return output

    '''Training computation'''
    logits = model(tf_train_dataset)
    
    with tf.name_scope('Cost_function'):
        with tf.name_scope('cross_entropy'):
            soft = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits)
        loss = tf.reduce_mean(soft)

    '''Optimizer'''
    with tf.name_scope('Optimizer'):
        # Learning rate of 0.05
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    '''Predictions for the training, validation, and test data'''
    with tf.name_scope('Prediction'):
        train_prediction = tf.nn.softmax(logits)
        
    #Add to Graph
    tf.summary.scalar("Loss", loss)

    vlogits = model(tf_valid_dataset)
    with tf.name_scope('Validation'):
        with tf.name_scope('Prediction'):
            valid_prediction = tf.nn.softmax(vlogits)
    
    tlogits = model(tf_test_dataset)
    with tf.name_scope('Test'):
        with tf.name_scope('Prediction'):
            test_prediction = tf.nn.softmax(tlogits)  

In [None]:
num_steps = 30000

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    merged = tf.summary.merge_all()
    write = tf.summary.FileWriter(log_dir + "/problem1", session.graph)
    print('Initialized')
    t = time.process_time()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        summary, _, l, predictions = session.run([merged, optimizer, loss, train_prediction], feed_dict=feed_dict)
        write.add_summary(summary, step)
        if (step % 5000 == 0):
            print('===========================================')
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.2f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.2f%%' % accuracy(valid_prediction.eval({tf_valid_dataset: valid_dataset}), valid_labels))

    write.close()
    elapsed_time = time.process_time() - t
    print('=====================Finished!======================')
    print('Running time:  %.2fs' % elapsed_time)
    print('Test accuracy: %.2f%%' % accuracy(test_prediction.eval({tf_test_dataset: test_dataset}), test_labels))

In [None]:
batch_size = 16
# Depth is the number of output channels 
# On the other hand, num_channels is the number of input channels set at 1 previously
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    # Input data.
    with tf.name_scope('Input'):
        tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
        tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels), name = 'y-input')
        with tf.name_scope('Validation'):
            #tf_valid_dataset = tf.constant(valid_dataset, name = 'x-input')
            tf_valid_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            
        with tf.name_scope('Test'):
            tf_test_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            #tf_test_dataset = tf.constant(test_dataset, name = 'x-input')
            
    '''Model'''
    def model(data):
        with tf.variable_scope('Model', reuse = tf.AUTO_REUSE) as scope:
            #====================================
            # Convolution 1 Layer
            #====================================
            # Input channels: num_channels = 1
            # Output channels: depth = 16
            #layer1_weights = tf.Variable(tf.truncated_normal( [patch_size, patch|_size, num_channels, depth], stddev=0.1))
            #layer1_biases = tf.Variable(tf.zeros([depth]))
            # First Convolutional Layer
            #conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
            #hidden = tf.nn.relu(conv + layer1_biases)
            hidden, layer1_weights, layer1_biases = conv_layer_with_Pooling(data,
                                                                               patch_size,
                                                                               num_channels,
                                                                               depth,
                                                                               "Layer1",
                                                                               padMethod = 'SAME')

            #====================================
            # Convolution 2 Layer
            #====================================
            # Input channels: depth = 16
            # Output channels: depth = 16
            #layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
            #layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
            # Second Convolutional Layer
            #conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
            #hidden = tf.nn.relu(conv + layer2_biases)
            hidden, layer2_weights, layer2_biases = conv_layer_with_Pooling(hidden,
                                                                               patch_size,
                                                                               depth,
                                                                               depth,
                                                                               "Layer2",
                                                                               padMethod='SAME')

            #================================================
            # Fully Connected Layer (Densely Connected Layer)
            #================================================
            # Use neurons to allow processing of entire image
            final_image_size = output_size_no_pool(image_size, patch_size, padding = 'same', conv_stride = 2)
            #layer3_weights = tf.Variable(tf.truncated_normal([final_image_size * final_image_size * depth, num_hidden], stddev=0.1))
            #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
            # Full Connected Layer
            #shape = hidden.get_shape().as_list()
            #reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
            #hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
            hidden, layer3_weights, layer3_biases = fully_layer(hidden,
                                                                final_image_size * final_image_size * depth,
                                                                num_hidden,
                                                                "Layer3")

            #layer3_weights = tf.Variable(tf.truncated_normal(
            #    [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
            #layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))

            #================================================
            # Readout layer: Softmax Layer
            #================================================
            #layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
            #layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
            # Readout Layer: Softmax Layer
            #tf.matmul(hidden, layer4_weights) + layer4_biases
            output, layer4_weights, layer4_biases = layer(hidden, num_hidden, num_labels, "Layer4")
        
        return output
  
    '''Training computation'''
    logits = model(tf_train_dataset)
    
    with tf.name_scope('Cost_function'):
        with tf.name_scope('cross_entropy'):
            soft = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits)
        loss = tf.reduce_mean(soft)

    #Add to Graph
    tf.summary.scalar("Loss", loss)
        
    '''Optimizer'''
    with tf.name_scope('Optimizer'):
        # Learning rate of 0.05
        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    '''Predictions for the training, validation, and test data'''
    with tf.name_scope('Prediction'):
        train_prediction = tf.nn.softmax(logits)

    vlogits = model(tf_valid_dataset)
    with tf.name_scope('Validation'):
        with tf.name_scope('Prediction'):
            valid_prediction = tf.nn.softmax(vlogits)
    
    tlogits = model(tf_test_dataset)
    with tf.name_scope('Test'):
        with tf.name_scope('Prediction'):
            test_prediction = tf.nn.softmax(tlogits)  


In [None]:
num_steps = 30000

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    merged = tf.summary.merge_all()
    write = tf.summary.FileWriter(log_dir + "/problem1_2", session.graph)
    print('Initialized')
    t = time.process_time()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        summary, _, l, predictions = session.run([merged, optimizer, loss, train_prediction], feed_dict=feed_dict)
        write.add_summary(summary, step)
        if (step % 5000 == 0):
            print('===========================================')
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.2f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.2f%%' % accuracy(valid_prediction.eval({tf_valid_dataset: valid_dataset}), valid_labels))

    write.close()
    elapsed_time = time.process_time() - t
    print('=====================Finished!======================')
    print('Running time:  %.2fs' % elapsed_time)
    print('Test accuracy: %.2f%%' % accuracy(test_prediction.eval({tf_test_dataset: test_dataset}), test_labels))

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look for example at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, adding Dropout, and/or adding learning rate decay.

---

Details

* image_size = 28
* Convolutions
    * conv_filter_size = 5
    * conv_stride = 1
* Average Pooling
    * pool_filter_size = 2
    * pool_stride = 2
* padding='valid'
* Prevent overfitting
    * Learning rate decay
    * Regularization
    * Dropout
* Layers
    * Convolution
    * Pooling
    * Convolution
    * Pooling
    * Fully-connected
    * Fully-connected
    * Readout

New function for Image Size: Pooling


In [7]:
image_size = 28
# Create image size function based on input, filter size, padding and stride
# 2 convolutions only with 2 pooling
def output_size_pool(input_size, conv_filter_size, pool_filter_size, padding, conv_stride, pool_stride):
    if padding == 'same':
        padding = -1.00
    elif padding == 'valid':
        padding = 0.00
    else:
        return None
    # After convolution 1
    output_1 = (((input_size - conv_filter_size - 2*padding) / conv_stride) + 1.00)
    # After pool 1
    output_2 = (((output_1 - pool_filter_size - 2*padding) / pool_stride) + 1.00)    
    # After convolution 2
    output_3 = (((output_2 - conv_filter_size - 2*padding) / conv_stride) + 1.00)
    # After pool 2
    output_4 = (((output_3 - pool_filter_size - 2*padding) / pool_stride) + 1.00)  
    return int(output_4)

final_image_size = output_size_pool(input_size=image_size, conv_filter_size=5, pool_filter_size=2, padding='valid', conv_stride=1, pool_stride=2)

Part 1: Load Data and Build Computational Graph


In [None]:
batch_size = 16
# Depth is the number of output channels 
# On the other hand, num_channels is the number of input channels set at 1 previously
depth = 32
num_hidden = 64
beta = 0.001

graph = tf.Graph()

with graph.as_default():

    '''Input data'''
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    '''Variables'''
    # Convolution 1 Layer
    # Input channels: num_channels = 1
    # Output channels: depth = 16
    layer1_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
    layer1_biases = tf.Variable(tf.zeros([depth]))
    
    # Convolution 2 Layer
    # Input channels: depth = 16
    # Output channels: depth = 16
    layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    
    # First Fully Connected Layer (Densely Connected Layer)
    # Use neurons to allow processing of entire image
    final_image_size = output_size_pool(input_size=image_size, conv_filter_size=5, pool_filter_size=2, padding='valid', conv_stride=1, pool_stride=2)
    layer3_weights = tf.Variable(tf.truncated_normal([final_image_size * final_image_size * depth, num_hidden], stddev=0.1))
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    
    # Second Fully Connected Layer
    layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_hidden], stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    
    # Readout layer: Softmax Layer
    layer5_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    layer5_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    '''Model'''
    def model(data):
        # First Convolutional Layer with Pooling
        conv_1 = tf.nn.conv2d(data, layer1_weights, strides=[1, 1, 1, 1], padding='VALID')
        hidden_1 = tf.nn.relu(conv_1 + layer1_biases)
        pool_1 = tf.nn.avg_pool(hidden_1, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
        
        # Second Convolutional Layer with Pooling
        conv_2 = tf.nn.conv2d(pool_1, layer2_weights, strides=[1, 1, 1, 1], padding='VALID')
        hidden_2 = tf.nn.relu(conv_2 + layer2_biases)
        pool_2 = tf.nn.avg_pool(hidden_2, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
        
        # First Fully Connected Layer
        shape = pool_2.get_shape().as_list()
        reshape = tf.reshape(pool_2, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        keep_prob = 0.5
        hidden_drop = tf.nn.dropout(hidden, keep_prob)
        
        # Second Fully Connected Layer
        hidden_2 = tf.nn.relu(tf.matmul(hidden_drop, layer4_weights) + layer4_biases)
        hidden_2_drop = tf.nn.dropout(hidden_2, keep_prob)
        
        # Readout Layer: Softmax Layer
        return tf.matmul(hidden_2_drop, layer5_weights) + layer5_biases

    '''Training computation'''
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels = tf_train_labels, logits=logits))
    # Loss function with L2 Regularization 
    # regularizers = tf.nn.l2_loss(layer4_weights) + \
                   # tf.nn.l2_loss(layer5_weights)
    # loss = tf.reduce_mean(loss + beta * regularizers)

    '''Optimizer'''
    # Decaying learning rate
    global_step = tf.Variable(0)  # count the number of steps taken.
    start_learning_rate = 0.05
    learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, 100000, 0.96, staircase=True)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    '''Predictions for the training, validation, and test data'''
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))

Part 2: Run Computation & Iterate

In [None]:
num_steps = 30000

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    t = time.process_time()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 5000 == 0):
            print('===========================================')
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels))
    elapsed_time = time.process_time() - t
    print('=====================Finished!======================')        
    print('Running time:  %.2fs' % elapsed_time)
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

In [10]:
batch_size = 16
# Depth is the number of output channels 
# On the other hand, num_channels is the number of input channels set at 1 previously
depth = 32
num_hidden = 64
beta = 0.001

graph = tf.Graph()
with graph.as_default():
    # Input data.
    with tf.name_scope('Input'):
        tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
        tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels), name = 'y-input')
        with tf.name_scope('Validation'):
            #tf_valid_dataset = tf.constant(valid_dataset, name = 'x-input')
            tf_valid_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            
        with tf.name_scope('Test'):
            tf_test_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels), name = 'x-input')
            #tf_test_dataset = tf.constant(test_dataset, name = 'x-input')
            
    '''Model'''
    def model(data):
        with tf.variable_scope('Model', reuse = tf.AUTO_REUSE) as scope:
            #====================================
            # First Convolutional Layer with Pooling
            #====================================
            hidden, layer1_weights, layer1_biases = conv_layer_with_Pooling(data,
                                                                            patch_size,
                                                                            num_channels,
                                                                            depth,
                                                                            "Layer1",
                                                                            pool = 'avg',
                                                                            padMethod = 'VALID')

            #====================================
            # Second Convolutional Layer with Pooling
            #====================================
            hidden, layer2_weights, layer2_biases = conv_layer_with_Pooling(hidden,
                                                                               patch_size,
                                                                               depth,
                                                                               depth,
                                                                               "Layer2",
                                                                               pool = 'avg',
                                                                               padMethod = 'VALID')

            #================================================
            # First Fully Connected Layer
            #================================================
            final_image_size = output_size_pool(input_size = image_size,
                                                conv_filter_size = 5,
                                                pool_filter_size = 2,
                                                padding='valid',
                                                conv_stride = 1,
                                                pool_stride = 2)

            hidden, layer3_weights, layer3_biases = fully_layer(hidden,
                                                                final_image_size * final_image_size * depth,
                                                                num_hidden,
                                                                "Layer3",
                                                                keep_prob = 0.5)

            
            #================================================
            # Second Fully Connected Layer
            #================================================
            hidden, layer4_weights, layer4_biases = fully_layer(hidden,
                                                                num_hidden,
                                                                num_hidden,
                                                                "Layer4",
                                                                keep_prob = 0.5)

            #================================================
            # Readout layer: Softmax Layer
            #================================================
            output, layer4_weights, layer4_biases = layer(hidden, num_hidden, num_labels, "Layer5")
        
        return output
  
    '''Training computation'''
    logits = model(tf_train_dataset)
    
    with tf.name_scope('Cost_function'):
        with tf.name_scope('cross_entropy'):
            soft = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_train_labels, logits=logits)
        loss = tf.reduce_mean(soft)

    #Add to Graph
    tf.summary.scalar("Loss", loss)
        
    '''Optimizer'''
    with tf.name_scope('Optimizer'):
        # Decaying learning rate
        global_step = tf.Variable(0, name = 'Step')  # count the number of steps taken.
        start_learning_rate = 0.05
        learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, 100000, 0.96, staircase=True)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
        
    '''Predictions for the training, validation, and test data'''
    with tf.name_scope('Prediction'):
        train_prediction = tf.nn.softmax(logits)
        
    vlogits = model(tf_valid_dataset)
    with tf.name_scope('Validation'):
        with tf.name_scope('Prediction'):
            valid_prediction = tf.nn.softmax(vlogits)
    
    tlogits = model(tf_test_dataset)
    with tf.name_scope('Test'):
        with tf.name_scope('Prediction'):
            test_prediction = tf.nn.softmax(tlogits)  


In [11]:
num_steps = 30000

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    merged = tf.summary.merge_all()
    write = tf.summary.FileWriter(log_dir + "/problem2", session.graph)
    print('Initialized')
    t = time.process_time()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        summary, _, l, predictions = session.run([merged, optimizer, loss, train_prediction], feed_dict=feed_dict)
        write.add_summary(summary, step)
        if (step % 5000 == 0):
            print('===========================================')
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.2f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.2f%%' % accuracy(valid_prediction.eval({tf_valid_dataset: valid_dataset}), valid_labels))

    write.close()
    elapsed_time = time.process_time() - t
    print('=====================Finished!======================')
    print('Running time:  %.2fs' % elapsed_time)
    print('Test accuracy: %.2f%%' % accuracy(test_prediction.eval({tf_test_dataset: test_dataset}), test_labels))

Initialized
Minibatch loss at step 0: 2.394534
Minibatch accuracy: 18.75%
Validation accuracy: 9.98%
Minibatch loss at step 5000: 0.379734
Minibatch accuracy: 87.50%
Validation accuracy: 81.56%
Minibatch loss at step 10000: 0.879654
Minibatch accuracy: 68.75%
Validation accuracy: 83.73%
Minibatch loss at step 15000: 0.541010
Minibatch accuracy: 81.25%
Validation accuracy: 84.58%
Minibatch loss at step 20000: 0.407756
Minibatch accuracy: 87.50%
Validation accuracy: 85.35%
Minibatch loss at step 25000: 0.623925
Minibatch accuracy: 81.25%
Validation accuracy: 85.41%
Running time:  633.92s
Test accuracy: 92.54%
