In [3]:
from __future__ import print_function
import numpy as np
from six.moves import cPickle as pickle
import tensorflow as tf
from tensorflow.python.client import timeline

import time
import matplotlib.pyplot as plt
%matplotlib inline

print("import done")

import done


#### Load pickled dataset into memory

In [11]:
pickle_file = './udacity/notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


#### Reformat data for training

In [12]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset[:300], valid_labels[:300])
#test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
#print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (300, 28, 28, 1) (300, 10)


In [13]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [43]:
#How many files are supplied per batch.
batch_size=16
#How many filters to learn for the residual.
residual_channels=16
#Hidden layer size for fully connected layer
hidden_size=64
#number of steps after which learning rate is decayed
decay_steps=200

filter_width=5

slim = tf.contrib.slim

#Construct computation graph
graph = tf.Graph()

with graph.as_default():
    
    # Input data
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
                
    def network(batch_data, reuse=False):
        with tf.variable_scope('eegnet', reuse=reuse):
            with slim.arg_scope([slim.conv2d, slim.fully_connected], 
                                activation_fn=tf.nn.elu, 
                                weights_initializer=tf.contrib.layers.xavier_initializer()):
                hidden = slim.conv2d(batch_data, residual_channels, filter_width, stride=1, rate=1)
                hidden = slim.conv2d(hidden, residual_channels, filter_width, stride=1, rate=2)
                hidden = slim.conv2d(hidden, residual_channels, filter_width, stride=1, rate=4)
                # Residual connection
                hidden = tf.add(hidden, batch_data)
                # Avg pool to reduce dimension before FC layers
                hidden = slim.avg_pool2d(hidden, 8, 8)
                shape = hidden.get_shape().as_list()
                hidden = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
                hidden = slim.fully_connected(hidden, hidden_size)
                return slim.fully_connected(hidden, num_labels)

    with tf.name_scope('eegnet'):
        logits = network(tf_train_dataset)
        loss = slim.losses.softmax_cross_entropy(logits, tf_train_labels)
        tf.scalar_summary('loss', loss)
        global_step = tf.Variable(0)
        learning_rate = tf.train.exponential_decay(0.05, global_step, decay_steps, 0.96, staircase=True)
        #optimizer = tf.train.RMSPropOptimizer(learning_rate, 0.9).minimize(loss, global_step=global_step)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
        tf.scalar_summary('learning_rate', learning_rate)
        train_predictions = tf.nn.softmax(logits)
        valid_predictions = tf.nn.softmax(network(tf_valid_dataset, True))

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)
    
    #Merge all summaries and write to a folder
    merged = tf.merge_all_summaries()
    results_writer = tf.train.SummaryWriter('./results', graph)
    
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()
    
    #tracing for timeline
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()    
    
print('computational graph created')

computational graph created


In [44]:
num_steps = 1001

trace_file = open('./tracing/timeline.json', 'w')
save_path = './checkpoints/model.ckpt'

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    tf.initialize_local_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_dataset.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, lrate, predictions, summary = session.run(
            [optimizer, loss, learning_rate, train_predictions, merged], 
            feed_dict=feed_dict, 
            options=run_options,
            run_metadata=run_metadata)
        results_writer.add_summary(summary, step)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l), 'Learning rate:', lrate)
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(valid_predictions.eval(), valid_labels))
            print('Predictions | Labels:\n', np.concatenate((predictions[:2,:2], batch_labels[:2,:2]), axis=1))
            
    # Save tracing into disl
    trace = timeline.Timeline(step_stats=run_metadata.step_stats)
    trace_file.write(trace.generate_chrome_trace_format(show_memory=True))
            
    # Save the variables to disk.
    saver.save(session, save_path)
    print("Model saved in file: %s" % save_path)
            
    results_writer.flush()
    results_writer.close()

    print('Finished training')

Initialized
Minibatch loss at step 0: 2.406898 Learning rate: 0.05
Minibatch accuracy: 12.5%
Validation accuracy: 6.7%
Predictions | Labels:
 [[ 0.08819073  0.21308757  0.          0.        ]
 [ 0.06302567  0.30972046  0.          0.        ]]
Minibatch loss at step 50: 1.201549 Learning rate: 0.05
Minibatch accuracy: 56.2%
Validation accuracy: 50.0%
Predictions | Labels:
 [[ 0.01959432  0.04450238  0.          0.        ]
 [ 0.32109222  0.07031856  1.          0.        ]]
Minibatch loss at step 100: 0.911138 Learning rate: 0.05
Minibatch accuracy: 68.8%
Validation accuracy: 69.3%
Predictions | Labels:
 [[ 0.02025793  0.4340997   0.          1.        ]
 [ 0.01021392  0.00477806  0.          0.        ]]
Minibatch loss at step 150: 1.012183 Learning rate: 0.05
Minibatch accuracy: 62.5%
Validation accuracy: 74.0%
Predictions | Labels:
 [[ 0.16096431  0.03936802  0.          0.        ]
 [ 0.08489948  0.014693    0.          0.        ]]
Minibatch loss at step 200: 0.487762 Learning ra

KeyboardInterrupt: 

## Evaluate Model

In [None]:
with tf.Session(graph=graph) as session:
    saver.restore(session, save_path)
    print('Model Loaded')
    for step in np.arange(data_split.shape[0]):
        offset = batch_size*step
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        predictions = train_prediction.eval(feed_dict={tf_train_dataset: batch_data})
        print('accuracy:', accuracy(predictions, batch_labels))
        print('Predictions:', predictions, 'Labels:', batch_labels)   
        
    print('Finished validation')

In [None]:
valid_batch_size = 1

def accuracy_notpercent(predictions, labels):
  return np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))

with tf.Session(graph=graph) as session:
    saver.restore(session, save_path)
    print('Model Loaded')
    data_split = np.array_split(valid_dataset, valid_dataset.shape[0]//valid_batch_size, axis=0)
    labels_split = np.array_split(valid_labels, valid_labels.shape[0]//valid_batch_size, axis=0)
    correct_predictions = 0
    for idx, batch_data in enumerate(data_split):
        correct_predictions += accuracy_notpercent(
            train_prediction.eval(feed_dict={tf_train_dataset: batch_data}), 
            labels_split[idx])
        print('accuracy:', (100.0*correct_predictions)/((idx+1)*valid_batch_size))
        
        
    print('Finished validation')