Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is make the neural network convolutional.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [2]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [3]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [4]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [5]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  # Model.
  def model(data):
    conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    shape = hidden.get_shape().as_list()
    reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [6]:
num_steps = 301

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 3.409376
Minibatch accuracy: 12.5%
Validation accuracy: 10.0%
Minibatch loss at step 50: 1.410510
Minibatch accuracy: 56.2%
Validation accuracy: 51.5%
Minibatch loss at step 100: 1.145692
Minibatch accuracy: 68.8%
Validation accuracy: 67.2%
Minibatch loss at step 150: 0.759528
Minibatch accuracy: 68.8%
Validation accuracy: 73.0%
Minibatch loss at step 200: 0.681328
Minibatch accuracy: 87.5%
Validation accuracy: 79.1%
Minibatch loss at step 250: 1.268279
Minibatch accuracy: 62.5%
Validation accuracy: 77.0%
Minibatch loss at step 300: 0.325314
Minibatch accuracy: 93.8%
Validation accuracy: 80.6%
Test accuracy: 86.8%


---
Problem 1
---------

The convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (`nn.max_pool()`) of stride 2 and kernel size 2.

---

In [7]:
pickle_file = 'notMNIST.pickle'

#resize the validation and training set to fit in the computer
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [8]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

def accuracy(predictions, labels):
  #print(predictions)
  #print(labels)
  print(predictions.shape)
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


scipt version

In [14]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64


graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
  
    # Variables.
    def conv2d_s1(x,W):
        return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
    
    def max_pool_2x2(x):
        return tf.nn.max_pool(x,ksize=[1,2,2,1],
                             strides=[1,2,2,1],padding='SAME')
    
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    
    def bias_variable(shape):
        initial = tf.constant(1.0, shape=shape)
        return tf.Variable(initial)

    W_conv1 = weight_variable([5,5,1,16])
    b_conv1 = bias_variable([16])

    W_conv2 = weight_variable([5,5,16,16])
    b_conv2 = bias_variable([16])

    W_fc1 = weight_variable([7*7*16,64])
    b_fc1 = bias_variable([64])

    W_fc2 = weight_variable([64,10])
    b_fc2 = bias_variable([10])
    
    
    #Model
    def model(data):
        h_conv1 = tf.nn.relu(conv2d_s1(data,W_conv1)+b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

        h_conv2 = tf.nn.relu(conv2d_s1(h_pool1,W_conv2)+b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
        
        h_pool2_flat = tf.reshape(h_pool2,[-1,7*7*16])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1)

        return tf.matmul(h_fc1,W_fc2)+b_fc2

  
    # Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
  
    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [15]:
#16 min with cpu. 1min with gpu
num_steps = 20000

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(
                valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 3.951977
Minibatch accuracy: 12.5%
Validation accuracy: 10.0%
Minibatch loss at step 50: 2.125788
Minibatch accuracy: 25.0%
Validation accuracy: 31.6%
Minibatch loss at step 100: 1.758945
Minibatch accuracy: 43.8%
Validation accuracy: 54.8%
Minibatch loss at step 150: 1.466299
Minibatch accuracy: 50.0%
Validation accuracy: 55.7%
Minibatch loss at step 200: 0.872890
Minibatch accuracy: 81.2%
Validation accuracy: 72.7%
Minibatch loss at step 250: 1.211735
Minibatch accuracy: 68.8%
Validation accuracy: 74.2%
Minibatch loss at step 300: 0.282206
Minibatch accuracy: 87.5%
Validation accuracy: 78.2%
Minibatch loss at step 350: 0.666730
Minibatch accuracy: 81.2%
Validation accuracy: 80.1%
Minibatch loss at step 400: 0.348083
Minibatch accuracy: 87.5%
Validation accuracy: 78.9%
Minibatch loss at step 450: 0.594155
Minibatch accuracy: 87.5%
Validation accuracy: 79.0%
Minibatch loss at step 500: 0.547581
Minibatch accuracy: 81.2%
Validation accuracy: 79.4%
M

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look for example at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, adding Dropout, and/or adding learning rate decay.

---

In [16]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
dropout = 0.5


graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    #tf_valid_dataset = tf.constant(valid_dataset)
    #tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    def conv2d_s1(x,W):
        return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x,ksize=[1,2,2,1],
                             strides=[1,2,2,1],padding='SAME')

    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(1.0, shape=shape)
        return tf.Variable(initial)
    

    W_conv1 = weight_variable([5,5,1,16])
    b_conv1 = bias_variable([16])

    W_conv2 = weight_variable([5,5,16,32])
    b_conv2 = bias_variable([32])

    W_fc1 = weight_variable([7*7*32,1028])
    b_fc1 = bias_variable([1028])

    W_fc2 = weight_variable([1028,10])
    b_fc2 = bias_variable([10])


    #Model
    def model(data,train=True):
        h_conv1 = tf.nn.relu(conv2d_s1(data,W_conv1)+b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

        h_conv2 = tf.nn.relu(conv2d_s1(h_pool1,W_conv2)+b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)

        h_pool2_flat = tf.reshape(h_pool2,[-1,7*7*32])
        if train:
            h_fc1 =tf.nn.dropout(tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1),dropout)
        else:
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1)

        return tf.matmul(h_fc1,W_fc2)+b_fc2


    # Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))

    # Optimizer.
    #optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    #valid_prediction = tf.nn.softmax(model(tf_valid_dataset,train=False))
    #test_prediction = tf.nn.softmax(model(tf_test_dataset,train=False))

In [17]:
num_steps=20000
#num_steps = 10000

with tf.Session(graph=graph,config=tf.ConfigProto(log_device_placement=True)) as session:
#with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            #print('Validation accuracy: %.1f%%' % accuracy(
            #    valid_prediction.eval(), valid_labels))
    #print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 22.592258
Minibatch accuracy: 25.0%
Minibatch loss at step 50: 4.096557
Minibatch accuracy: 18.8%
Minibatch loss at step 100: 3.035065
Minibatch accuracy: 37.5%
Minibatch loss at step 150: 2.507319
Minibatch accuracy: 31.2%
Minibatch loss at step 200: 2.407819
Minibatch accuracy: 62.5%
Minibatch loss at step 250: 0.884773
Minibatch accuracy: 68.8%
Minibatch loss at step 300: 0.276521
Minibatch accuracy: 87.5%
Minibatch loss at step 350: 1.195624
Minibatch accuracy: 75.0%
Minibatch loss at step 400: 0.247992
Minibatch accuracy: 93.8%
Minibatch loss at step 450: 0.612312
Minibatch accuracy: 81.2%
Minibatch loss at step 500: 0.656624
Minibatch accuracy: 75.0%
Minibatch loss at step 550: 0.746145
Minibatch accuracy: 87.5%
Minibatch loss at step 600: 0.262853
Minibatch accuracy: 93.8%
Minibatch loss at step 650: 0.504109
Minibatch accuracy: 87.5%
Minibatch loss at step 700: 1.062117
Minibatch accuracy: 62.5%
Minibatch loss at step 750: 0.535464
Minibatc

Above was 95.2% 

## 3 Conv layers 1FC layer

In [18]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
dropout = 0.5


graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    # Variables.
    def conv2d_s1(x,W):
        return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
    
    def max_pool_2x2(x):
        return tf.nn.max_pool(x,ksize=[1,2,2,1],
                             strides=[1,2,2,1],padding='SAME')
    
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    
    def bias_variable(shape):
        initial = tf.constant(1.0, shape=shape)
        return tf.Variable(initial)

    W_conv1 = weight_variable([5,5,1,16])
    b_conv1 = bias_variable([16])

    W_conv2 = weight_variable([5,5,16,32])
    b_conv2 = bias_variable([32])
    
    W_conv3 = weight_variable([3,3,32,64])
    b_conv3 = bias_variable([64])

    W_fc1 = weight_variable([4*4*64,1028])
    b_fc1 = bias_variable([1028])

    W_fc2 = weight_variable([1028,10])
    b_fc2 = bias_variable([10])
    
    
    #Model
    def model(data,train=True):
        h_conv1 = tf.nn.relu(conv2d_s1(data,W_conv1)+b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

        h_conv2 = tf.nn.relu(conv2d_s1(h_pool1,W_conv2)+b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
        
        h_conv3 = tf.nn.relu(conv2d_s1(h_pool2,W_conv3)+b_conv3)
        h_pool3 = max_pool_2x2(h_conv3)
        
        h_pool3_flat = tf.reshape(h_pool3,[-1,4*4*64])
        if train:
            h_fc1 =tf.nn.dropout(tf.nn.relu(tf.matmul(h_pool3_flat,W_fc1)+b_fc1),dropout)
        else:
            h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat,W_fc1)+b_fc1)

        return tf.matmul(h_fc1,W_fc2)+b_fc2

  
    # Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    #optimizer = tf.train.AdamOptimizer().minimize(loss)
  
    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset,train=False))
    test_prediction = tf.nn.softmax(model(tf_test_dataset,train=False))

In [19]:
num_steps = 20000

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(
                valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 50.722755
Minibatch accuracy: 6.2%
Validation accuracy: 12.4%
Minibatch loss at step 50: nan
Minibatch accuracy: 6.2%
Validation accuracy: 10.0%
Minibatch loss at step 100: nan
Minibatch accuracy: 0.0%
Validation accuracy: 10.0%
Minibatch loss at step 150: nan
Minibatch accuracy: 12.5%
Validation accuracy: 10.0%
Minibatch loss at step 200: nan
Minibatch accuracy: 18.8%
Validation accuracy: 10.0%
Minibatch loss at step 250: nan
Minibatch accuracy: 6.2%
Validation accuracy: 10.0%
Minibatch loss at step 300: nan
Minibatch accuracy: 12.5%
Validation accuracy: 10.0%
Minibatch loss at step 350: nan
Minibatch accuracy: 18.8%
Validation accuracy: 10.0%
Minibatch loss at step 400: nan
Minibatch accuracy: 0.0%
Validation accuracy: 10.0%
Minibatch loss at step 450: nan
Minibatch accuracy: 6.2%
Validation accuracy: 10.0%
Minibatch loss at step 500: nan
Minibatch accuracy: 18.8%
Validation accuracy: 10.0%
Minibatch loss at step 550: nan
Minibatch accuracy: 12.5

94.8%

## 3 Conv layers 2 FC layers

In [20]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
dropout = 0.5


graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
  
    # Variables.
    def conv2d_s1(x,W):
        return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
    
    def max_pool_2x2(x):
        return tf.nn.max_pool(x,ksize=[1,2,2,1],
                             strides=[1,2,2,1],padding='SAME')
    
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    
    def bias_variable(shape):
        initial = tf.constant(1.0, shape=shape)
        return tf.Variable(initial)

    W_conv1 = weight_variable([5,5,1,16])
    b_conv1 = bias_variable([16])

    W_conv2 = weight_variable([5,5,16,32])
    b_conv2 = bias_variable([32])
    
    W_conv3 = weight_variable([3,3,32,64])
    b_conv3 = bias_variable([64])

    W_fc1 = weight_variable([4*4*64,1028])
    b_fc1 = bias_variable([1028])
    
    W_fc2 = weight_variable([1028,1028])
    b_fc2 = bias_variable([1028])

    W_fc3 = weight_variable([1028,10])
    b_fc3 = bias_variable([10])
    
    
    #Model
    def model(data,train=True):
        h_conv1 = tf.nn.relu(conv2d_s1(data,W_conv1)+b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

        h_conv2 = tf.nn.relu(conv2d_s1(h_pool1,W_conv2)+b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
        
        h_conv3 = tf.nn.relu(conv2d_s1(h_pool2,W_conv3)+b_conv3)
        h_pool3 = max_pool_2x2(h_conv3)
        
        h_pool3_flat = tf.reshape(h_pool3,[-1,4*4*64])
        if train:
            h_fc1 =tf.nn.dropout(tf.nn.relu(tf.matmul(h_pool3_flat,W_fc1)+b_fc1),dropout)
            h_fc2 =tf.nn.dropout(tf.nn.relu(tf.matmul(h_fc1,W_fc2)+b_fc2),dropout)
        else:
            h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat,W_fc1)+b_fc1)
            h_fc2 = tf.nn.relu(tf.matmul(h_fc1,W_fc2)+b_fc2)

        return tf.matmul(h_fc2,W_fc3)+b_fc3

  
    # Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    
    # Optimizer.
    #optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    optimizer = tf.train.AdamOptimizer().minimize(loss)
  
    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset,train=False))
    test_prediction = tf.nn.softmax(model(tf_test_dataset,train=False))

In [21]:
num_steps = 500000

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(
                valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 78.329208
Minibatch accuracy: 25.0%
Validation accuracy: 10.0%
Minibatch loss at step 50: 8.514968
Minibatch accuracy: 31.2%
Validation accuracy: 18.9%
Minibatch loss at step 100: 7.592811
Minibatch accuracy: 25.0%
Validation accuracy: 33.0%
Minibatch loss at step 150: 4.569750
Minibatch accuracy: 31.2%
Validation accuracy: 53.5%
Minibatch loss at step 200: 3.004250
Minibatch accuracy: 31.2%
Validation accuracy: 63.2%
Minibatch loss at step 250: 1.975613
Minibatch accuracy: 50.0%
Validation accuracy: 67.2%
Minibatch loss at step 300: 0.923202
Minibatch accuracy: 68.8%
Validation accuracy: 71.2%
Minibatch loss at step 350: 1.520465
Minibatch accuracy: 62.5%
Validation accuracy: 75.1%
Minibatch loss at step 400: 1.147632
Minibatch accuracy: 50.0%
Validation accuracy: 73.0%
Minibatch loss at step 450: 0.995324
Minibatch accuracy: 68.8%
Validation accuracy: 76.0%
Minibatch loss at step 500: 1.134022
Minibatch accuracy: 75.0%
Validation accuracy: 76.8%


92.5%

100,000 steps: 94.4%

500,000 steps: 91.8%

# GPU

In [17]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
dropout = 0.5


graph = tf.Graph()
with graph.as_default():
        # Defines a function from `Operation` to device string.
        def matmul_on_gpu(n):
            if n.type == "MatMul":
                return "/gpu:0"
            else:
                return "/cpu:0"

        with tf.device(matmul_on_gpu):
            # Input data.
            tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
            tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
            #tf_valid_dataset = tf.constant(valid_dataset)
            #tf_test_dataset = tf.constant(test_dataset)

            # Variables.
            def conv2d_s1(x,W):
                return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')

            def max_pool_2x2(x):
                return tf.nn.max_pool(x,ksize=[1,2,2,1],
                                     strides=[1,2,2,1],padding='SAME')

            def weight_variable(shape):
                initial = tf.truncated_normal(shape, stddev=0.1)
                return tf.Variable(initial)

            def bias_variable(shape):
                initial = tf.constant(1.0, shape=shape)
                return tf.Variable(initial)


            W_conv1 = weight_variable([5,5,1,16])
            b_conv1 = bias_variable([16])

            W_conv2 = weight_variable([5,5,16,32])
            b_conv2 = bias_variable([32])

            W_fc1 = weight_variable([7*7*32,1028])
            b_fc1 = bias_variable([1028])

            W_fc2 = weight_variable([1028,10])
            b_fc2 = bias_variable([10])


            #Model
            def model(data,train=True):
                h_conv1 = tf.nn.relu(conv2d_s1(data,W_conv1)+b_conv1)
                h_pool1 = max_pool_2x2(h_conv1)

                h_conv2 = tf.nn.relu(conv2d_s1(h_pool1,W_conv2)+b_conv2)
                h_pool2 = max_pool_2x2(h_conv2)

                h_pool2_flat = tf.reshape(h_pool2,[-1,7*7*32])
                if train:
                    h_fc1 =tf.nn.dropout(tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1),dropout)
                else:
                    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1)

                return tf.matmul(h_fc1,W_fc2)+b_fc2


            # Training computation.
            logits = model(tf_train_dataset)
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))

            # Optimizer.
            #optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
            optimizer = tf.train.AdamOptimizer().minimize(loss)

            # Predictions for the training, validation, and test data.
            train_prediction = tf.nn.softmax(logits)
            #with tf.device('/cpu:0'):
                #valid_prediction = tf.nn.softmax(model(tf_valid_dataset,train=False))
                #test_prediction = tf.nn.softmax(model(tf_test_dataset,train=False))

In [18]:
#num_steps=20000
num_steps = 10000

with tf.Session(graph=graph,config=tf.ConfigProto(log_device_placement=True)) as session:
#with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            #print('Validation accuracy: %.1f%%' % accuracy(
                #valid_prediction.eval(), valid_labels))
    #print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 26.291773
(16, 10)
Minibatch accuracy: 0.0%
Minibatch loss at step 50: 3.625330
(16, 10)
Minibatch accuracy: 25.0%
Minibatch loss at step 100: 2.337433
(16, 10)
Minibatch accuracy: 37.5%
Minibatch loss at step 150: 1.578582
(16, 10)
Minibatch accuracy: 62.5%
Minibatch loss at step 200: 1.676110
(16, 10)
Minibatch accuracy: 68.8%
Minibatch loss at step 250: 1.319177
(16, 10)
Minibatch accuracy: 43.8%
Minibatch loss at step 300: 0.418931
(16, 10)
Minibatch accuracy: 87.5%
Minibatch loss at step 350: 0.768900
(16, 10)
Minibatch accuracy: 87.5%
Minibatch loss at step 400: 0.464657
(16, 10)
Minibatch accuracy: 75.0%
Minibatch loss at step 450: 0.992480
(16, 10)
Minibatch accuracy: 81.2%
Minibatch loss at step 500: 0.624649
(16, 10)
Minibatch accuracy: 68.8%
Minibatch loss at step 550: 0.420785
(16, 10)
Minibatch accuracy: 75.0%
Minibatch loss at step 600: 0.239297
(16, 10)
Minibatch accuracy: 93.8%
Minibatch loss at step 650: 0.561214
(16, 10)
Minibatch

Kinda works

In [None]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
dropout = 0.5


graph = tf.Graph()
with graph.as_default():
        with tf.device('/gpu:0'):
            tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, num_channels))
            tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        #with tf.device('/cpu:0'):
            #tf_valid_dataset = tf.constant(valid_dataset)
            #tf_test_dataset = tf.constant(test_dataset)
            
        #with tf.device('/gpu:0'):
            # Variables.
            def conv2d_s1(x,W):
                return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')

            def max_pool_2x2(x):
                return tf.nn.max_pool(x,ksize=[1,2,2,1],
                                     strides=[1,2,2,1],padding='SAME')

            def weight_variable(shape):
                initial = tf.truncated_normal(shape, stddev=0.1)
                return tf.Variable(initial)

            def bias_variable(shape):
                initial = tf.constant(1.0, shape=shape)
                return tf.Variable(initial)


            W_conv1 = weight_variable([5,5,1,16])
            b_conv1 = bias_variable([16])

            W_conv2 = weight_variable([5,5,16,32])
            b_conv2 = bias_variable([32])

            W_fc1 = weight_variable([7*7*32,128])
            b_fc1 = bias_variable([128])

            W_fc2 = weight_variable([128,10])
            b_fc2 = bias_variable([10])


            #Model
            def model(data,train=True):
                h_conv1 = tf.nn.relu(conv2d_s1(data,W_conv1)+b_conv1)
                h_pool1 = max_pool_2x2(h_conv1)

                h_conv2 = tf.nn.relu(conv2d_s1(h_pool1,W_conv2)+b_conv2)
                h_pool2 = max_pool_2x2(h_conv2)

                h_pool2_flat = tf.reshape(h_pool2,[-1,7*7*32])
                if train:
                    h_fc1 =tf.nn.dropout(tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1),dropout)
                else:
                    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1)

                return tf.matmul(h_fc1,W_fc2)+b_fc2


            # Training computation.
            logits = model(tf_train_dataset)
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))

            # Optimizer.
            #optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
            optimizer = tf.train.AdamOptimizer().minimize(loss)

            # Predictions for the training, validation, and test data.
            train_prediction = tf.nn.softmax(logits)
        #with tf.device('/cpu:0'):
            #valid_prediction = tf.nn.softmax(model(tf_valid_dataset,train=False))
            #test_prediction = tf.nn.softmax(model(tf_test_dataset,train=False))

In [None]:
num_steps=100
#num_steps = 10000

with tf.Session(graph=graph,config=tf.ConfigProto(log_device_placement=True)) as session:
#with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            #print('Validation accuracy: %.1f%%' % accuracy(
                #valid_prediction.eval(), valid_labels))
    #print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

# Inception Modules

In [5]:
#need to batch the test data because running low on memory
class test_batchs:
    def __init__(self,data):
        self.data = data
        self.batch_index = 0
    def nextBatch(self,batch_size):
        if (batch_size+self.batch_index) > self.data.shape[0]:
            print("batch sized is messed up")
        batch = self.data[self.batch_index:(self.batch_index+batch_size),:,:,:]
        self.batch_index= self.batch_index+batch_size
        return batch

#set the test batchsize
test_batch_size = 100

In [6]:
import os

In [7]:
#use os to get our current working directory so we can save variable
file_path = os.getcwd()+'/model.ckpt'

In [11]:
batch_size = 50
map1 = 32
map2 = 64
num_fc1 = 700 #1028
num_fc2 = 10
reduce1x1 = 16
dropout=0.99

graph = tf.Graph()
with graph.as_default():
    #train data and labels
    X = tf.placeholder(tf.float32,shape=(batch_size,28,28,1))
    y_ = tf.placeholder(tf.float32,shape=(batch_size,10))
    
    #validation data
    tf_valX = tf.placeholder(tf.float32,shape=(test_batch_size,28,28,1))
    
    #test data
    tf_testX=tf.placeholder(tf.float32,shape=(test_batch_size,28,28,1))
    
    def createWeight(size,Name):
        return tf.Variable(tf.truncated_normal(size, stddev=0.1),
                          name=Name)
    
    def createBias(size,Name):
        return tf.Variable(tf.constant(0.1,shape=size),
                          name=Name)
    
    def conv2d_s1(x,W):
        return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')
    
    def max_pool_3x3_s1(x):
        return tf.nn.max_pool(x,ksize=[1,3,3,1],
                             strides=[1,1,1,1],padding='SAME')
    
    
    #Inception Module1
    #
    #follows input
    W_conv1_1x1_1 = createWeight([1,1,1,map1],'W_conv1_1x1_1')
    b_conv1_1x1_1 = createWeight([map1],'b_conv1_1x1_1')
    
    #follows input
    W_conv1_1x1_2 = createWeight([1,1,1,reduce1x1],'W_conv1_1x1_2')
    b_conv1_1x1_2 = createWeight([reduce1x1],'b_conv1_1x1_2')
    
    #follows input
    W_conv1_1x1_3 = createWeight([1,1,1,reduce1x1],'W_conv1_1x1_3')
    b_conv1_1x1_3 = createWeight([reduce1x1],'b_conv1_1x1_3')
    
    #follows 1x1_2
    W_conv1_3x3 = createWeight([3,3,reduce1x1,map1],'W_conv1_3x3')
    b_conv1_3x3 = createWeight([map1],'b_conv1_3x3')
    
    #follows 1x1_3
    W_conv1_5x5 = createWeight([5,5,reduce1x1,map1],'W_conv1_5x5')
    b_conv1_5x5 = createBias([map1],'b_conv1_5x5')
    
    #follows max pooling
    W_conv1_1x1_4= createWeight([1,1,1,map1],'W_conv1_1x1_4')
    b_conv1_1x1_4= createWeight([map1],'b_conv1_1x1_4')
    
    
    
    #Inception Module2
    #
    #follows inception1
    W_conv2_1x1_1 = createWeight([1,1,4*map1,map2],'W_conv2_1x1_1')
    b_conv2_1x1_1 = createWeight([map2],'b_conv2_1x1_1')
    
    #follows inception1
    W_conv2_1x1_2 = createWeight([1,1,4*map1,reduce1x1],'W_conv2_1x1_2')
    b_conv2_1x1_2 = createWeight([reduce1x1],'b_conv2_1x1_2')
    
    #follows inception1
    W_conv2_1x1_3 = createWeight([1,1,4*map1,reduce1x1],'W_conv2_1x1_3')
    b_conv2_1x1_3 = createWeight([reduce1x1],'b_conv2_1x1_3')
    
    #follows 1x1_2
    W_conv2_3x3 = createWeight([3,3,reduce1x1,map2],'W_conv2_3x3')
    b_conv2_3x3 = createWeight([map2],'b_conv2_3x3')
    
    #follows 1x1_3
    W_conv2_5x5 = createWeight([5,5,reduce1x1,map2],'W_conv2_5x5')
    b_conv2_5x5 = createBias([map2],'b_conv2_5x5')
    
    #follows max pooling
    W_conv2_1x1_4= createWeight([1,1,4*map1,map2],'W_conv2_1x1_4')
    b_conv2_1x1_4= createWeight([map2],'b_conv2_1x1_4')
    
    

    #Fully connected layers
    #since padding is same, the feature map with there will be 4 28*28*map2
    W_fc1 = createWeight([28*28*(4*map2),num_fc1],'W_fc1')
    b_fc1 = createBias([num_fc1],'b_fc1')
    
    W_fc2 = createWeight([num_fc1,num_fc2],'W_fc2')
    b_fc2 = createBias([num_fc2],'b_fc2')

    def model(x,train=True):
        #Inception Module 1
        conv1_1x1_1 = conv2d_s1(x,W_conv1_1x1_1)+b_conv1_1x1_1
        conv1_1x1_2 = tf.nn.relu(conv2d_s1(x,W_conv1_1x1_2)+b_conv1_1x1_2)
        conv1_1x1_3 = tf.nn.relu(conv2d_s1(x,W_conv1_1x1_3)+b_conv1_1x1_3)
        conv1_3x3 = conv2d_s1(conv1_1x1_2,W_conv1_3x3)+b_conv1_3x3
        conv1_5x5 = conv2d_s1(conv1_1x1_3,W_conv1_5x5)+b_conv1_5x5
        maxpool1 = max_pool_3x3_s1(x)
        conv1_1x1_4 = conv2d_s1(maxpool1,W_conv1_1x1_4)+b_conv1_1x1_4
        
        #concatenate all the feature maps and hit them with a relu
        inception1 = tf.nn.relu(tf.concat(3,[conv1_1x1_1,conv1_3x3,conv1_5x5,conv1_1x1_4]))

        
        #Inception Module 2
        conv2_1x1_1 = conv2d_s1(inception1,W_conv2_1x1_1)+b_conv2_1x1_1
        conv2_1x1_2 = tf.nn.relu(conv2d_s1(inception1,W_conv2_1x1_2)+b_conv2_1x1_2)
        conv2_1x1_3 = tf.nn.relu(conv2d_s1(inception1,W_conv2_1x1_3)+b_conv2_1x1_3)
        conv2_3x3 = conv2d_s1(conv2_1x1_2,W_conv2_3x3)+b_conv2_3x3
        conv2_5x5 = conv2d_s1(conv2_1x1_3,W_conv2_5x5)+b_conv2_5x5
        maxpool2 = max_pool_3x3_s1(inception1)
        conv2_1x1_4 = conv2d_s1(maxpool2,W_conv2_1x1_4)+b_conv2_1x1_4
        
        #concatenate all the feature maps and hit them with a relu
        inception2 = tf.nn.relu(tf.concat(3,[conv2_1x1_1,conv2_3x3,conv2_5x5,conv2_1x1_4]))

        #flatten features for fully connected layer
        inception2_flat = tf.reshape(inception2,[-1,28*28*4*map2])
        
        #Fully connected layers
        if train:
            h_fc1 =tf.nn.dropout(tf.nn.relu(tf.matmul(inception2_flat,W_fc1)+b_fc1),dropout)
        else:
            h_fc1 = tf.nn.relu(tf.matmul(inception2_flat,W_fc1)+b_fc1)

        return tf.matmul(h_fc1,W_fc2)+b_fc2
    
    
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(model(X),y_))
    opt = tf.train.AdamOptimizer(1e-4).minimize(loss)
    
    predictions_val = tf.nn.softmax(model(tf_valX,train=False))
    predictions_test = tf.nn.softmax(model(tf_testX,train=False))
    
    #initialize variable
    #init = tf.initialize_all_variables()
    init = tf.global_variables_initializer()
    
    #use to save variables so we can pick up later
    saver = tf.train.Saver()

In [13]:
num_steps = 5000
sess = tf.Session(graph=graph)

#initialize variables
sess.run(init)
print("Model initialized.")

#set use_previous=1 to use file_path model
#set use_previous=0 to start model from scratch
use_previous = 1

#use the previous model or don't and initialize variables
if use_previous:
    saver.restore(sess,file_path)
    print("Model restored.")

#training
for s in range(num_steps):
    offset = (s*batch_size) % (len(train_dataset)-batch_size)
    batch_x,batch_y = train_dataset[offset:(offset+batch_size),:],train_labels[offset:(offset+batch_size),:]
    feed_dict={X : batch_x, y_ : batch_y}
    _,loss_value = sess.run([opt,loss],feed_dict=feed_dict)
    if s%100 == 0:
        result_val = np.array([]).reshape(0,valid_labels.shape[1])
        
        #use the batches class
        batch_valX=test_batchs(valid_dataset)

        for i in range(len(valid_dataset)/test_batch_size):
            feed_dict = {tf_valX : batch_valX.nextBatch(test_batch_size)}
            preds=sess.run(predictions_val, feed_dict=feed_dict)
            result_val=np.concatenate((result_val,preds),axis=0)
        
    
        
        print("step: "+str(s))
        print("validation accuracy: "+str(accuracy(valid_labels,result_val)))
        print(" ")
        
    #get test accuracy and save model
    if s == (num_steps-1):
        #create an array to store the outputs for the test
        result = np.array([]).reshape(0,10)

        #use the batches class
        batch_testX=test_batchs(test_dataset)

        for i in range(len(test_dataset)/test_batch_size):
            feed_dict = {tf_testX : batch_testX.nextBatch(test_batch_size)}
            preds=sess.run(predictions_test, feed_dict=feed_dict)
            result=np.concatenate((result,preds),axis=0)
        
        print("test accuracy: "+str(accuracy(test_labels,result)))
        
        save_path = saver.save(sess,file_path)
        print("Model saved.")

Model initialized.
Model restored.
step: 0
validation accuracy: 91.16
 
step: 100
validation accuracy: 91.24
 
step: 200
validation accuracy: 90.98
 
step: 300
validation accuracy: 90.96
 
step: 400
validation accuracy: 90.79
 
step: 500
validation accuracy: 90.96
 
step: 600
validation accuracy: 90.82
 
step: 700
validation accuracy: 90.94
 
step: 800
validation accuracy: 91.07
 
step: 900
validation accuracy: 90.72
 
step: 1000
validation accuracy: 90.94
 
step: 1100
validation accuracy: 91.14
 
step: 1200
validation accuracy: 91.37
 
step: 1300
validation accuracy: 90.98
 
step: 1400
validation accuracy: 90.87
 
step: 1500
validation accuracy: 91.15
 
step: 1600
validation accuracy: 91.1
 
step: 1700
validation accuracy: 90.89
 
step: 1800
validation accuracy: 91.17
 
step: 1900
validation accuracy: 90.93
 
step: 2000
validation accuracy: 91.27
 
step: 2100
validation accuracy: 90.98
 
step: 2200
validation accuracy: 91.28
 
step: 2300
validation accuracy: 91.6
 
step: 2400
validati

5,000: 94.67%

10,000: 96.16%

15,000: 96.06%