In [19]:
import os 
import struct
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10
VALIDATION_SIZE = 5000  # Size of the validation set.
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 64
NUM_EPOCHS = 10
EVAL_BATCH_SIZE = 64
EVAL_FREQUENCY = 100 
def load_mnist(path,kind='train'):
    labels_path = os.path.join(path,'%s-labels.idx1-ubyte' % kind)
    images_path = os.path.join(path,'%s-images.idx3-ubyte' % kind)
    with open(labels_path,'rb') as lbpath:
        magic,n = struct.unpack('>II',lbpath.read(8))
        labels = np.fromfile(lbpath,dtype = np.uint8)
    
    with open(images_path,'rb') as imgpath:
        magic,num,rows,cols = struct.unpack(">IIII", imgpath.read(16))
        images = np.fromfile(imgpath,dtype=np.uint8).reshape(len(labels),IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS)
        
    return images,labels

#load data
train_data,train_labels = load_mnist('mnist',kind='train')
print('Rows: %d, columns: %d'%(train_data.shape[0],train_data.shape[1]))
test_data,test_labels = load_mnist('mnist',kind ='t10k' )
print('Rows: %d,clumns: %d' %(test_data.shape[0],test_data.shape[1]))
    

Rows: 60000, columns: 28
Rows: 10000,clumns: 28


In [20]:
validation_data = train_data[:VALIDATION_SIZE,...]
validation_labels = train_labels[:VALIDATION_SIZE]
train_data = train_data[VALIDATION_SIZE:,...]
train_labels = train_labels[VALIDATION_SIZE:]

In [21]:
#defining a 2D convolution in Tensorflow
#import tensorflow.compat.v1 as tf
#tf.disable_eager_execution()
#tf.nn.conv2d(input,filter,strides=[1,1,1,1],padding='SAME',use_cudnn_on_gpu=None,data_format=None,name=None)

In [22]:

#defining learnable weights for the convolutional layers
conv1_weights = tf.Variable(tf.truncated_normal([5,5,NUM_CHANNELS,32],stddev=0.1,dtype = tf.float32))
conv1_biases = tf.Variable(tf.zeros([32],dtype = tf.float32))
conv2_weights = tf.Variable(tf.truncated_normal([5,5,32,64],stddev = 0.1,seed=SEED,dtype=tf.float32))
conv2_biases = tf.Variable(tf.constant(0.1,shape=[64],dtype = tf.float32))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [23]:
#defining learnable weights for the fully connected layers
fc1_weights = tf.Variable(tf.truncated_normal([IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64,512],stddev=0.1,seed=SEED,dtype = tf.float32))
fc1_biases = tf.Variable(tf.constant(0.1,shape=[512],dtype = tf.float32))
fc2_weights = tf.Variable(tf.truncated_normal([512,NUM_LABELS],stddev=0.1,seed=SEED,dtype = tf.float32))
fc2_biases = tf.Variable(tf.constant(0.1,shape=[NUM_LABELS],dtype = tf.float32))

In [24]:
#LeNet-5 architecture
def model(data,train=False):
    conv = tf.nn.conv2d(data,conv1_weights,strides=[1,1,1,1],padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv,conv1_biases))
    pool = tf.nn.max_pool(relu,ksize= [1,2,2,1],strides=[1,2,2,1],padding='SAME')
    conv = tf.nn.conv2d(pool,conv2_weights,strides=[1,1,1,1],padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv,conv2_biases))
    pool = tf.nn.max_pool(relu,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
    #reshape the feauture map cuboid into a 2D matrix to feed it the fully connected layers
    pool_shape = pool.get_shape().as_list()
    reshape = tf.reshape(pool,[pool_shape[0],pool_shape[1]*pool_shape[2]*pool_shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    #add 50% dropout during training
    if train:
        hidden = tf.nn.dropout(hidden,0.5,seed=SEED)
    return tf.matmul(hidden,fc2_weights) + fc2_biases

In [25]:
#defining placholders
train_data_node = tf.placeholder(tf.float32,shape=(BATCH_SIZE,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS))
train_labels_node = tf.placeholder(tf.int64,shape=(BATCH_SIZE,))
eval_data = tf.placeholder(tf.float32,shape=(EVAL_BATCH_SIZE,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS))

In [30]:
def eval_in_batches(data,sess):
    #get predictions for a dataset by running it in small batches
    size = data.shape[0]
    if size <EVAL_BATCH_SIZE:
        raise ValueError('batch size for evals larger than dataset: %d', size)
    predictions = numpy.ndarray(shape=(size,NUM_LABELS),dtype=numpy.float32)
    for begin in range(0,size,EVAL_BATCH_SIZE):
        end = begin + EVAL_BATCH_SIZE
        if end <= size:
            predictions[begin:end,:] = sess.run(eval_prediction,feed_dict={eval_data: data[begin:end, ...]})
        else:
            batch_predictions = sess.run(eval_prediction,feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})
            predictions[begin:,:] = batch_predictions[begin - size:,:]
        return predictions
        

In [31]:
import numpy
def error_rate(predictions,labels):
    return 100.0 - (100.0 * numpy.sum(numpy.argmax(predictions,1)==labels)/predictions.shape[0])

In [32]:

num_epochs = NUM_EPOCHS
train_size = train_labels.shape[0]
# Training computation: logits + cross-entropy loss.
logits = model(train_data_node, True)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=train_labels_node, logits=logits))

# L2 regularization for the fully connected parameters.
regularizers = (tf.nn.l2_loss(fc1_weights)
                + tf.nn.l2_loss(fc1_biases)
                + tf.nn.l2_loss(fc2_weights)
                + tf.nn.l2_loss(fc2_biases))
# Add the regularization term to the loss.
loss += 5e-4 * regularizers

# Optimizer: set up a variable that's incremented once per batch and
# controls the learning rate decay.
batch = tf.Variable(0, dtype=tf.float32)
# Decay once per epoch, using an exponential schedule starting at 0.01.
learning_rate = tf.train.exponential_decay(0.01,batch* BATCH_SIZE,train_size, 0.95, staircase=True)
# Use simple momentum for the optimization.
optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss,global_step=batch)

# Predictions for the current training minibatch.
train_prediction = tf.nn.softmax(logits)

# Predictions for the test and validation, which we'll compute less
# often.
eval_prediction = tf.nn.softmax(model(eval_data))

In [34]:
import sys
import time
start_time = time.time()
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(int(NUM_EPOCHS*train_size)// BATCH_SIZE):
        offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_data = train_data[offset:(offset + BATCH_SIZE),...]
        batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
        feed_dict = {train_data_node: batch_data,train_labels_node:batch_labels}
        #run the optimizer to update weights
        sess.run(optimizer,feed_dict=feed_dict)
        if step % EVAL_FREQUENCY == 0:
            l,lr,predictions = sess.run([loss,learning_rate,train_prediction],feed_dict=feed_dict)
            elapsed_time = time.time() - start_time
            start_time = time.time()
            print('Step %d (epoch %.2f), %.1f ms' % (step, float(step) * BATCH_SIZE / train_size,1000*elapsed_time / EVAL_FREQUENCY))
            print('Minibatch loss: %.3f, learning rate: %.6f' %(l,lr))
            print('Minibatch error: %.1f%%' % error_rate(predictions,batch_labels))
            print('Validation error :%.1f%%' % error_rate(eval_in_batches(validation_data,sess),validation_labels))
            sys.stdout.flush()
            
            
    test_error = error_rate(eval_in_batches(test_data,sess),test_labels)
    print('Test error: %.1f%%' % test_error)

Step 0 (epoch 0.00), 4.1 ms
Minibatch loss: 276522.531, learning rate: 0.010000
Minibatch error: 89.1%
Validation error :90.9%
Step 100 (epoch 0.12), 84.5 ms
Minibatch loss: nan, learning rate: 0.010000
Minibatch error: 87.5%
Validation error :90.9%
Step 200 (epoch 0.23), 82.0 ms
Minibatch loss: nan, learning rate: 0.010000
Minibatch error: 96.9%
Validation error :90.9%
Step 300 (epoch 0.35), 84.0 ms
Minibatch loss: nan, learning rate: 0.010000
Minibatch error: 95.3%
Validation error :90.9%
Step 400 (epoch 0.47), 81.4 ms
Minibatch loss: nan, learning rate: 0.010000
Minibatch error: 90.6%
Validation error :90.9%
Step 500 (epoch 0.58), 89.8 ms
Minibatch loss: nan, learning rate: 0.010000
Minibatch error: 92.2%
Validation error :90.9%
Step 600 (epoch 0.70), 89.1 ms
Minibatch loss: nan, learning rate: 0.010000
Minibatch error: 89.1%
Validation error :90.9%
Step 700 (epoch 0.81), 83.1 ms
Minibatch loss: nan, learning rate: 0.010000
Minibatch error: 92.2%
Validation error :90.3%
Step 800 (ep

Step 6700 (epoch 7.80), 91.4 ms
Minibatch loss: nan, learning rate: 0.006983
Minibatch error: 85.9%
Validation error :90.9%
Step 6800 (epoch 7.91), 87.9 ms
Minibatch loss: nan, learning rate: 0.006983
Minibatch error: 85.9%
Validation error :90.6%
Step 6900 (epoch 8.03), 85.0 ms
Minibatch loss: nan, learning rate: 0.006634
Minibatch error: 87.5%
Validation error :90.1%
Step 7000 (epoch 8.15), 86.2 ms
Minibatch loss: nan, learning rate: 0.006634
Minibatch error: 92.2%
Validation error :90.2%
Step 7100 (epoch 8.26), 88.6 ms
Minibatch loss: nan, learning rate: 0.006634
Minibatch error: 90.6%
Validation error :90.0%
Step 7200 (epoch 8.38), 85.6 ms
Minibatch loss: nan, learning rate: 0.006634
Minibatch error: 87.5%
Validation error :90.1%
Step 7300 (epoch 8.49), 88.3 ms
Minibatch loss: nan, learning rate: 0.006634
Minibatch error: 95.3%
Validation error :90.3%
Step 7400 (epoch 8.61), 89.0 ms
Minibatch loss: nan, learning rate: 0.006634
Minibatch error: 92.2%
Validation error :90.1%
Step 750