# Convolutional Neural Networks
## Standard LeNet5 with TensorFlow
### Xavier Bresson, Sept. 2017

Implementation of the original LeNet5 convolutional neural network, as described in:<br>
Gradient-based learning applied to document recognition<br>
Y LeCun, L Bottou, Y Bengio, P Haffner<br>
Proceedings of the IEEE 86 (11), 2278-2324<br>

In [1]:
import tensorflow as tf
import numpy as np
import time
import collections

# MNIST

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from the folder datasets/; one_hot=False keeps the labels as a
# 1-D vector of class indices instead of one-hot rows.
mnist = input_data.read_data_sets('datasets', one_hot=False)

# Images of each split as float32 matrices (one flattened image per row).
train_data, val_data, test_data = (
    split.images.astype(np.float32)
    for split in (mnist.train, mnist.validation, mnist.test)
)

# Matching label vectors for each split.
train_labels, val_labels, test_labels = (
    split.labels
    for split in (mnist.train, mnist.validation, mnist.test)
)

# Report the shapes, one per line: data then labels for train / val / test.
print(
    *[
        array.shape
        for array in (
            train_data, train_labels,
            val_data, val_labels,
            test_data, test_labels,
        )
    ],
    sep='\n'
)

Extracting datasets/train-images-idx3-ubyte.gz
Extracting datasets/train-labels-idx1-ubyte.gz
Extracting datasets/t10k-images-idx3-ubyte.gz
Extracting datasets/t10k-labels-idx1-ubyte.gz
(55000, 784)
(55000,)
(5000, 784)
(5000,)
(10000, 784)
(10000,)


# ConvNet LeNet5
### CL32-MP4-CL64-MP4-FC512-FC10

In [3]:
class ConvNet_LeNet5(object):
    """LeNet5-style convolutional network on the TensorFlow 1.x graph API.

    The forward pass is: [convolution -> ReLU -> 2x2 max-pooling] twice,
    then a fully connected ReLU layer with dropout, then a final linear
    layer that outputs unnormalized class scores (logits).
    """

    # Constructor
    def __init__(self, net_parameters):
        """Create the trainable weights and biases of the network.

        Args:
            net_parameters: sequence
                ``[Nx, Ny, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F]``.
                ``Nx``/``Ny`` are the input image width/height, ``CLi_F`` and
                ``CLi_K`` are the number of filters and the (square) kernel
                size of convolutional layer ``i``, and ``FC1_F``/``FC2_F``
                are the output sizes of the two fully connected layers.
        """
        print('ConvNet: LeNet5')

        # parameters
        Nx, Ny, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F = net_parameters
        self.Nx = Nx
        self.Ny = Ny

        # Number of features entering FC1: CL2_F feature maps whose spatial
        # size is what remains of (Ny, Nx) after the two SAME-padded 2x2
        # poolings of forward(). Both dimensions are taken into account and
        # rounded up like the pooling does, so non-square inputs and sizes
        # that are not multiples of 4 are handled too.
        self.FC1Fin = CL2_F * self._pooled_size(Nx) * self._pooled_size(Ny)

        # Variables for the computational graph (weights random, biases zero)
        self.WCL1 = tf.Variable(
            self.init_weights([CL1_K, CL1_K, 1, CL1_F], CL1_K**2, CL1_F))
        self.bCL1 = tf.Variable(tf.zeros([CL1_F], tf.float32))
        self.WCL2 = tf.Variable(
            self.init_weights([CL2_K, CL2_K, CL1_F, CL2_F], CL1_F*CL2_K**2, CL2_F))
        self.bCL2 = tf.Variable(tf.zeros([CL2_F], tf.float32))
        self.WFC1 = tf.Variable(
            self.init_weights([self.FC1Fin, FC1_F], self.FC1Fin, FC1_F))
        self.bFC1 = tf.Variable(tf.zeros([FC1_F], tf.float32))
        self.WFC2 = tf.Variable(
            self.init_weights([FC1_F, FC2_F], FC1_F, FC2_F))
        self.bFC2 = tf.Variable(tf.zeros([FC2_F], tf.float32))

    @staticmethod
    def _pooled_size(size, num_pools=2):
        """Return the length of one spatial dimension after pooling.

        Each SAME-padded, stride-2 pooling maps a length ``n`` to
        ``ceil(n / 2)``.

        Args:
            size: input length along one spatial dimension.
            num_pools: number of successive 2x2 poolings (2 in this network).

        Returns:
            The length after ``num_pools`` poolings.
        """
        for _ in range(num_pools):
            size = (size + 1) // 2  # integer ceil(size / 2)
        return size

    def init_weights(self, shape, Fin, Fout):
        """Return a random initial weight tensor of the given shape.

        Values are drawn uniformly from ``[-scale, scale]`` with
        ``scale = sqrt(2 / (Fin + Fout))``.

        Args:
            shape: shape of the weight tensor.
            Fin: number of input features used to scale the initialization.
            Fout: number of output features used to scale the initialization.

        Returns:
            A ``tf.random_uniform`` tensor to be wrapped in a ``tf.Variable``.
        """
        # NOTE(review): the usual Glorot/Xavier *uniform* bound is
        # sqrt(6 / (Fin + Fout)); sqrt(2 / (Fin + Fout)) is the Glorot
        # *normal* standard deviation. Kept as-is to preserve the training
        # behavior -- confirm whether this is intentional.
        scale = tf.sqrt( 2.0/ (Fin+Fout) )
        W = tf.random_uniform( shape, minval=-scale, maxval=scale )
        return W

    def conv2d(self, x, W):
        """Apply a stride-1, SAME-padded 2-D convolution of ``x`` with ``W``."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(self, x):
        """Apply 2x2 max-pooling with stride 2 and SAME padding to ``x``."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    def forward(self, x, d):
        """Build the forward pass and return the class scores (logits).

        Args:
            x: batch of flattened images; it is reshaped to
                ``[-1, Ny, Nx, 1]`` before the first convolution.
            d: value passed as second argument of ``tf.nn.dropout`` (the
                keep probability in TensorFlow 1.x; 1.0 disables dropout).

        Returns:
            Tensor of unnormalized class scores, one row per image.
        """
        # CL1
        x = tf.reshape(x, [-1,self.Ny,self.Nx,1])
        x = self.conv2d(x, self.WCL1) + self.bCL1
        x = tf.nn.relu(x)
        x = self.max_pool_2x2(x)

        # CL2
        x = self.conv2d(x, self.WCL2) + self.bCL2
        x = tf.nn.relu(x)
        x = self.max_pool_2x2(x)

        # FC1 (feature maps are flattened into one vector per image)
        x = tf.reshape(x, [-1,self.FC1Fin])
        x = tf.matmul(x, self.WFC1) + self.bFC1
        x = tf.nn.relu(x)
        x = tf.nn.dropout(x, d)

        # FC2 (no non-linearity: the softmax is applied inside the loss)
        x = tf.matmul(x, self.WFC2) + self.bFC2

        return x

    def loss(self, x, x_target, l2_regularization):
        """Return the mean cross-entropy plus an L2 penalty.

        Args:
            x: class scores (logits) returned by ``forward``.
            x_target: integer class labels (sparse, not one-hot).
            l2_regularization: weight of the L2 penalty.

        Returns:
            Scalar loss tensor.
        """
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=x_target, logits=x)
        loss = tf.reduce_mean(cross_entropy)

        # The penalty covers every trainable variable of the default graph,
        # biases included.
        l2_loss = 0.0
        for var in tf.trainable_variables():
            l2_loss += tf.nn.l2_loss(var)

        loss += l2_regularization* l2_loss

        return loss

    def backward(self, loss, learning_rate, train_size, batch_size):
        """Build the training op (momentum SGD with a decaying learning rate).

        Args:
            loss: scalar loss tensor to minimize.
            learning_rate: initial learning rate.
            train_size: number of training samples; the learning rate is
                multiplied by 0.95 each time this many samples were processed.
            batch_size: number of samples per training step.

        Returns:
            Tuple ``(train_op, learning_rate_tensor)``.
        """
        # Step counter incremented by optimizer.minimize(); excluded from
        # training (and therefore from the L2 penalty).
        batch = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate,batch * batch_size, train_size, 0.95,
                staircase=True)
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        backward = optimizer.minimize(loss, global_step=batch)

        return backward, learning_rate

    def evaluation(self, x, x_target):
        """Return the classification accuracy of a batch, in percent.

        Args:
            x: class scores (logits) returned by ``forward``.
            x_target: int32 class labels.

        Returns:
            Scalar tensor holding ``100 * mean(prediction == label)``.
        """
        # Softmax is strictly increasing, so the argmax of the raw scores is
        # the predicted class; no need to normalize first.
        predicted_classes = tf.cast( tf.argmax(x, 1), tf.int32 )
        accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted_classes,x_target), tf.float32))

        return 100.0* accuracy
    

In [4]:
# Delete existing network if exists, so that re-running this cell starts from
# an empty default graph instead of stacking a second network onto it.
try:
    del net
    tf.reset_default_graph() 
    print('Delete existing network\n')
except NameError:
    print('No existing network to delete\n')

    
# network parameters
Nx = Ny = 28   # image width / height (784 = 28 * 28 pixels)
CL1_F = 32     # number of filters of the 1st convolutional layer
CL1_K = 5      # kernel size of the 1st convolutional layer
CL2_F = 64     # number of filters of the 2nd convolutional layer
CL2_K = 5      # kernel size of the 2nd convolutional layer
FC1_F = 512    # output size of the 1st fully connected layer
FC2_F = 10     # output size of the 2nd fully connected layer (class scores)
net_parameters = [Nx, Ny, CL1_F, CL1_K, CL2_F, CL2_K, FC1_F, FC2_F]

# instantiate the object net of the class 
net = ConvNet_LeNet5(net_parameters)


# learning parameters
learning_rate = 0.05
dropout_value = 0.5        # value fed to the dropout placeholder during training
l2_regularization = 5e-4 
batch_size = 100
num_epochs = 20
train_size = train_data.shape[0]
nb_iter = int(num_epochs * train_size) // batch_size  # total number of training steps
print('num_epochs=',num_epochs,', train_size=',train_size,', nb_iter=',nb_iter)


# computational graph
x = tf.placeholder(tf.float32, (None, 784))   # batch of flattened images
x_target = tf.placeholder(tf.int32, (None))   # batch of class labels
d = tf.placeholder(tf.float32)                # dropout value (1.0 at test time)

x_score = net.forward(x,d)
loss = net.loss(x_score,x_target,l2_regularization)
backward, lr_op = net.backward(loss,learning_rate,train_size,batch_size)
evaluation = net.evaluation(x_score,x_target)


# train
# tf.initialize_all_variables() is deprecated in favor of
# tf.global_variables_initializer().
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)


# loop over epochs
# Sample indices left over from one epoch (fewer than batch_size) stay in the
# deque and are consumed at the start of the next epoch.
indices = collections.deque()
for epoch in range(num_epochs):  # loop over the dataset multiple times

    # reshuffle 
    indices.extend(np.random.permutation(train_size)) # rand permutation
    
    # reset time
    t_start = time.time()
    
    # extract batches
    running_loss = 0.0
    running_accuray = 0
    running_total = 0
    while len(indices) >= batch_size:
        
        # extract batches
        batch_idx = [indices.popleft() for i in range(batch_size)]
        batch_x, batch_y = train_data[batch_idx,:], train_labels[batch_idx]
        batch_y = np.array(batch_y,np.int32)
        if not isinstance(batch_x, np.ndarray):
            batch_x = batch_x.toarray()  # convert to full matrices if sparse
        
        # run computational graph (one training step + batch statistics)
        _,acc_train,loss_train,lr_train = sess.run([backward,evaluation,loss,lr_op], feed_dict={x: batch_x, x_target: batch_y, d: dropout_value})
        
        # loss, accuracy
        running_loss += loss_train
        running_accuray += acc_train
        running_total += 1
        
        # print        
        if not running_total%100: # print every 100 mini-batches
            print('epoch= %d, i= %4d, loss(batch)= %.4f, accuray(batch)= %.2f' % (epoch+1, running_total, loss_train, acc_train))
        

    # epoch summary: averages over the mini-batches of this epoch
    t_stop = time.time() - t_start
    print('epoch= %d, loss(train)= %.3f, accuracy(train)= %.3f, time= %.3f, lr= %.5f' % 
          (epoch+1, running_loss/running_total, running_accuray/running_total, t_stop, lr_train))
 

    # Test set: average of the per-batch accuracies, in dataset order and
    # with dropout disabled (d = 1.0). Samples beyond the last full batch
    # are not evaluated.
    running_accuray_test = 0
    running_total_test = 0
    indices_test = collections.deque()
    indices_test.extend(range(test_data.shape[0]))
    t_start_test = time.time()
    while len(indices_test) >= batch_size:
        batch_idx_test = [indices_test.popleft() for i in range(batch_size)]
        batch_x_test, batch_y_test = test_data[batch_idx_test,:], test_labels[batch_idx_test]
        batch_y_test = np.array(batch_y_test,np.int32)
        if not isinstance(batch_x_test, np.ndarray):
            batch_x_test = batch_x_test.toarray()  # convert to full matrices if sparse
        acc_test = sess.run(evaluation, feed_dict={x: batch_x_test, x_target: batch_y_test, d: 1.0})
        running_accuray_test += acc_test
        running_total_test += 1
    t_stop_test = time.time() - t_start_test
    print('  accuracy(test) = %.3f %%, time= %.3f' % (running_accuray_test / running_total_test, t_stop_test))  



No existing network to delete

ConvNet: LeNet5
num_epochs= 20 , train_size= 55000 , nb_iter= 11000
Instructions for updating:
Use `tf.global_variables_initializer` instead.
epoch= 1, i=  100, loss(batch)= 0.2747, accuray(batch)= 94.00
epoch= 1, i=  200, loss(batch)= 0.1452, accuray(batch)= 98.00
epoch= 1, i=  300, loss(batch)= 0.1559, accuray(batch)= 99.00
epoch= 1, i=  400, loss(batch)= 0.2485, accuray(batch)= 97.00
epoch= 1, i=  500, loss(batch)= 0.1383, accuray(batch)= 99.00
epoch= 1, loss(train)= 0.313, accuracy(train)= 93.369, time= 6.836, lr= 0.05000
  accuracy(test) = 98.620 %, time= 0.390
epoch= 2, i=  100, loss(batch)= 0.1222, accuray(batch)= 100.00
epoch= 2, i=  200, loss(batch)= 0.1786, accuray(batch)= 97.00
epoch= 2, i=  300, loss(batch)= 0.1771, accuray(batch)= 97.00
epoch= 2, i=  400, loss(batch)= 0.1606, accuray(batch)= 98.00
epoch= 2, i=  500, loss(batch)= 0.1377, accuray(batch)= 99.00
epoch= 2, loss(train)= 0.154, accuracy(train)= 98.276, time= 6.439, lr= 0.04750
  acc