# LeNet5 Implementation from Scratch

In [None]:
import numpy as np 
import sys
import pickle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib as mpl
import time
from evaluation_matrix import *

In [None]:
from utils.LayerObjects import *
from utils.utils_func import *

Paths to the MNIST dataset files

In [None]:
# Locations of the raw MNIST files, one image file and one label file per split.
train_image_path = r'./MNIST/train-images-idx3-ubyte'
train_label_path = r'./MNIST/train-labels-idx1-ubyte'
test_image_path = r'./MNIST/t10k-images-idx3-ubyte'
test_label_path = r'./MNIST/t10k-labels-idx1-ubyte'

# Each split is bundled as an (image_path, label_path) pair.
trainset = train_image_path, train_label_path
testset = test_image_path, test_label_path

In[4]:

read the dataset with readDataset()

In [None]:
# Load both splits; readDataset receives an (image_path, label_path) pair and
# its result is unpacked as (images, labels).
(train_image, train_label) = readDataset(trainset)
(test_image, test_label) = readDataset(testset)

# Number of samples in each split, taken from the label arrays.
n_m, n_m_test = len(train_label), len(test_label)

print("The shape of training image:", train_image.shape)
print("The shape of testing image: ", test_image.shape)
print("Length of the training set: ", n_m)
# Fixed label: this line reports the testing-set size (it used to say "training set").
print("Length of the testing set:  ", n_m_test)
print("Shape of a single image: ", train_image[0].shape)

## 1.2 Zero-padding & Normalization

In [None]:
def _pad_and_normalize(images):
    """Append a trailing channel axis to 3-D `images`, then zero_pad and normalize.

    The pad argument (2) and the 'lenet5' normalization mode are passed straight
    to the project helpers zero_pad() and normalize().
    """
    with_channel_axis = images[:, :, :, np.newaxis]
    return normalize(zero_pad(with_channel_axis, 2), 'lenet5')


train_image_normalized_pad = _pad_and_normalize(train_image)
test_image_normalized_pad = _pad_and_normalize(test_image)
print("The shape of training image with padding:", train_image_normalized_pad.shape)
print("The shape of testing image with padding: ", test_image_normalized_pad.shape)

# 2. Structure of LeNet5

The layers used here are:
**C1** → a1 → **S2** → **C3** → a2 → **S4** → **C5** → a3 → **F6** → a4 → **RBF**<br>


In [None]:
# Connection table handed to the C3 layer: 16 entries, each listing the input
# feature-map indices (0-5) that one C3 output map is built from.
C3_mapping = [
    # 6 maps fed by 3 neighbouring inputs (indices wrap around)
    [0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 0], [5, 0, 1],
    # 6 maps fed by 4 neighbouring inputs (indices wrap around)
    [0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 0], [4, 5, 0, 1], [5, 0, 1, 2],
    # 3 maps fed by 4 non-contiguous inputs
    [0, 1, 3, 4], [1, 2, 4, 5], [0, 2, 3, 5],
    # 1 map fed by all 6 inputs
    [0, 1, 2, 3, 4, 5],
]

In[63]:

The fixed weights (preset 7x12 ASCII bitmaps, one per digit) used in the RBF layer.

In [None]:
# Fixed RBF weights: row `digit` of `bitmap` is reshaped to 12x7 for display.
bitmap = rbf_init_weight()

# Show the ten digit bitmaps on a 2x5 grid, filled row by row.
fig, axarr = plt.subplots(2, 5, figsize=(20, 8))
for digit in range(10):
    row, col = divmod(digit, 5)
    panel = axarr[row, col]
    panel.set_title(str(digit))
    panel.imshow(bitmap[digit, :].reshape(12, 7), cmap=mpl.cm.Greys)

In[7]:

LeNet5 object (also stored in utils/LayerObjects.py)

In [None]:
class LeNet5(object):
    """LeNet-5 style network wired together from the project's layer objects.

    Data flows C1 -> a1 -> S2 -> C3 -> a2 -> S4 -> C5 -> a3 -> F6 -> a4 -> Output.
    Construction reads the module-level names ``C3_mapping`` (C3 connection
    table) and ``bitmap`` (fixed weights of the RBF output layer).
    """

    def __init__(self):
        # Kernel / weight shapes of the trainable layers.
        # NOTE(review): the 4-tuples look like (height, width, in_maps, out_maps)
        # -- confirm against ConvLayer.
        c1_kernel = (5, 5, 1, 6)
        c3_kernel = (5, 5, 6, 16)     # C3 uses the designated combinations in C3_mapping
        c5_kernel = (5, 5, 16, 120)   # effectively a fully connected layer
        f6_shape = (120, 84)

        # One hyper-parameter dict is shared by all conv layers, one by both pooling layers.
        conv_hparams = {"stride": 1, "pad": 0}
        pool_hparams = {"stride": 2, "f": 2}

        # Every nonlinearity is the activation registered under "LeNet5_squash".
        # NOTE(review): its exact formula lives in utils.LayerObjects -- confirm there.
        self.C1 = ConvLayer(c1_kernel, conv_hparams)
        self.a1 = Activation("LeNet5_squash")
        self.S2 = PoolingLayer(pool_hparams, "average")

        self.C3 = ConvLayer_maps(c3_kernel, conv_hparams, C3_mapping)
        self.a2 = Activation("LeNet5_squash")
        self.S4 = PoolingLayer(pool_hparams, "average")

        self.C5 = ConvLayer(c5_kernel, conv_hparams)
        self.a3 = Activation("LeNet5_squash")
        self.F6 = FCLayer(f6_shape)
        self.a4 = Activation("LeNet5_squash")

        self.Output = RBFLayer(bitmap)

    def Forward_Propagation(self, input_image, input_label, mode):
        """Run one forward pass and return the output layer's result.

        Every intermediate result is stored on the instance as ``<stage>_FP``
        (plus ``flatten`` and ``label``).

        Returns whatever ``self.Output.foward_prop`` returns: the loss summed
        over the mini-batch when ``mode`` is 'train', or the tuple
        (0/1 error, class_predict) when ``mode`` is 'test'.
        """
        self.label = input_label

        # Convolutional trunk: feed each stage the previous stage's output and
        # cache the result as self.<stage>_FP.
        signal = input_image
        for stage in ("C1", "a1", "S2", "C3", "a2", "S4", "C5", "a3"):
            signal = getattr(self, stage).foward_prop(signal)
            setattr(self, stage + "_FP", signal)

        # Take position (0, 0) on axes 1 and 2 of the a3 output, leaving a 2-D array for F6.
        self.flatten = signal[:, 0, 0, :]
        self.F6_FP = self.F6.foward_prop(self.flatten)
        self.a4_FP = self.a4.foward_prop(self.F6_FP)

        return self.Output.foward_prop(self.a4_FP, input_label, mode)

    def Back_Propagation(self, momentum, weight_decay):
        """Push the gradient from the output layer back down to C1.

        ``momentum`` and ``weight_decay`` are forwarded to the back_prop of the
        trainable layers (F6, C5, C3, C1). Nothing is returned.
        """
        grad = self.Output.back_prop()

        grad = self.a4.back_prop(grad)
        grad = self.F6.back_prop(grad, momentum, weight_decay)

        # Undo the flatten: re-insert the two singleton axes dropped in the forward pass.
        grad = grad[:, np.newaxis, np.newaxis, :]
        grad = self.a3.back_prop(grad)
        grad = self.C5.back_prop(grad, momentum, weight_decay)

        grad = self.S4.back_prop(grad)
        grad = self.a2.back_prop(grad)
        grad = self.C3.back_prop(grad, momentum, weight_decay)

        grad = self.S2.back_prop(grad)
        grad = self.a1.back_prop(grad)
        self.C1.back_prop(grad, momentum, weight_decay)

    def SDLM(self, mu, lr_global):
        """Stochastic Diagonal Levenberg-Marquardt pass, used to determine the
        learning rate before the beginning of each epoch.

        Mirrors Back_Propagation layer by layer, calling each layer's SDLM();
        ``mu`` and ``lr_global`` are forwarded to the trainable layers
        (F6, C5, C3, C1). Nothing is returned.
        """
        second_order = self.Output.SDLM()
        second_order = self.a4.SDLM(second_order)

        second_order = self.F6.SDLM(second_order, mu, lr_global)

        # Same un-flatten step as in Back_Propagation.
        second_order = second_order[:, np.newaxis, np.newaxis, :]
        second_order = self.a3.SDLM(second_order)
        second_order = self.C5.SDLM(second_order, mu, lr_global)

        second_order = self.S4.SDLM(second_order)
        second_order = self.a2.SDLM(second_order)
        second_order = self.C3.SDLM(second_order, mu, lr_global)

        second_order = self.S2.SDLM(second_order)
        second_order = self.a1.SDLM(second_order)
        self.C1.SDLM(second_order, mu, lr_global)

In[8]:

In [None]:
# Build the network instance that the training and evaluation cells below operate on.
ConvNet = LeNet5()

# 3. Training & Testing

## 3.1 Hyperparameters

The Stochastic Diagonal Levenberg-Marquardt method is used in the original LeNet5. 

Number of epochs & learning rate in the original paper

In [None]:
# NOTE(review): the epoch count is 0 while the schedule below holds 20 rates;
# with 0 epochs no training iteration is requested -- confirm this is intended.
epoch_orig = 0

# Per-epoch global learning rates: 2 x 5e-4, 3 x 2e-4, 3 x 1e-4, 4 x 5e-5, 8 x 1e-5.
lr_global_orig = np.repeat([5e-4, 2e-4, 1e-4, 5e-5, 1e-5], [2, 3, 3, 4, 8])

Number of epochs & learning rate I used

In [None]:
# Same number of epochs as the reference setting above...
epoches = epoch_orig
# ...but every global learning rate is scaled up by a factor of 100.
lr_global_list = lr_global_orig * 100

In [None]:
# Passed to ConvNet.Back_Propagation() for every mini-batch.
momentum = 0.9
# Passed to ConvNet.Back_Propagation() alongside momentum; 0 here.
weight_decay = 0
# Mini-batch size handed to random_mini_batches() in the training loop.
batch_size = 256

## 3.2 Start Training

In [None]:
# Train for `epoches` epochs. Each epoch:
#   1. runs SDLM on one 500-sample batch to set the per-layer learning rates,
#   2. does one forward/backward pass per mini-batch,
#   3. measures the 0/1 error on the full training and testing sets,
#   4. pickles the network to model_data_<epoch>.pkl (zero-based epoch index).
st = time.time()
# np.inf instead of the np.Inf alias, which was removed in NumPy 2.0.
cost_last, count = np.inf, 0
err_rate_list = []
for epoch in range(epoches):
    print("---------- epoch", epoch+1, "begin ----------")

    # Stochastic Diagonal Levenberg-Marquardt method for determining the learning rate
    (batch_image, batch_label) = random_mini_batches(train_image_normalized_pad, train_label, mini_batch_size = 500, one_batch=True)
    ConvNet.Forward_Propagation(batch_image, batch_label, 'train')
    lr_global = lr_global_list[epoch]
    ConvNet.SDLM(0.02, lr_global)

    # print info
    print("global learning rate:", lr_global)
    print("learning rates in trainable layers:", np.array([ConvNet.C1.lr, ConvNet.C3.lr, ConvNet.C5.lr, ConvNet.F6.lr]))
    print("batch size:", batch_size)
    print("Momentum:",momentum,", weight decay:",weight_decay)

    # loop over each batch
    ste = time.time()
    cost = 0
    mini_batches = random_mini_batches(train_image_normalized_pad, train_label, batch_size)
    n_batches = len(mini_batches)
    # Refresh the progress line roughly every 1% of the epoch. The max() keeps
    # the step at least 1, so fewer than 100 mini-batches no longer triggers a
    # modulo-by-zero.
    progress_step = max(1, n_batches // 100)
    for i, (batch_image, batch_label) in enumerate(mini_batches):
        loss = ConvNet.Forward_Propagation(batch_image, batch_label, 'train')
        cost += loss

        ConvNet.Back_Propagation(momentum, weight_decay)

        # print progress
        if i % progress_step == 0:
            sys.stdout.write("\033[F")   #CURSOR_UP_ONE
            sys.stdout.write("\033[K")   #ERASE_LINE
            print ("progress:", int(100*(i+1)/n_batches), "%, ", "cost =", cost, end='\r')
    sys.stdout.write("\033[F")   #CURSOR_UP_ONE
    sys.stdout.write("\033[K")   #ERASE_LINE

    print ("Done, cost of epoch", epoch+1, ":", cost,"                                             ")

    # In 'test' mode the forward pass returns (0/1 error count, predicted classes).
    error01_train, _ = ConvNet.Forward_Propagation(train_image_normalized_pad, train_label, 'test')
    error01_test, _  = ConvNet.Forward_Propagation(test_image_normalized_pad,  test_label,  'test')
    # Divide by the actual split sizes instead of hard-coded 60000 / 10000.
    err_rate_list.append([error01_train/len(train_label), error01_test/len(test_label)])
    print("0/1 error of training set:",  error01_train, "/", len(train_label))
    print("0/1 error of testing set: ",  error01_test,  "/", len(test_label))
    print("Time used: ",time.time() - ste, "sec")
    print("---------- epoch", epoch+1, "end ------------")

    # Checkpoint the whole network after every epoch.
    with open('model_data_'+str(epoch)+'.pkl', 'wb') as output:
        pickle.dump(ConvNet, output, pickle.HIGHEST_PROTOCOL)


# Transposed so that row 0 holds the training error rates and row 1 the testing ones.
err_rate_list = np.array(err_rate_list).T
print("Total time used: ", time.time() - st, "sec")

In[30]:<br>
########## If epoches == 0, keep the following plotting lines commented out ##########

In [None]:
# This shows the error rate of training and testing data after each epoch
#x = np.arange(epoches)
#plt.xlabel('epoches')
#plt.ylabel('error rate')
#plt.plot(x, err_rate_list[0])
#plt.plot(x, err_rate_list[1])
#plt.legend(['training data', 'testing data'], loc='upper right')
#plt.show()
################################################################

In [None]:
# Restore a previously pickled network; 'model_data_13.pkl' follows the naming
# used by the training loop (zero-based epoch index), and replaces ConvNet.
# NOTE(review): the file must already exist on disk -- confirm it is shipped or
# produced by an earlier training run.
# SECURITY: pickle.load executes arbitrary code embedded in the file; only load
# checkpoints from a trusted source.
with open('model_data_13.pkl', 'rb') as input_:
    ConvNet = pickle.load(input_)

In [None]:
    ############ for evaluation metrics in training ##################
# Rebuild the padded, normalized training images and classify them in 'test'
# mode, which yields (0/1 error count, predicted classes).
train_image_normalized_pad = normalize(
    zero_pad(train_image[:, :, :, np.newaxis], 2),
    'lenet5',
)
error01, class_pred = ConvNet.Forward_Propagation(
    train_image_normalized_pad, train_label, 'test'
)

# Micro-averaged F1 of the predictions against the training labels.
micro_f1 = micro_F1_SCORE(train_label, class_pred)
print(f"micro F1 score for training = micro precision = micro recall = {micro_f1!s}\n")

In [None]:
# Convert the training labels and predictions with hot_form(..., 10) for the metric helpers below.
# NOTE(review): presumably a one-hot encoding over 10 classes -- confirm in evaluation_matrix.
hot_form_y=hot_form(train_label,10)
hot_form_pred=hot_form(class_pred,10)

In [None]:
# Per-label F1 / precision / recall on the training set.
f1_score_arr, precision_arr, recall_arr = f1_score_labels(hot_form_y, hot_form_pred)
print(f"f1 score for train = {f1_score_arr!s}\n")
print(f"precision for train = {precision_arr!s}\n")
print(f"recall for train = {recall_arr!s}\n")

# Macro metrics derived from the per-label arrays (10 is passed as the last argument).
macro_f1_score_train, macro_precision_train, macro_recall_train = macro_f1_score(
    f1_score_arr, precision_arr, recall_arr, 10
)
print(f"macro_f1_score for train =  {macro_f1_score_train!s}\n")
print(f"macro_precision for train =  {macro_precision_train!s}\n")
print(f"macro_recall for train =  {macro_recall_train!s}\n")

# Confusion matrix: printed as text, then drawn.
confusion_matrix_train = confusion_matrix(hot_form_y, hot_form_pred)
print(f"confusion matrix for train --->\n{confusion_matrix_train!s}\n")
visualise_confusion_for_mnist(confusion_matrix_train)
plt.show()
    
 ###########################################################################   
    

########## for evaluation metrics in testing ##################

In [None]:
# Rebuild the padded, normalized testing images and classify them in 'test'
# mode, which yields (0/1 error count, predicted classes).
test_image_normalized_pad = normalize(
    zero_pad(test_image[:, :, :, np.newaxis], 2),
    'lenet5',
)
error01, class_pred = ConvNet.Forward_Propagation(
    test_image_normalized_pad, test_label, 'test'
)

# Error count divided by the number of predictions.
print("error rate:", error01 / len(class_pred))

In [None]:
# Micro-averaged F1 of the predictions against the testing labels.
micro_f1_test = micro_F1_SCORE(test_label, class_pred)
print(f"micro F1 score for test = micro precision = micro recall = {micro_f1_test!s}\n")

In [None]:
# Convert the testing labels and predictions with hot_form(..., 10) for the metric helpers below.
# NOTE(review): presumably a one-hot encoding over 10 classes -- confirm in evaluation_matrix.
hot_form_y_test=hot_form(test_label,10)
hot_form_pred_test=hot_form(class_pred,10)

In [None]:
# Per-label F1 / precision / recall on the testing set.
f1_score_arr_test, precision_arr_test, recall_arr_test = f1_score_labels(
    hot_form_y_test, hot_form_pred_test
)
print(f"f1 score for test = {f1_score_arr_test!s}\n")
print(f"precision for test = {precision_arr_test!s}\n")
print(f"recall for test = {recall_arr_test!s}\n")

# Macro metrics derived from the per-label arrays (10 is passed as the last argument).
macro_f1_score_test, macro_precision_test, macro_recall_test = macro_f1_score(
    f1_score_arr_test, precision_arr_test, recall_arr_test, 10
)
print(f"macro_f1_score for test=  {macro_f1_score_test!s}\n")
print(f"macro_precision for test =  {macro_precision_test!s}\n")
print(f"macro_recall for test =  {macro_recall_test!s}\n")

# Confusion matrix: printed as text, then drawn.
confusion_matrix_test = confusion_matrix(hot_form_y_test, hot_form_pred_test)
print(f"confusion matrix for test --->\n{confusion_matrix_test!s}\n")
visualise_confusion_for_mnist(confusion_matrix_test)
plt.show()
#############################################################

In [None]:
    
    ######### random selection from the training dataset ##############
# Draw two random training indices; the bound follows the loaded label array
# instead of a hard-coded 60000. Only the first sample is shown and reported.
# NOTE(review): two samples are forwarded although one is displayed --
# presumably the network needs a batch larger than one; confirm.
index = np.random.randint(len(train_label), size=2)
train_image_sample = train_image_normalized_pad[index,:,:,:]
train_label_sample = train_label[index]
print("Correct label:",train_label_sample[0])
plt.imshow(train_image_sample[0,:,:,0], cmap=mpl.cm.Greys)
# In 'test' mode the forward pass returns (0/1 error count, predicted classes).
_, pred = ConvNet.Forward_Propagation(train_image_sample, train_label_sample, 'test')
print("Predict label:",pred[0])

In [None]:
# Draw two random training indices; the bound follows the loaded label array
# instead of a hard-coded 60000. Only the first sample is shown and reported.
# NOTE(review): two samples are forwarded although one is displayed --
# presumably the network needs a batch larger than one; confirm.
index = np.random.randint(len(train_label), size=2)
train_image_sample = train_image_normalized_pad[index,:,:,:]
train_label_sample = train_label[index]
print("Correct label:",train_label_sample[0])
plt.imshow(train_image_sample[0,:,:,0], cmap=mpl.cm.Greys)
# In 'test' mode the forward pass returns (0/1 error count, predicted classes).
_, pred = ConvNet.Forward_Propagation(train_image_sample, train_label_sample, 'test')
print("Predict label:",pred[0])

In [None]:
# Draw two random training indices; the bound follows the loaded label array
# instead of a hard-coded 60000. Only the first sample is shown and reported.
# NOTE(review): two samples are forwarded although one is displayed --
# presumably the network needs a batch larger than one; confirm.
index = np.random.randint(len(train_label), size=2)
train_image_sample = train_image_normalized_pad[index,:,:,:]
train_label_sample = train_label[index]
print("Correct label:",train_label_sample[0])
plt.imshow(train_image_sample[0,:,:,0], cmap=mpl.cm.Greys)
# In 'test' mode the forward pass returns (0/1 error count, predicted classes).
_, pred = ConvNet.Forward_Propagation(train_image_sample, train_label_sample, 'test')
print("Predict label:",pred[0])

In [None]:
# Draw two random training indices; the bound follows the loaded label array
# instead of a hard-coded 60000. Only the first sample is shown and reported.
# NOTE(review): two samples are forwarded although one is displayed --
# presumably the network needs a batch larger than one; confirm.
index = np.random.randint(len(train_label), size=2)
train_image_sample = train_image_normalized_pad[index,:,:,:]
train_label_sample = train_label[index]
print("Correct label:",train_label_sample[0])
plt.imshow(train_image_sample[0,:,:,0], cmap=mpl.cm.Greys)
# In 'test' mode the forward pass returns (0/1 error count, predicted classes).
_, pred = ConvNet.Forward_Propagation(train_image_sample, train_label_sample, 'test')
print("Predict label:",pred[0])