## Convolutional Neural Network for MNIST image classification

In [1]:
import numpy as np
# from sklearn.utils.extmath import softmax
from matplotlib import pyplot as plt
import re
from tqdm import trange
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pandas as pd
from sklearn.datasets import fetch_openml

# Render all figure text in a serif font, trying Times New Roman first and
# falling back to whatever serif fonts matplotlib already had configured.
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']

In [2]:
import sys
sys.path.append("../")
from src.CNN import CNN, compute_accuracy_metrics, multiclass_accuracy_metrics, list2onehot, onehot2list

In [3]:
# Load data from https://www.openml.org/d/554
# as_frame=False makes fetch_openml return NumPy arrays. Recent scikit-learn
# releases otherwise return pandas objects, which break the positional
# indexing (e.g. X[idx, :]) used throughout this notebook.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
X = X / 255.  # rescale the raw pixel values by 255

print('X.shape', X.shape)
print('y.shape', y.shape)

# NOTE: the bare string below is the cell's last expression, so Jupyter echoes it as output.
'''
Each row of X is a vectroization of an image of 28 x 28 = 784 pixels.  
The corresponding row of y holds the true class label from {0,1, .. , 9}.
'''

X.shape (70000, 784)
y.shape (70000,)


'\nEach row of X is a vectroization of an image of 28 x 28 = 784 pixels.  \nThe corresponding row of y holds the true class label from {0,1, .. , 9}.\n'

In [4]:
def random_padding(img, thickness=1):
    """Place a 2-D image at a random offset inside a larger all-zero canvas.

    The canvas has `thickness` more rows and `thickness` more columns than
    `img`. The row offset and the column offset are each drawn uniformly
    from {0, 1, ..., thickness}.

    Returns the zero-padded canvas as a new array.
    """
    n_rows, n_cols = img.shape
    offsets = np.arange(thickness + 1)
    # The row offset is drawn first, then the column offset.
    top = np.random.choice(offsets)
    left = np.random.choice(offsets)
    canvas = np.zeros(shape=[n_rows + thickness, n_cols + thickness])
    canvas[top:top + n_rows, left:left + n_cols] = img
    return canvas

In [5]:
def sample_multiclass_MNIST_padding(list_digits=['0','1', '2'], full_MNIST=[X,y], padding_thickness=10):
    """Build a random train/test split of padded MNIST images for the given digits.

    Each selected 28 x 28 image is embedded (via `random_padding`) at a
    uniformly chosen location inside a zero canvas that has
    `padding_thickness` more rows and columns, then flattened to a vector.
    Every example goes to the train set with probability 0.8 and to the test
    set otherwise, so the split sizes vary from call to call.

    Parameters
    ----------
    list_digits : list
        Labels to keep, e.g. ['0', '1', '2']. Also passed to `list2onehot`
        to build the one-hot label rows. Never mutated here.
    full_MNIST : [X, y] or None
        Pre-loaded data (rows of X are flattened 28 x 28 images). It is used
        as given, without rescaling. If None, MNIST is downloaded with
        `fetch_openml` and divided by 255. The default is bound to the
        module-level X, y when this function is defined.
    padding_thickness : int
        Number of extra rows/columns added to each image.

    Returns
    -------
    X_train, X_test, y_train, y_test : np.ndarray
        Flattened padded images and their one-hot label rows.
    """
    if full_MNIST is not None:
        X, y = full_MNIST
    else:
        # as_frame=False: ask for NumPy arrays rather than pandas objects.
        X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
        X = X / 255.

    # Accept pandas DataFrame/Series input as well: the positional indexing
    # below (X[idx, :]) only works on NumPy arrays.
    X = np.asarray(X)
    y = np.asarray(y)

    Y = list2onehot(y.tolist(), list_digits)

    # indices of the examples whose label is one of the requested digits
    wanted = set(list_digits)
    idx = [i for i, label in enumerate(y) if label in wanted]

    X01 = X[idx,:]
    y01 = Y[idx,:]

    X_train = []
    X_test = []
    y_test = [] # list of one-hot encodings (indicator vectors) of each label
    y_train = [] # list of one-hot encodings (indicator vectors) of each label

    for i in trange(X01.shape[0]):
        # Draw the split variable BEFORE padding so the sequence of random
        # draws per example is: split, row offset, column offset.
        U = np.random.rand() # Uniform([0,1]) variable
        img_padded = random_padding(X01[i,:].reshape(28,28), thickness=padding_thickness)
        img_padded_vec = img_padded.ravel().copy()
        if U<0.8:
            X_train.append(img_padded_vec)
            y_train.append(y01[i,:].copy())
        else:
            X_test.append(img_padded_vec)
            y_test.append(y01[i,:].copy())

    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)
    y_train = np.asarray(y_train)
    y_test = np.asarray(y_test)
    return X_train, X_test, y_train, y_test

In [6]:
# Binary MNIST experiment: digits 0 vs 1, no padding

X_train, X_test, y_train, y_test = sample_multiclass_MNIST_padding(
    list_digits=['0','1'],
    full_MNIST=[X,y],
    padding_thickness=0,
)

# Subsample the training set.
# np.random.choice samples with replacement by default, so rows may repeat.
train_size = 100
idx = np.random.choice(np.arange(len(y_train)), train_size)
X_train0 = X_train[idx, :]/np.max(X_train)
y_train0 = y_train[idx, :]

# Training data for the CNN: one [image, one-hot label] pair per example,
# with each flat row reshaped to 1 x 28 x 28.
out = [
    [X_train0[i,:].reshape(1,28,28), y_train0[i,:]]
    for i in range(X_train0.shape[0])
]

# Build the CNN
CNN0 = CNN(
    training_data = out,
    f = 5,            # conv filter dim
    f_pool = 2,       # maxpool filter dim
    num_filt1 = 8,    # num filters for the first conv layer
    num_filt2 = 8,    # num filters for the second conv layer
    conv_stride = 1,
    pool_stride = 2,
    hidden_nodes = 128,
)

# Train; as the last expression of the cell, the return value is displayed.
CNN0.train(
    lr = 0.01,
    beta1 = 0.95,
    beta2 = 0.99,
    minibatch_size = 32,
    num_epochs = 20,
    verbose = True,
)



100%|██████████| 14780/14780 [00:00<00:00, 65308.99it/s]
  0%|          | 0/20 [00:00<?, ?it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A

self.img_x_dim 28
LR:0.01, MiniBatch Size:32



 25%|██▌       | 1/4 [00:03<00:10,  3.58s/it][A
 50%|█████     | 2/4 [00:07<00:07,  3.58s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.60s/it][A
100%|██████████| 4/4 [00:11<00:00,  2.82s/it][A
  5%|▌         | 1/20 [00:11<03:34, 11.27s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A

iteration 0, error 0.68890



 25%|██▌       | 1/4 [00:03<00:10,  3.65s/it][A
 50%|█████     | 2/4 [00:07<00:07,  3.62s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.59s/it][A
100%|██████████| 4/4 [00:11<00:00,  2.79s/it][A
 10%|█         | 2/20 [00:22<03:22, 11.24s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:03<00:10,  3.54s/it][A
 50%|█████     | 2/4 [00:07<00:07,  3.53s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.53s/it][A
100%|██████████| 4/4 [00:11<00:00,  2.75s/it][A
 15%|█▌        | 3/20 [00:33<03:09, 11.17s/it]
  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:03<00:10,  3.56s/it][A
 50%|█████     | 2/4 [00:07<00:07,  3.57s/it][A
 75%|███████▌  | 3/4 [00:10<00:03,  3.56s/it][A
100%|██████████| 4/4 [00:11<00:00,  2.79s/it][A
 15%|█▌        | 3/20 [00:44<04:12, 14.87s/it]


[0.6933378066621136,
 0.6906707405747162,
 0.6902438714813413,
 0.6889048251734811,
 0.6806493379551022,
 0.6695235884666044,
 0.6409188775856846,
 0.5593134291009453,
 0.5524240496998056,
 0.4655036662297015,
 0.33555008699342814,
 0.20025944567295076,
 0.04921552544836933,
 0.04684577436101258,
 0.12173684540981777,
 0.005389959953714185]

In [None]:
# Predict on the full test split with the CNN trained above

X_test /= np.max(X_test)  # in-place rescale by the largest entry
print('X_test.shape', X_test.shape)

# One 1 x 28 x 28 image per test row
out_test = [row.reshape(1,28,28) for row in X_test]

y_hat = CNN0.predict(image_list=out_test)

# Labels recovered from the one-hot rows, and entry 1 of every prediction
# (presumably the predicted probability of digit '1' -- confirm against CNN.predict)
y_test_label = np.asarray(onehot2list(y_test))
P_pred = np.asarray([pred[1] for pred in y_hat])

compute_accuracy_metrics(
    Y_test=y_test_label,
    P_pred=P_pred,
    use_opt_threshold=False,
    verbose=True,
)

  0%|          | 3/2972 [00:00<02:14, 22.06it/s]

X_test.shape (2972, 784)


 32%|███▏      | 960/2972 [00:45<01:38, 20.42it/s]

In [None]:
# Draw the learned first-layer filters on an nrows x ncols grid

f1 = CNN0.params.get('f1')

# make plot
# NOTE: nrows * ncols should not exceed the number of first-layer filters
# (num_filt1 = 8 in the training cells visible in this notebook).
nrows = 2
ncols = 4
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=[10,4.5])

for i in np.arange(nrows):
    for j in np.arange(ncols):
        # row-major position in the grid -> index along the first axis of f1
        im = ax[i,j].imshow(f1[j + ncols*i,0,:,:])
        ax[i,j].set_xticks([])
        ax[i,j].set_yticks([])
        fig.colorbar(im, ax=ax[i,j], fraction=0.0457, pad=0.04)

fig.suptitle("Learned filters for the first convolutional layer", fontsize=15)
plt.tight_layout(rect=[0, 0.03, 1, 0.9])
plt.subplots_adjust(left=0.01, right=0.9, bottom=0.1, top=0.9, wspace=0.2, hspace=0.1)
plt.savefig('MNIST_CNN_filter_ex1.pdf', bbox_inches="tight")



In [None]:
# Binary MNIST experiment (digits 0 vs 1) on randomly shifted, zero-padded images

thickness = 10
X_train, X_test, y_train, y_test = sample_multiclass_MNIST_padding(
    list_digits=['0','1'],
    full_MNIST=[X,y],
    padding_thickness=thickness,
)

# Subsample the training set (np.random.choice samples with replacement by default)
train_size = 100
idx = np.random.choice(np.arange(len(y_train)), train_size)
X_train0 = X_train[idx, :]/np.max(X_train)
y_train0 = y_train[idx, :]

# Training data for the CNN: one [image, one-hot label] pair per example,
# with each flat row reshaped to 1 x (28+thickness) x (28+thickness).
out = [
    [X_train0[i,:].reshape(1,28+thickness,28+thickness), y_train0[i,:]]
    for i in range(X_train0.shape[0])
]

# CNN training
CNN0 = CNN(
    training_data = out,
    f = 5,            # conv filter dim
    f_pool = 2,       # maxpool filter dim
    num_filt1 = 8,    # num filters for the first conv layer
    num_filt2 = 8,    # num filters for the second conv layer
    conv_stride = 1,
    pool_stride = 2,
    hidden_nodes = 128,
)

CNN0.train(
    lr = 0.01,
    beta1 = 0.95,
    beta2 = 0.99,
    minibatch_size = 32,
    num_epochs = 30,
    verbose = True,
)

# CNN prediction on a random subsample of 100 test images
idx = np.random.choice(np.arange(len(y_test)), 100)
X_test0 = X_test[idx, :]/np.max(X_test)
y_test0 = y_test[idx, :]

out_test = [
    X_test0[i,:].reshape(1,28+thickness,28+thickness)
    for i in range(X_test0.shape[0])
]

y_hat = CNN0.predict(image_list=out_test)

y_test_label = np.asarray(onehot2list(y_test0))
P_pred = np.asarray([pred[1] for pred in y_hat])

compute_accuracy_metrics(
    Y_test=y_test_label,
    P_pred=P_pred,
    use_opt_threshold=False,
    verbose=True,
)





In [None]:
# Compare multiclass train/test accuracy across the number of conv filters,
# with one panel per training-set size.

thickness = 10
n_filters = [2, 5, 10, 15, 20]
list_digits=['0','1','2','3','4']

# Data preprocessing
# Use list_digits here: the previous hard-coded ['0','1'] silently reduced
# this "multiclass" experiment to a binary one and left list_digits unused.
X_train, X_test, y_train, y_test = sample_multiclass_MNIST_padding(list_digits=list_digits,
                                                                   full_MNIST=[X,y],
                                                                   padding_thickness=thickness)
train_size_list = [1000, 50, 100]
side = 28 + thickness  # side length of each padded square image

# make plot
ncols = len(train_size_list)
fig, ax = plt.subplots(nrows=1, ncols=ncols, figsize=[13,5])

for t in np.arange(len(train_size_list)):
    accuracy_list_test = []
    accuracy_list_train = []

    # Subsample train and test sets (np.random.choice samples with replacement by default)
    train_size = train_size_list[t]
    idx = np.random.choice(np.arange(len(y_train)), train_size)
    X_train0 = X_train[idx, :]/np.max(X_train)
    y_train0 = y_train[idx, :]

    idx = np.random.choice(np.arange(len(y_test)), 1000)
    X_test0 = X_test[idx, :]/np.max(X_test)
    y_test0 = y_test[idx, :]

    # `out` holds [image, one-hot label] pairs for training;
    # `out_train` / `out_test` hold images only, for prediction.
    out = []
    out_train = []
    for i in range(X_train0.shape[0]):
        out.append([X_train0[i,:].reshape(1, side, side), y_train0[i,:]])
        out_train.append(X_train0[i,:].reshape(1, side, side))

    out_test = [X_test0[i,:].reshape(1, side, side) for i in range(X_test0.shape[0])]

    for M in n_filters:

        # CNN training with M filters in each conv layer
        CNN0 = CNN(training_data = out,
           f = 5, # conv filter dim
           f_pool = 2, # maxpool filter dim
           num_filt1 = M, # num filters for the first conv layer
           num_filt2 = M, # num filters for the second conv layer
           conv_stride = 1,
           pool_stride = 2,
           hidden_nodes = 128)

        CNN0.train(lr = 0.01,
                   beta1 = 0.95,
                   beta2 = 0.99,
                   minibatch_size = 32,
                   num_epochs = 100,
                   verbose = True)

        # CNN prediction
        print()
        y_hat_train = np.asarray(CNN0.predict(out_train))
        y_hat_test = np.asarray(CNN0.predict(out_test))

        results_train = multiclass_accuracy_metrics(Y_test=y_train0, P_pred=y_hat_train)
        results_test = multiclass_accuracy_metrics(Y_test=y_test0, P_pred=y_hat_test)

        accuracy_list_train.append(results_train.get('Accuracy'))
        accuracy_list_test.append(results_test.get('Accuracy'))

    ## Plot
    ax[t].plot(n_filters, accuracy_list_train, color='blue', label="train accuracy")
    ax[t].plot(n_filters, accuracy_list_test, color='red', label="test accuracy")
    ax[t].set_xlabel('Number of filters', fontsize=15)
    ax[t].set_ylabel('Classification Accuracy', fontsize=15)
    ax[t].title.set_text("num training ex = %i" % (train_size))
    ax[t].legend(fontsize=15)

plt.tight_layout(rect=[0, 0.03, 1, 0.9])
plt.savefig('MNIST_CNN_accuracy_ex1.pdf')

## Classifying non-aligned MNIST images 

In [None]:
# Compare train/test accuracy across padding thicknesses,
# with one panel per training-set size.

padding_list = [0, 5, 10]
# list_digits=['0','1','2','3','4']
list_digits=['0','1']

## Train
train_size_list = [50, 100, 200]

# make plot
ncols = len(train_size_list)
fig, ax = plt.subplots(nrows=1, ncols=ncols, figsize=[13,5])

for t in np.arange(len(train_size_list)):
    accuracy_list_test = []
    accuracy_list_train = []

    train_size = train_size_list[t]

    for thickness in padding_list:
        # Data preprocessing: a fresh random split is drawn for every
        # (training size, padding thickness) combination.
        X_train, X_test, y_train, y_test = sample_multiclass_MNIST_padding(list_digits=list_digits,
                                                                           full_MNIST=[X,y],
                                                                           padding_thickness=thickness)
        side = 28 + thickness  # side length of each padded square image

        # Subsample train and test sets (np.random.choice samples with replacement by default)
        idx = np.random.choice(np.arange(len(y_train)), train_size)
        X_train0 = X_train[idx, :]/np.max(X_train)
        y_train0 = y_train[idx, :]

        idx = np.random.choice(np.arange(len(y_test)), 100)
        X_test0 = X_test[idx, :]/np.max(X_test)
        y_test0 = y_test[idx, :]

        # `out` holds [image, one-hot label] pairs for training;
        # `out_train` / `out_test` hold images only, for prediction.
        out = []
        out_train = []
        for i in range(X_train0.shape[0]):
            out.append([X_train0[i,:].reshape(1, side, side), y_train0[i,:]])
            out_train.append(X_train0[i,:].reshape(1, side, side))

        out_test = [X_test0[i,:].reshape(1, side, side) for i in range(X_test0.shape[0])]

        # CNN training
        CNN0 = CNN(training_data = out,
           f = 5, # conv filter dim
           f_pool = 2, # maxpool filter dim
           num_filt1 = 10, # num filters for the first conv layer
           num_filt2 = 10, # num filters for the second conv layer
           conv_stride = 1,
           pool_stride = 2,
           hidden_nodes = 128)

        CNN0.train(lr = 0.01,
                   beta1 = 0.95,
                   beta2 = 0.99,
                   minibatch_size = 32,
                   num_epochs = 50,
                   verbose = True)

        # CNN prediction
        print()
        y_hat_train = np.asarray(CNN0.predict(out_train))
        y_hat_test = np.asarray(CNN0.predict(out_test))

        results_train = multiclass_accuracy_metrics(Y_test=y_train0, P_pred=y_hat_train)
        results_test = multiclass_accuracy_metrics(Y_test=y_test0, P_pred=y_hat_test)

        accuracy_list_train.append(results_train.get('Accuracy'))
        accuracy_list_test.append(results_test.get('Accuracy'))

    ## Plot
    ax[t].plot(padding_list, accuracy_list_train, color='blue', label="train accuracy")
    ax[t].plot(padding_list, accuracy_list_test, color='red', label="test accuracy")
    ax[t].set_xlabel('Padding thickness', fontsize=15)
    ax[t].set_ylabel('Classification Accuracy', fontsize=15)
    ax[t].title.set_text("num training ex = %i" % (train_size))
    ax[t].legend(fontsize=15)

plt.tight_layout(rect=[0, 0.03, 1, 0.9])
plt.savefig('MNIST_CNN_accuracy_padding_ex3.pdf')