# MNIST digit classification: Convolutional Neural Network (CNN) vs. fully connected Neural Network (fcNN)

***

**Goal** In this notebook, we will compare the performance of a fully connected neural network (fcNN) and a convolutional neural network (CNN) on the MNIST dataset. We will see that the CNN performs better than the fcNN.

**Dataset**: We will work with the MNIST dataset, which contains 70,000 greyscale images of handwritten digits (60,000 for training and 10,000 for testing), each 28x28 pixels. The task is to assign every image its correct label (0-9).

***

## Preparation and Imports

A prerequisite for this notebook is an installation of TensorFlow 2.x.

In [None]:
import tensorflow as tf
# Warn, without aborting, when the installed TensorFlow is not a 2.x release,
# then report the version that is actually in use.
if not tf.__version__.startswith('2'):
    print('Please install tensorflow 2.0 to run this notebook')
print('Tensorflow version: ', tf.__version__)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('default')
from sklearn.metrics import confusion_matrix

import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Convolution2D, MaxPooling2D, Flatten , Activation
from tensorflow.keras.utils import to_categorical 
from tensorflow.keras import optimizers

## Import data

We will use the MNIST dataset, which ships with the `tensorflow.keras.datasets` module. The dataset is already split into a training and a test set and can be loaded as shown in the cell below; we additionally hold out the last 10,000 training images as a validation set.

In [None]:
from tensorflow.keras.datasets import mnist
# Load the raw MNIST arrays (images plus integer digit labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Inputs: the first 50,000 training images are used for fitting and the
# remaining 10,000 (indices 50,000-59,999) are held out for validation.
# Dividing by 255 rescales the pixel values into the range 0 to 1.
X_train = x_train[:50000] / 255
X_val = x_train[50000:60000] / 255
X_test = x_test / 255

# Targets: one-hot encode the digit labels over the 10 classes, using the
# same train / validation / test partition as the images.
Y_train = to_categorical(y_train[:50000], 10)
Y_val = to_categorical(y_train[50000:60000], 10)
Y_test = to_categorical(y_test, 10)

# The raw arrays are no longer needed once the splits exist.
del x_train, y_train, x_test, y_test

# Append an explicit channel axis: (n, 28, 28) -> (n, 28, 28, 1).
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_val = X_val.reshape(X_val.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

# Report the shapes: inputs first, then targets.
for split in (X_train, X_val, X_test, Y_train, Y_val, Y_test):
    print(split.shape)

To visualize the data we will use the code below to show the first 10 images of the training set.

In [None]:
# Visualize the first 10 training images in a 2 x 5 grid, each titled with
# its true digit label.
n_rows, n_cols = 2, 5
# Y_train is one-hot encoded, so argmax along the class axis recovers the digit.
true_labels = np.argmax(Y_train, axis=1)

plt.figure(figsize=(12, 5))
for idx in range(n_rows * n_cols):
    # Subplot positions are 1-based and run row by row, so image `idx`
    # lands at position idx + 1 and no image is skipped.
    plt.subplot(n_rows, n_cols, idx + 1)
    plt.imshow(X_train[idx, :, :, 0], cmap="gray")
    # The built-in str() is used because the `np.str` alias was removed
    # from NumPy (1.24+) and raises AttributeError there.
    plt.title('true label: ' + str(true_labels[idx]))
plt.tight_layout()

In [None]:
# The fcNN takes one vector per image, so every image is unrolled into
# 784 (= 28 * 28) values. The one-hot targets are reused unchanged.
X_train_flat = X_train.reshape(len(X_train), 784)
X_val_flat = X_val.reshape(len(X_val), 784)
X_test_flat = X_test.reshape(len(X_test), 784)

# Sanity check: each flattened input next to its matching target array.
for checked in (X_train_flat, Y_train, X_val_flat, Y_val):
    print(checked.shape)

### fcNN 1: Default fcNN (sigmoid only)

In [None]:
# Fully connected network: two sigmoid hidden layers (100 and 50 units)
# followed by a 10-way softmax output, fed with flattened 784-pixel vectors.
model_fcNN = Sequential()

# `input_shape` gives the per-sample shape (no batch axis), the same way the
# CNN definition in this notebook declares its input. The older
# `batch_input_shape=(None, 784)` keyword is rejected by Keras 3 layers.
model_fcNN.add(Dense(100, input_shape=(784,)))
model_fcNN.add(Activation('sigmoid'))
model_fcNN.add(Dense(50))
model_fcNN.add(Activation('sigmoid'))
# One output unit per digit class; softmax turns the scores into probabilities.
model_fcNN.add(Dense(10))
model_fcNN.add(Activation('softmax'))

### CNN 1: Convolutional Neural Network (CNN)

In [None]:
# CNN: two (3x3 convolution -> 2x2 max-pooling) stages with 32 and 64 filters,
# then the feature maps are flattened and fed to a 10-way softmax classifier.
# NOTE(review): neither Convolution2D layer is given an `activation`, so the
# convolutions themselves are linear and max-pooling is the only
# non-linearity before the softmax - confirm this is intended.
model_cnn = Sequential([
    Convolution2D(32, (3, 3), padding='valid', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(64, (3, 3), padding='valid'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(10),
    Activation('softmax'),
])

## Compile and Train Model

In [None]:
# Both networks get exactly the same training configuration (Adam optimizer,
# categorical cross-entropy on the one-hot targets, accuracy as the reported
# metric) so that their learning curves can be compared directly.
model_fcNN.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model_cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture of the fcNN, including output shapes
# and parameter counts.
model_fcNN.summary()

In [None]:
# Print the layer-by-layer architecture of the CNN, including output shapes
# and parameter counts, for comparison with the fcNN summary.
model_cnn.summary()

Train the fcNN and CNN models with the same parameters and compare the results.

In [None]:
# Fit the fcNN on the flattened images for 10 epochs with mini-batches of 128,
# evaluating on the held-out validation split after every epoch.
# verbose=2 prints one summary line per epoch.
history_fcNN = model_fcNN.fit(
    x=X_train_flat,
    y=Y_train,
    validation_data=(X_val_flat, Y_val),
    epochs=10,
    batch_size=128,
    verbose=2,
)

In [None]:
# Fit the CNN on the 28x28x1 images with the same schedule as the fcNN
# (10 epochs, batch size 128), validating after every epoch.
# verbose=2 prints one summary line per epoch.
history_cnn = model_cnn.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    epochs=10,
    batch_size=128,
    verbose=2,
)

## Compare the results

Comparing the results will show that the CNN performs better than the fcNN.

In [None]:
# Plot how accuracy (left panel) and loss (right panel) develop over the
# epochs for both networks. Training curves are dash-dotted and validation
# curves are solid.
curve_labels = ['train_fcNN', 'valid_fcNN', 'train_CNN', 'valid_CNN']
# One entry per panel: (training key, validation key, title, y-label, legend position).
panels = [
    ('accuracy', 'val_accuracy', 'model accuracy', 'accuracy', 'lower right'),
    ('loss', 'val_loss', 'model loss', 'loss', 'upper right'),
]

plt.figure(figsize=(12, 4))
for position, (train_key, val_key, title, y_label, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    # Drawing order (fcNN train, fcNN valid, CNN train, CNN valid) must match
    # the order of `curve_labels` for the legend to be correct.
    for run in (history_fcNN, history_cnn):
        plt.plot(run.history[train_key], linestyle='-.')
        plt.plot(run.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(curve_labels, loc=legend_loc)

### fcNN

In [None]:
# Class probabilities of the fcNN for every (flattened) test image.
pred_fcNN = model_fcNN.predict(X_test_flat)

# Reduce the one-hot targets and the predicted probabilities to digit labels.
true_digits_fcNN = Y_test.argmax(axis=1)
pred_digits_fcNN = pred_fcNN.argmax(axis=1)

# Rows of the confusion matrix are true digits, columns are predicted digits.
print(confusion_matrix(true_digits_fcNN, pred_digits_fcNN))

# Test accuracy = share of images whose predicted digit equals the true digit.
acc_fc_orig = (true_digits_fcNN == pred_digits_fcNN).sum() / len(pred_fcNN)
print("Acc_fc_orig_flat = " , acc_fc_orig)

### CNN

In [None]:
# Class probabilities of the CNN for every test image.
pred_cnn = model_cnn.predict(X_test)

# Reduce the one-hot targets and the predicted probabilities to digit labels.
true_digits_cnn = np.argmax(Y_test, axis=1)
pred_digits_cnn = np.argmax(pred_cnn, axis=1)

# Rows of the confusion matrix are true digits, columns are predicted digits.
print(confusion_matrix(true_digits_cnn, pred_digits_cnn))

# The CNN accuracy gets its own variable and label. It was previously written
# to `acc_fc_orig` and printed as "Acc_fc_orig_flat", which overwrote the fcNN
# result and mislabelled the CNN one.
acc_cnn_orig = np.sum(true_digits_cnn == pred_digits_cnn) / len(pred_cnn)
print("Acc_cnn_orig = " , acc_cnn_orig)