# Homework 2

Build and train a MLP Model to classify MNIST dataset

1. MLP Network accepts 1D data. So we should flatten our 2D image, then print the dimension of the result arrays.
2. Normalize data by rescaling them to (0,1)
3. Convert label arrays to 1-hot representation (`keras.utils.to_categorical`)
4. Define Model
    - Hidden Layer 1: Fully Connected + Relu Activition (e.g. 512 Neurons)
    - Hidden Layer 2: Fully Connected + Relu Activition (e.g. 512 Neurons)
    - Outout Layer: Fully Connected + Softmax Activition
      
      
5. Build and train a CNN+MLP deep learning model with Keras with followings specs for MNIST dataset:
  
   ``` 
       Conv2D(32, kernel_size=(3, 3), activation='relu')
       Conv2D(64, kernel_size=(3, 3), activation='relu')
       MaxPooling2D(pool_size=(2, 2))
       Dense(128, activation='relu')
       Dense(num_classes, activation='softmax')
   ```
     
     
6. Also build another model with BatchNormalization and Dropout. Compare these two CNN + MLP models performance for test data

In [63]:
import keras
import matplotlib.pyplot as plt # This package is for plotting
import numpy as np
import pandas as pd
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Input, Conv2D, MaxPooling2D, Dropout, Flatten, BatchNormalization, Activation
from keras.optimizers import SGD
from keras.initializers import RandomNormal
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
import warnings
%matplotlib inline  
warnings.filterwarnings('ignore')

In [None]:
keras.backend.clear_session()

In [None]:
# get data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
# normalize x data to (0, 1)
X_train = X_train/np.max(X_train)
X_test = X_test/np.max(X_test)

# normalize y data to one-hot
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [54]:
# collect accuracy data of each model
acc = []

## MLP NN

In [46]:
# MLP NN only accepts 1D data, should flatten 2D image
X_train = np.reshape(X_train, [-1, 28**2])
X_test = np.reshape(X_test, [-1, 28**2])

In [47]:
mlp_model = Sequential()
mlp_model.add(Dense(512, activation="relu", input_shape=(784,)))
mlp_model.add(Dense(512, activation="relu", input_shape=(784,)))
mlp_model.add(Dense(10, activation="softmax")) 

In [None]:
sgd = SGD(lr=0.01)
mlp_model.compile(
    optimizer=sgd, 
    loss='categorical_crossentropy', 
    metrics=["accuracy"]
)

mlp_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

In [55]:
loss, accuracy = mlp_model.evaluate(X_test, y_test, verbose=0)
acc.append(accuracy)
print("Accuracy = {:.5f}".format(accuracy))

Accuracy = 0.96880


In [51]:
mlp_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 512)               401920    
_________________________________________________________________
dense_14 (Dense)             (None, 512)               262656    
_________________________________________________________________
dense_15 (Dense)             (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


## CNN + MLP

In [58]:
X_train = X_train.reshape(-1,28,28,1)
X_test = X_test.reshape(-1,28,28,1)

X_train.shape

(60000, 28, 28, 1)

In [None]:
cnn_mlp_model = Sequential()
cnn_mlp_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28,28,1)))
cnn_mlp_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_mlp_model.add(Flatten())
cnn_mlp_model.add(Dense(128, activation='relu'))
cnn_mlp_model.add(Dense(10, activation='softmax'))

In [None]:
sgd = SGD(lr=0.01)
cnn_mlp_model.compile(
    optimizer=sgd, 
    loss='categorical_crossentropy', 
    metrics=["accuracy"]
)

cnn_mlp_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

In [60]:
loss, accuracy = cnn_mlp_model.evaluate(X_test, y_test, verbose=0)
acc.append(accuracy)
print("Accuracy = {:.5f}".format(accuracy))

Accuracy = 0.97600


In [52]:
cnn_mlp_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 5408)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 128)               692352    
_________________________________________________________________
dense_10 (Dense)             (None, 10)                1290      
Total params: 693,962
Trainable params: 693,962
Non-trainable params: 0
_________________________________________________________________


## CNN + MLP with BatchNormalization and Dropout

In [None]:
cnn_mlp_model_2 = Sequential()

"""
Batch normalization uses weights as usual but does NOT add a bias term. 
This is because its calculations include gamma and beta variables that 
make the bias term unnecessary
https://www.dlology.com/blog/one-simple-trick-to-train-keras-model-faster-with-batch-normalization/
"""
cnn_mlp_model_2.add(Conv2D(32, kernel_size=(3, 3), input_shape=(28,28,1), use_bias=False))
cnn_mlp_model_2.add(BatchNormalization())

cnn_mlp_model_2.add(Activation("relu"))
cnn_mlp_model_2.add(MaxPooling2D(pool_size=(2, 2)))

"""
Dropout is only used after the pooling layers, but this is just a rough heuristic"
https://machinelearningmastery.com/how-to-reduce-overfitting-with-dropout-regularization-in-keras/
"""
cnn_mlp_model_2.add(Dropout(0.5)) # use of 0.5 as dropout rate is arbitrary

cnn_mlp_model_2.add(Flatten())
cnn_mlp_model_2.add(Dense(128, activation='relu'))
cnn_mlp_model_2.add(Dense(10, activation='softmax'))

In [None]:
sgd = SGD(lr=0.01)
cnn_mlp_model_2.compile(
    optimizer=sgd, 
    loss='categorical_crossentropy', 
    metrics=["accuracy"]
)

cnn_mlp_model_2.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

In [61]:
loss, accuracy = cnn_mlp_model_2.evaluate(X_test, y_test, verbose=0)
acc.append(accuracy)
print("Accuracy = {:.5f}".format(accuracy))

Accuracy = 0.98800


In [45]:
cnn_mlp_model_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 26, 26, 32)        288       
_________________________________________________________________
batch_normalization_1 (Batch (None, 26, 26, 32)        128       
_________________________________________________________________
activation_1 (Activation)    (None, 26, 26, 32)        0         
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 13, 32)        0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 5408)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 128)               692352    
__________

## Comparison

In [64]:
pd.DataFrame({ "Accuracy": acc }, index=["MLP", "CNN+MLP", "CNN+MLP+Batch+Dropout"])

Unnamed: 0,Accuracy
MLP,0.9688
CNN+MLP,0.976
CNN+MLP+Batch+Dropout,0.988
