# MNIST CNN - Assignment 5

### Imports

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
from IPython.display import display
# Notebook-wide Matplotlib defaults: bigger figures plus larger legend,
# title and axis-label text for every plot drawn below.
params = {
    'legend.fontsize': 16,
    'legend.handlelength': 2,
    'figure.figsize': (14, 12),
    'axes.titlesize': 16,
    'axes.labelsize': 16,
}
plt.rcParams.update(params)

In [None]:
# Keras models and layers
import h5py
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization

### Global Vars

In [None]:
# Hyper-parameters shared by both training runs in this notebook:
#   batch_size  - value passed as `batch_size` to `fit`
#   num_classes - width of the one-hot labels and of the softmax output
#   epochs      - value passed as `epochs` to `fit`
batch_size, num_classes, epochs = 128, 10, 1

### Import Dataset and One-Hotize the output

Notice that the images are reshaped to add a single trailing channel dimension (lines 9 and 10 of the cell below).

In [None]:
# Height and width (in pixels) of every input image.
img_rows = img_cols = 28

# Fetch the train / test partitions through the Keras MNIST loader.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print("X_train original shape", x_train.shape)
print("y_train original shape", y_train.shape)

x_train = x_train.reshape(-1, img_rows, img_cols, 1)
x_test = x_test.reshape(-1, img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

# Cast to float32 and divide by 255 so pixel values are scaled down
# before they reach the network.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print('x_train shape (after adding channels):', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer labels into `num_classes`-wide rows.
y_train, y_test = (
    keras.utils.to_categorical(y_train, num_classes),
    keras.utils.to_categorical(y_test, num_classes),
)

# CNN Model1

In [None]:
# Baseline CNN: two stacked 3x3 convolutions, one max-pool stage with
# dropout, then a dense head ending in a softmax over `num_classes`.
model1 = Sequential([
    Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation='relu',
           input_shape=input_shape, name="Input_Conv2D_1"),
    Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
           name='Conv2D_2'),
    MaxPooling2D(pool_size=(2, 2), name='MaxPool_2'),
    Dropout(0.25, name='Dropout_2'),
    Flatten(name='Flatten'),
    Dense(128, activation='relu', name='Dense_3'),
    Dropout(0.5, name='Dropout_3'),
    Dense(num_classes, activation='softmax', name='Softmax_Output'),
])
model1.summary()

In [None]:
# Train the baseline model with Adam on categorical cross-entropy,
# holding out 10% of the training data for validation.
model1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model1.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    verbose=1,
)

In [None]:
# Score the baseline model on the test split; index 1 of the result is
# reported as the accuracy (the only metric requested at compile time).
score1 = model1.evaluate(x=x_test, y=y_test)
print()
print('Test accuracy: ', score1[1])

In [None]:
# Persist the trained baseline model to HDF5, replacing any earlier file.
model1.save(filepath="mnist_cnn.h5", overwrite=True)

# CNN Model2 with Batch Normalization

In [None]:
# CNN with batch normalisation after every convolution, after the flatten
# step and after the hidden dense layer.
model2 = Sequential([
    # Convolution stage 1
    Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation='relu',
           input_shape=input_shape, name='Input_Conv2D_1'),
    BatchNormalization(axis=-1, name='BatchNorm_1'),

    # Convolution stage 2, followed by pooling and dropout
    Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
           name='Conv2D_2'),
    BatchNormalization(axis=-1, name='BatchNorm_2'),
    MaxPooling2D(pool_size=(2, 2), name='MaxPool_2'),
    Dropout(0.25, name='Dropout_2'),

    # Convolution stage 3, followed by pooling
    Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu',
           name='Conv2D_3'),
    BatchNormalization(axis=-1, name='BatchNorm_3'),
    MaxPooling2D(pool_size=(2, 2), name='MaxPool_3'),

    # Fully connected head
    Flatten(name='Flatten'),
    BatchNormalization(name='BatchNorm_Flatten'),
    Dense(128, activation='relu', name='Dense_5'),
    BatchNormalization(name='BatchNorm_5'),
    Dropout(0.5, name='Dropout_5'),

    Dense(num_classes, activation='softmax', name='SoftMax_Output'),
])
model2.summary()

In [None]:
# Train the batch-normalised model with the same optimiser, loss and
# 10% validation hold-out as the baseline model.
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model2.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    verbose=1,
)

In [None]:
# Score the batch-normalised model on the test split; index 1 of the
# result is reported as the accuracy.
score2 = model2.evaluate(x=x_test, y=y_test)
print()
print('Test accuracy: ', score2[1])

In [None]:
# Persist the batch-normalised model to HDF5, replacing any earlier file.
model2.save(filepath="mnist_cnn_batchnorm.h5", overwrite=True)

# Loading Saved Models

In [None]:
from keras.models import load_model
import keras.backend as K

In [None]:
# Read the batch-normalised model back from the HDF5 file written earlier.
loaded_model = load_model(filepath="mnist_cnn_batchnorm.h5")

# Visualize Weights

In [None]:
# Fetch the first convolutional layer by name instead of scanning every layer;
# `get_layer` raises immediately if the layer is missing, rather than leaving
# `weights` undefined for the cells below.
first_conv = loaded_model.get_layer('Input_Conv2D_1')

# Trained kernel tensor and bias vector of that layer.
weights, biases = first_conv.get_weights()
# print() calls (not Python 2 print statements) so the cell also runs on
# Python 3, matching every other print in this notebook.
print(weights.shape, biases.shape)

# `output` is the layer's symbolic output tensor, not numeric activations;
# numeric activations are computed later in the notebook.
activations = first_conv.output
print(activations)

### Plot histogram of all the weights from first convolutional layer named `Input_Conv2D_1`

In [None]:
# Flatten all kernel weights into one vector for the histogram. `ravel()`
# adapts to any kernel size / filter count, whereas the previous hard-coded
# length of 288 (= 3 * 3 * 1 * 32) failed as soon as the layer changed.
plt.hist(weights.ravel())

### Visualize the weights of the kernel at index `15` from layer `Input_Conv2D_1`

In [None]:
# Show the kernel at index 15 of the first conv layer. Indexing input
# channel 0 directly yields the 2-D kernel without hard-coding its 3x3 size.
plt.imshow(weights[:, :, 0, 15], cmap='gray')
# Pass a real boolean: the string 'off' is truthy and newer Matplotlib
# releases no longer interpret it as "disable the grid".
plt.grid(False)
plt.colorbar()

### Visualize all kernels from layer `Input_Conv2D_1`

In [None]:
# Draw every first-layer kernel in a grid that is 6 panels wide.
n_kernels = weights.shape[3]
n_cols = 6
n_rows = -(-n_kernels // n_cols)  # ceiling division; 6 rows for 32 kernels

# One colour scale for all panels, so the single colorbar drawn below is
# valid for every kernel and not only for the last image plotted.
vmin, vmax = weights.min(), weights.max()

fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols)
for i, ax in enumerate(axes.flat):
    if i < n_kernels:
        im = ax.imshow(weights[:, :, 0, i], vmin=vmin, vmax=vmax)
        ax.xaxis.set_major_formatter(plt.NullFormatter())
        ax.yaxis.set_major_formatter(plt.NullFormatter())
        ax.grid(False)
    else:
        # Hide the leftover panels instead of showing empty frames.
        ax.axis('off')

# Reserve the right-hand strip of the figure for the shared colorbar.
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.1, 0.05, 0.8])
fig.colorbar(im, cax=cbar_ax)

# Visualize Activations

The `get_layer_output` function takes the input of `layer 0` and returns the activations of `layer 7` (the layer at index 7 of `loaded_model.layers`).

In [None]:
# Backend function mapping the input tensor of the first layer to the
# output tensor of the layer at index 7.
# NOTE(review): index 7 is presumably `BatchNorm_3` if the loaded file still
# holds the batch-normalised model - confirm with `loaded_model.summary()`.
get_layer_output = K.function(
    inputs=[loaded_model.layers[0].input],
    outputs=[loaded_model.layers[7].output],
)

Test sample number `13`, which happens to be a `zero`, is fed through the network to obtain the activations.

In [None]:
sample_num = 13
# Slice instead of indexing so the image keeps a leading batch axis of
# length 1, which is what the backend function is fed.
activations = get_layer_output([x_test[sample_num:sample_num + 1]])[0]
# Pass a real boolean: the string 'off' is truthy and newer Matplotlib
# releases no longer interpret it as "disable the grid".
plt.grid(False)
# Drop the single channel axis to display the image as a 2-D picture.
plt.imshow(x_test[sample_num, :, :, 0], cmap='gray')

In [None]:
# Draw every channel of the activation map for the first (and here only)
# sample of the batch, in a grid that is 8 panels wide.
sample_maps = activations[0]
n_maps = sample_maps.shape[2]
n_cols = 8
n_rows = -(-n_maps // n_cols)  # ceiling division

# One colour scale for all panels, so the single colorbar drawn below is
# valid for every channel and not only for the last image plotted.
vmin, vmax = sample_maps.min(), sample_maps.max()

fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols)
for i, ax in enumerate(axes.flat):
    if i < n_maps:
        im = ax.imshow(sample_maps[:, :, i], vmin=vmin, vmax=vmax)
        ax.xaxis.set_major_formatter(plt.NullFormatter())
        ax.yaxis.set_major_formatter(plt.NullFormatter())
        ax.grid(False)
    else:
        # Hide the leftover panels instead of showing empty frames.
        ax.axis('off')

# Reserve the right-hand strip of the figure for the shared colorbar.
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.1, 0.05, 0.8])
fig.colorbar(im, cax=cbar_ax)

---
# Assignment 5

1. Retrain the models for more epochs and check whether the test accuracy improves.
2. Generate statistics/ROC curves for the predictions using batch normalized model.
3. Import a pretrained model using the following code. In the snippet below I am using MobileNet. Note that if you initialize with `weights=None`, the weights will be initialized randomly. Use this model to visualize weights and activations as above. HINT: to visualize activations you need to provide a 3-channel image (use Google to find one); the required image size can be determined from the model. You will also have to crop/rotate the image, etc. MNIST data won't work here.
```
from keras.applications import mobilenet
#Load the model, defaults to pretrained weights from Imagenet
mobile_model = mobilenet.MobileNet()
# Print summary
mobile_model.summary()
```