In [2]:
import os
import csv
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow

from tensorflow import keras 
from keras import models
from keras import applications
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import *
from keras.models import *
from keras import backend as K
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

#@title Default title text
# Task 6: High Accuracy CNN for MNIST
Build your own CNN and try to achieve the highest possible accuracy on MNIST.
A basic structure is given below; play around with it.

The following model uses 2 convolutional layers, followed by 1 pooling layer, then dropout 25%, then a dense layer, another dropout layer but with 50% dropout, and finally the output layer. It reaches about 99.2% accuracy on the test set. This places this model roughly in the top 20% in the [MNIST Kaggle competition](https://www.kaggle.com/c/digit-recognizer/) (if we ignore the models with an accuracy greater than 99.79% which were most likely trained on the test set, as explained by Chris Deotte in [this post](https://www.kaggle.com/c/digit-recognizer/discussion/61480)). 

In order to reach an accuracy higher than 99.5% on the test set you might try:

a) batch normalization layers (https://keras.io/api/layers/normalization_layers/batch_normalization/)   
b) use a learning rate scheduler (Check Chapter 11)   
c) add image augmentation (Check Chapter 14)   
d) create an ensemble (Check Chapter 14)   
e) use hyperparameter tuning, e.g. with [keras tuner](https://www.tensorflow.org/tutorials/keras/keras_tuner) (Hyperband seems to work quite well) 

As long as you implement at least **two** of the above you will get full points on this one. 

In [27]:
# Load MNIST and divide the pixel values by 255.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()
X_train_full, X_test = X_train_full / 255., X_test / 255.

# Hold out the last 5000 training samples (images and labels) for validation.
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]

# Append a trailing channel axis and materialise each split as its own array.
X_train = np.array(np.expand_dims(X_train, axis=-1))   # (55000, 28, 28, 1)
X_valid = np.array(np.expand_dims(X_valid, axis=-1))   # (5000, 28, 28, 1)
X_test = np.array(np.expand_dims(X_test, axis=-1))     # (10000, 28, 28, 1)

In [19]:
# Sanity check: display the Python type of the prepared training array.
type(X_train)

numpy.ndarray

In [32]:
# Augmentation pipeline for the training images: random rotations of up to
# 10 degrees, zoom of up to 10%, and horizontal/vertical shifts of up to 10%
# of the image size.
# No `validation_split` is configured here: validation uses the explicit
# X_valid / y_valid hold-out, and the generator is consumed via `flow()`
# without a `subset`, so a split fraction would have no effect.
data_gen = ImageDataGenerator(rotation_range = 10,
                              zoom_range = 0.1,
                              width_shift_range = 0.1,
                              height_shift_range = 0.1)

NoneType

In [37]:
# Build the CNN layer by layer; batch normalization precedes every
# convolutional and dense layer.
model = keras.models.Sequential()

# Convolutional feature extractor.
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(32, 3, activation='relu', padding='same'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Conv2D(64, 3, activation='relu', padding='same'))
model.add(keras.layers.MaxPool2D())

# Classifier head: flatten, then regularised dense layers.
model.add(keras.layers.Flatten())
model.add(keras.layers.Dropout(0.25))          # drop 25% of the flattened features
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(0.5))           # drop 50% of the hidden units
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(10, activation='softmax'))   # one output per digit class

In [38]:
# Integer class labels are used directly, hence the sparse cross-entropy loss.
model.compile(
    optimizer='nadam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Stream augmented, shuffled batches generated from the training arrays.
data_train = data_gen.flow(X_train, y_train, shuffle = True)

# Stop once the validation loss has not improved for 5 epochs and roll the
# model back to the best epoch, so `epochs = 50` acts as an upper bound
# rather than a fixed (and easily abandoned) training length.
early_stopping = keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5,
                                               restore_best_weights = True)

history = model.fit(data_train, validation_data = (X_valid, y_valid), epochs = 50,
                    callbacks = [early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

In [36]:
# Report the loss and accuracy of the model on the test set.
# NOTE(review): this cell's execution count (36) precedes the model definition
# (37) and compile (38) cells above, so the saved output was presumably
# produced by an earlier model -- re-run after training to refresh it.
model.evaluate(X_test, y_test)



[0.03755970671772957, 0.9901999831199646]