In [1]:
import numpy as np
import pandas as pd

### Load MNIST Data

In [7]:
# Load the labelled training data and the unlabelled test data into DataFrames.
# NOTE(review): both paths are absolute at the filesystem root — adjust if the
# CSV files live somewhere else.
mnist_full = pd.read_csv('/train.csv')
mnist_test = pd.read_csv('/test.csv')
# Display the training frame (pandas truncates the rendered rows and columns).
mnist_full

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Number of training examples per digit label, to check class balance.
mnist_full['label'].value_counts()

1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

In [9]:
# Target vector: the digit labels as a NumPy array.
y = mnist_full['label'].to_numpy()
y

array([1, 0, 1, ..., 7, 6, 9])

In [10]:
# Feature tensor: every non-label column, reshaped so that each row becomes a
# 28x28 single-channel image.
# NOTE(review): pixel values are passed through unscaled — confirm that is intended.
X = mnist_full.drop(columns='label').to_numpy().reshape(-1, 28, 28, 1)
X.shape

(42000, 28, 28, 1)

### Split Data

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# Hold out 10% of the labelled data for validation.
# random_state pins the shuffle so the same split is produced after a kernel
# restart; stratify=y keeps the digit distribution the same in both subsets.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

In [13]:
# Sanity check on the size of the training split produced with test_size=0.1.
X_train.shape

(37800, 28, 28, 1)

## Define Model-Building Function

In [14]:
import tensorflow as tf
from tensorflow.keras import *

In [15]:
def build_cnn_model(conv_layers, dense_layers, optimizer='adam', lr=0.001, dropout=0, with_batch_norm=False):
    """Assemble and compile a sequential CNN classifier for 28x28x1 inputs.

    Args:
        conv_layers: iterable of (filters, kernel_size) pairs. Each pair adds a
            ReLU Conv2D layer followed by MaxPooling2D, plus BatchNormalization
            when ``with_batch_norm`` is true.
        dense_layers: iterable of unit counts. Each adds a ReLU Dense layer,
            followed by Dropout when ``dropout`` is greater than 0.
        optimizer: one of 'adam', 'sgd' or 'rmsprop'.
        lr: learning rate passed to the optimizer constructor.
        dropout: dropout rate after each hidden Dense layer; 0 disables dropout.
        with_batch_norm: whether to add BatchNormalization after each pooling layer.

    Returns:
        The compiled model, ending in a 10-unit softmax layer and using
        sparse categorical cross-entropy loss with the 'acc' metric.

    Raises:
        KeyError: if ``optimizer`` is not one of the supported names.
    """
    net = Sequential([layers.Input(shape=(28, 28, 1))])

    # Convolutional feature extractor.
    for n_filters, kernel in conv_layers:
        net.add(layers.Conv2D(n_filters, kernel, activation='relu'))
        net.add(layers.MaxPooling2D())
        if with_batch_norm:
            net.add(layers.BatchNormalization())

    # Fully connected classification head.
    net.add(layers.Flatten())
    for width in dense_layers:
        net.add(layers.Dense(width, activation='relu'))
        if dropout > 0:
            net.add(layers.Dropout(dropout))
    net.add(layers.Dense(10, activation='softmax'))

    # Resolve the optimizer name only after the layers are stacked, so that a
    # layer-construction error surfaces before an unknown-optimizer KeyError.
    optimizer_classes = {'adam': optimizers.Adam, 'sgd': optimizers.SGD, 'rmsprop': optimizers.RMSprop}
    make_optimizer = optimizer_classes[optimizer]
    net.compile(optimizer=make_optimizer(lr), loss='sparse_categorical_crossentropy', metrics=['acc'])
    return net

## Define Hyperparameter Search Space

In [17]:
# Candidate convolutional stacks: each entry is a list of (filters, kernel_size) pairs.
conv_blocks = [
    [(32, 4)],
    [(32, 7)],
    [(64, 4)],
    [(64, 7)],
    [(32, 4), (64, 4)],
    [(32, 7), (64, 4)],
    [(64, 4), (128, 4)],
    [(64, 7), (128, 4)],
    [(32, 4), (64, 4), (128, 2)],
    [(64, 4), (128, 4), (256, 2)],
    [(32, 7), (64, 4), (128, 2)],
    [(64, 7), (128, 4), (256, 2)],
]
# Candidate dense heads: each entry lists the unit counts of the hidden Dense layers.
dense_blocks = [
    [1024, 256, 64],
    [1024, 256, 32],
    [512, 256, 64],
    [512, 256, 32],
    [512, 128, 64],
    [512, 128, 32],
    [1024, 256],
    [1024, 128],
    [512, 64],
    [512, 32],
    [256, 64],
    [256, 32],
]
dropouts = [0, 0.15, 0.3]
optimisers = ['adam', 'rmsprop', 'sgd']
learning_rates = [0.003, 0.009, 0.03, 0.09]

# Search bookkeeping: best result so far and the index of the next combination to try.
BEST_ACCURACY = 0.0
BEST_PARAMS = {}
LAST_STATE = 0

# Full Cartesian product of the search space. The outermost `for` varies
# slowest, so the ordering is optimiser > learning rate > conv stack >
# batch norm > dense head > dropout.
ALL_PARAMS = [
    (conv, dense, optimiser, lr, drop, with_bn)
    for optimiser in optimisers
    for lr in learning_rates
    for conv in conv_blocks
    for with_bn in (True, False)
    for dense in dense_blocks
    for drop in dropouts
]
len(ALL_PARAMS)

10368

## Define Training and Evaluation Function

In [18]:
EPOCHS = 20  # number of epochs requested from each model.fit call
BATCH_SIZE = 100  # mini-batch size used for fitting, validation and evaluation

In [19]:
def train_and_evaluate(model):
    """Fit ``model`` on the training split and score it on the validation split.

    Reads the notebook-level X_train, y_train, X_val, y_val, EPOCHS and
    BATCH_SIZE. Training is silent and uses an EarlyStopping callback that
    monitors 'val_acc' with a patience of 3 and restore_best_weights=True.

    Args:
        model: a compiled Keras model; it is trained in place.

    Returns:
        The second element of ``model.evaluate`` on the validation split, i.e.
        the value of the first compiled metric.
    """
    stopper = callbacks.EarlyStopping(monitor='val_acc', patience=3, restore_best_weights=True)
    fit_options = dict(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(X_val, y_val),
        validation_batch_size=BATCH_SIZE,
        callbacks=[stopper],
        verbose=0,
    )
    model.fit(X_train, y_train, **fit_options)

    # evaluate() returns [loss, metric, ...]; index 1 is the first metric.
    scores = model.evaluate(X_val, y_val, batch_size=BATCH_SIZE, verbose=0)
    return scores[1]

## Perform Grid Search

In [25]:
# Resume the grid search from LAST_STATE, so the cell can be interrupted and
# re-run without repeating combinations that already finished.
for params in ALL_PARAMS[LAST_STATE:]:
    conv, dense, optimiser, lr, drop, with_bn = params

    # 1-based model number. Computed before the try block so the except branch
    # reports the current model instead of a stale or undefined value.
    m = LAST_STATE + 1

    # Drop Keras global state left by the previous candidate, so memory does
    # not keep growing over thousands of model builds.
    tf.keras.backend.clear_session()

    try:
        model = build_cnn_model(conv, dense, optimiser, lr, drop, with_bn)
        acc = train_and_evaluate(model)
    except ValueError:
        # Raised when a layer stack cannot be built for a 28x28 input.
        print(f'\nMODEL {m}: NON-FEASIBLE')
    else:
        if acc > BEST_ACCURACY:
            BEST_PARAMS = {'OPTIMIZER': optimiser, 'LEARNING_RATE': lr, 'CONV': conv,
                           'BATCH_NORM': with_bn, 'DENSE': dense, 'DROPOUT': drop}
            BEST_ACCURACY = acc

        # Adjacent f-strings are used instead of a backslash continuation, which
        # leaked the next line's indentation into the printed text.
        print(f'\nMODEL {m}: ACCURACY: {acc:.5f}'
              f'\n\tOPTIMIZER: {optimiser}(learning_rate={lr})'
              f'\n\tCONV : {conv}\tBATCH_NORM: {with_bn}'
              f'\n\tDENSE: {dense}\tDROPOUT: {drop}')

    # Only advance once the candidate has finished (or proved infeasible), so an
    # interrupted candidate is retried on the next run.
    LAST_STATE += 1

MODEL11: ACCURACY: 0.98095
	OPTIMIZER: adam(learning_rate=0.003)                 
	CONV : [(32, 4)]	BATCH_NORM: True
	DENSE: [512, 256, 32]	DROPOUT: 0.15
MODEL12: ACCURACY: 0.98048
	OPTIMIZER: adam(learning_rate=0.003)                 
	CONV : [(32, 4)]	BATCH_NORM: True
	DENSE: [512, 256, 32]	DROPOUT: 0.3
MODEL13: ACCURACY: 0.98214
	OPTIMIZER: adam(learning_rate=0.003)                 
	CONV : [(32, 4)]	BATCH_NORM: True
	DENSE: [512, 128, 64]	DROPOUT: 0
MODEL14: ACCURACY: 0.98048
	OPTIMIZER: adam(learning_rate=0.003)                 
	CONV : [(32, 4)]	BATCH_NORM: True
	DENSE: [512, 128, 64]	DROPOUT: 0.15
MODEL15: ACCURACY: 0.98000
	OPTIMIZER: adam(learning_rate=0.003)                 
	CONV : [(32, 4)]	BATCH_NORM: True
	DENSE: [512, 128, 64]	DROPOUT: 0.3
MODEL16: ACCURACY: 0.98262
	OPTIMIZER: adam(learning_rate=0.003)                 
	CONV : [(32, 4)]	BATCH_NORM: True
	DENSE: [512, 128, 32]	DROPOUT: 0
MODEL17: ACCURACY: 0.98357
	OPTIMIZER: adam(learning_rate=0.003)                 
	C

KeyboardInterrupt: ignored

In [26]:
# Highest validation accuracy recorded by the grid search so far.
BEST_ACCURACY

0.9859523773193359

In [27]:
# Hyperparameters of the candidate that achieved BEST_ACCURACY.
BEST_PARAMS

{'BATCH_NORM': True,
 'CONV': [(32, 7)],
 'DENSE': [1024, 256, 32],
 'DROPOUT': 0.15,
 'LEARNING_RATE': 0.003,
 'OPTIMIZER': 'adam'}

## Build Best Model

In [None]:
# Build a fresh, untrained model from the best hyperparameters found by the
# search. Arguments are passed in build_cnn_model's positional order:
# conv_layers, dense_layers, optimizer, lr, dropout, with_batch_norm.
best_model = build_cnn_model(
    BEST_PARAMS['CONV'],
    BEST_PARAMS['DENSE'],
    BEST_PARAMS['OPTIMIZER'],
    BEST_PARAMS['LEARNING_RATE'],
    BEST_PARAMS['DROPOUT'],
    BEST_PARAMS['BATCH_NORM'],
)

In [None]:
# Summarise best_model. `model` is whichever candidate the grid-search loop
# built last, not the best one.
best_model.summary()

### Retrain on entire data

In [None]:
model.fit(X, y, epochs=EPOCHS, batch_size=BATCH_SIZE)

### Predict on Test data

In [None]:
mnist_test

In [None]:
X_test = mnist_test.to_numpy().reshape(-1, 28, 28, 1)
X_test.shape

In [None]:
y_pred = model.predict(X_test)
y_pred

In [None]:
# Predicted digit = index of the largest value along the last axis of y_pred.
predictions = np.argmax(y_pred, axis=-1)
predictions

## Create Submission

In [None]:
# One row per test image: a 1-based ImageId and the predicted digit label.
submission = pd.DataFrame({'ImageId': range(1, len(y_pred) + 1), 'Label': predictions})
submission

In [None]:
# Write the submission file without the DataFrame index column.
# NOTE(review): the path is absolute at the filesystem root — adjust if needed.
submission.to_csv('/submission.csv', index=False)