# Score: 0.9919

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Load  Data

In [2]:
mnist_full = pd.read_csv('./data/train.csv')
mnist_test = pd.read_csv('./data/test.csv')
mnist_full

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
mnist_full['label'].value_counts()

1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

In [4]:
y = mnist_full['label'].to_numpy()
y

array([1, 0, 1, ..., 7, 6, 9])

In [5]:
X = mnist_full.drop(columns='label').to_numpy().reshape(-1, 28, 28, 1)
X.shape

(42000, 28, 28, 1)

### Split Data

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)

In [8]:
X_train.shape

(37800, 28, 28, 1)

## Build Model

In [9]:
import tensorflow as tf
from tensorflow.keras import *

In [10]:
def build_cnn_model(conv_layers, dense_layers, optimizer='adam', lr=0.001, dropout=0, with_batch_norm=False):
    model = Sequential([layers.Input(shape=(28, 28, 1))])
    for layer in conv_layers:
        filters, kernel_size = layer
        model.add(layers.Conv2D(filters, kernel_size, activation='relu'))
        model.add(layers.MaxPooling2D())
        if with_batch_norm:
            model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    for units in dense_layers:
        model.add(layers.Dense(units, activation='relu'))
        if dropout > 0:
            model.add(layers.Dropout(dropout))
    model.add(layers.Dense(10, activation='softmax'))
    
    select = {'adam': optimizers.Adam, 'sgd': optimizers.SGD, 'rmsprop': optimizers.RMSprop}
    model.compile(optimizer=select[optimizer](lr), loss='sparse_categorical_crossentropy', metrics=['acc'])
    return model

## Define Parameters

In [11]:
conv_blocks = [
    [(32, 4), (64, 4)], 
    [(32, 4), (64, 4), (128, 4)], 
    [(32, 4), (64, 4), (128, 4), (256, 4)], 
    [(32, 7), (64, 4)], 
    [(32, 7), (64, 4), (128, 4)], 
    [(32, 7), (64, 4), (128, 4), (256, 4)], 
    [(64, 4), (128, 4)], 
    [(64, 4), (128, 4), (256, 4)], 
    [(64, 7), (128, 4)], 
    [(64, 7), (128, 4), (256, 4)]
]
dense_blocks = [
    [1024, 256, 64],
    [1024, 256, 32],
    [512, 256, 64],
    [512, 256, 32],
    [512, 128, 64],
    [512, 128, 32],
    [1024, 256],
    [1024, 128],
    [512, 64],
    [512, 32],
    [256, 64],
    [256, 32],
]
dropouts = [0, 0.15, 0.3]
optimisers = ['adam', 'rmsprop', 'sgd']
learning_rates = [0.003, 0.009, 0.03, 0.09]

## Training

In [12]:
EPOCHS = 20
BATCH_SIZE = 100

In [13]:
def train_and_evaluate(model):
    early_stop = callbacks.EarlyStopping(monitor='val_acc', patience=3, restore_best_weights=True)
    model.fit(X_train, y_train, 
              epochs=EPOCHS, 
              batch_size=BATCH_SIZE, 
              validation_data=[X_val, y_val], 
              validation_batch_size=BATCH_SIZE, 
              callbacks=[early_stop],
              verbose=0)
    return model.evaluate(X_val, y_val, batch_size=BATCH_SIZE, verbose=0)[1]

## Grid Search

In [14]:
BEST_ACCURACY = 0
BEST_PARAMS = {}

m = 1
for optimiser in optimisers:
    for lr in learning_rates:
        for conv in conv_blocks:
            for bn in [True, False]:
                for dense in dense_blocks:
                    for drop in dropouts:
                        model = build_cnn_model(conv, dense, optimiser, lr, drop, with_batch_norm=bn)
                        acc = train_and_evaluate(model)
                        print(f'MODEL {m}:\n\tACCURACY: {acc:.5f}\n\tOPTIMIZER: {optimiser}\n\tLEARNING_RATE: {lr} \
                                \n\tCONV : {conv}\n\tBATCH_NORM: {bn}\n\tDENSE: {dense}\n\tDROPOUT: {drop}')
                        if acc > BEST_ACCURACY:
                            BEST_PARAMS = {'OPTIMIZER': optimiser, 'LEARNING_RATE': lr,'CONV' : conv, 
                                           'BATCH_NORM': bn, 'DENSE': dense, 'DROPOUT': drop}
                            BEST_ACCURACY = acc
                        m += 1

2022-03-07 11:19:03.264474: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-07 11:19:03.264554: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-03-07 11:19:03.435028: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Metal device set to: Apple M1


2022-03-07 11:19:03.594480: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-07 11:19:07.870228: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


MODEL 1:
	ACCURACY: 0.98905
	OPTIMIZER: adam
	LEARNING_RATE: 0.003                                 
	CONV : [(32, 4), (64, 4)]
	BATCH_NORM: True
	DENSE: [1024, 256, 64]
	DROPOUT: 0


2022-03-07 11:20:02.873093: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-07 11:20:07.214832: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


MODEL 2:
	ACCURACY: 0.98476
	OPTIMIZER: adam
	LEARNING_RATE: 0.003                                 
	CONV : [(32, 4), (64, 4)]
	BATCH_NORM: True
	DENSE: [1024, 256, 64]
	DROPOUT: 0.15


2022-03-07 11:20:33.759390: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-07 11:20:37.957652: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


MODEL 3:
	ACCURACY: 0.98571
	OPTIMIZER: adam
	LEARNING_RATE: 0.003                                 
	CONV : [(32, 4), (64, 4)]
	BATCH_NORM: True
	DENSE: [1024, 256, 64]
	DROPOUT: 0.3


2022-03-07 11:21:05.159108: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-07 11:21:09.316531: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


MODEL 4:
	ACCURACY: 0.98214
	OPTIMIZER: adam
	LEARNING_RATE: 0.003                                 
	CONV : [(32, 4), (64, 4)]
	BATCH_NORM: True
	DENSE: [1024, 256, 32]
	DROPOUT: 0


2022-03-07 11:21:36.040346: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-07 11:21:40.271338: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


MODEL 5:
	ACCURACY: 0.98595
	OPTIMIZER: adam
	LEARNING_RATE: 0.003                                 
	CONV : [(32, 4), (64, 4)]
	BATCH_NORM: True
	DENSE: [1024, 256, 32]
	DROPOUT: 0.15


2022-03-07 11:22:20.187872: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-07 11:22:24.564822: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


KeyboardInterrupt: 

In [15]:
BEST_ACCURACY

0.9890475869178772

In [16]:
BEST_PARAMS

{'OPTIMIZER': 'adam',
 'LEARNING_RATE': 0.003,
 'CONV': [(32, 4), (64, 4)],
 'BATCH_NORM': True,
 'DENSE': [1024, 256, 64],
 'DROPOUT': 0}

## Retrain on entire data

In [None]:
model.fit(X, y, epochs=2, batch_size=1)

### Predict on Test data

In [None]:
mnist_test

In [None]:
X_test = mnist_test.to_numpy().reshape(-1, 28, 28, 1)
X_test.shape

In [None]:
y_pred = model.predict(X_test)
y_pred

In [None]:
predictions = y_pred.argmax(axis=-1)
predictions

## Create Submission

In [None]:
submission = pd.DataFrame({'ImageId': range(1, len(y_pred) + 1), 'Label': predictions})
submission

In [None]:
submission.to_csv('data/submission.csv', index=False)