## Score: ```0.9915```
## Best Params:
```
 'OPTIMIZER': 'sgd',
 'LEARNING_RATE': 0.09,
 'CONV': [(64, 7), (128, 4)],
 'BATCH_NORM': True,
 'DENSE': [1024, 256],
 'DROPOUT': 0.2
```

In [23]:
import numpy as np
import pandas as pd

### Load MNIST Data

In [24]:
# Read the labelled training set and the unlabelled test set from CSV.
mnist_full = pd.read_csv('data/train.csv')
mnist_test = pd.read_csv('data/test.csv')
# Bare trailing expression so the notebook renders the training frame.
mnist_full

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Count how many training rows carry each digit label, to check class balance.
mnist_full['label'].value_counts()

1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

In [26]:
# Target vector: the 'label' column as a NumPy array, in the frame's row order.
y = mnist_full['label'].to_numpy()
y

array([1, 0, 1, ..., 7, 6, 9])

In [27]:
# Every column except 'label' is pixel data (pixel0..pixel783). Reshape each
# 784-value row into a 28x28 single-channel image: (n_samples, 28, 28, 1).
X = mnist_full.drop(columns='label').to_numpy().reshape(-1, 28, 28, 1)
X.shape

(42000, 28, 28, 1)

### Split Data

In [28]:
from sklearn.model_selection import train_test_split

In [29]:
# Hold out 10% of the labelled data for validation.
# random_state pins the shuffle so the split, and every score derived from it,
# is reproducible across kernel restarts; stratify=y keeps the digit-class
# proportions the same in the training and validation parts.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y
)

In [30]:
# Sanity-check the size of the training split.
X_train.shape

(37800, 28, 28, 1)

## Define Build Model Function

In [31]:
import tensorflow as tf
from tensorflow.keras import *

In [32]:
def build_cnn_model(conv_layers, dense_layers, optimizer='adam', lr=0.001, dropout=0, with_batch_norm=False):
    """Assemble and compile a sequential CNN classifier for 28x28x1 inputs.

    Args:
        conv_layers: iterable of (filters, kernel_size) pairs. Each pair adds a
            ReLU Conv2D followed by MaxPooling2D, plus BatchNormalization
            when `with_batch_norm` is true.
        dense_layers: iterable of unit counts. Each adds a ReLU Dense layer,
            followed by Dropout when `dropout` is greater than 0.
        optimizer: 'adam' or 'sgd'; any other key raises KeyError.
        lr: learning rate, passed positionally to the optimizer constructor.
        dropout: dropout rate used after every hidden Dense layer.
        with_batch_norm: whether to add BatchNormalization after each pooling layer.

    Returns:
        The compiled model. It ends in a 10-unit softmax layer, uses
        sparse categorical cross-entropy and tracks the 'acc' metric.
    """
    net = Sequential([layers.Input(shape=(28, 28, 1))])

    # Feature extractor: conv -> pool (-> batch norm) per requested block.
    for n_filters, kernel in conv_layers:
        net.add(layers.Conv2D(n_filters, kernel, activation='relu'))
        net.add(layers.MaxPooling2D())
        if with_batch_norm:
            net.add(layers.BatchNormalization())

    # Classifier head: flatten, hidden dense layers, then the 10-way softmax.
    net.add(layers.Flatten())
    for n_units in dense_layers:
        net.add(layers.Dense(n_units, activation='relu'))
        if dropout > 0:
            net.add(layers.Dropout(dropout))
    net.add(layers.Dense(10, activation='softmax'))

    # Map the optimizer name to its class only after the layers are built.
    optimizer_classes = {'adam': optimizers.Adam, 'sgd': optimizers.SGD}
    chosen_optimizer = optimizer_classes[optimizer](lr)
    net.compile(optimizer=chosen_optimizer, loss='sparse_categorical_crossentropy', metrics=['acc'])
    return net

## Define Parameter Search Space

In [33]:
# Candidate convolutional stacks: each inner list is a sequence of
# (filters, kernel_size) pairs, one pair per Conv2D block.
conv_blocks = [
    [(32, 7), (64, 4)],
    [(64, 7), (128, 4)],
    [(32, 7), (64, 4), (128, 2)],
    [(64, 7), (128, 4), (256, 2)]
]
# Candidate fully connected heads: unit count per hidden Dense layer.
dense_blocks = [
    [1024, 256, 64],
    [512, 256, 32],
    [512, 128, 32],
    [1024, 256],
    [512, 64],
    [256, 32],
]
optimisers = ['adam', 'sgd']
learning_rates = [0.009, 0.03, 0.09]

# Settings held fixed for every candidate in the grid.
DROPOUT_RATE = 0.2
USE_BATCH_NORM = True

# Running best across the grid search; reset whenever this cell is re-run.
BEST_ACCURACY = 0.0
BEST_PARAMS = {}

# Full Cartesian product, ordered optimiser -> learning rate -> conv -> dense.
# Each entry is (conv, dense, optimiser, lr, dropout, with_batch_norm).
# A comprehension is used so no loop variables are left behind in the
# notebook's global namespace.
ALL_PARAMS = [
    (conv, dense, optimiser, lr, DROPOUT_RATE, USE_BATCH_NORM)
    for optimiser in optimisers
    for lr in learning_rates
    for conv in conv_blocks
    for dense in dense_blocks
]
len(ALL_PARAMS)

144

## Define Training and Evaluation Function

In [34]:
# Upper bound on epochs per fit.
EPOCHS = 20
# Batch size passed to fit, validation and evaluate calls.
BATCH_SIZE = 100

In [35]:
# Shared callback: stop a fit once 'val_acc' has gone 5 epochs without
# improving, and ask Keras to restore the best weights it recorded.
early_stop = callbacks.EarlyStopping(
    monitor='val_acc',
    patience=5,
    restore_best_weights=True,
)
def train_and_evaluate(model):
    """Fit `model` on the training split and return its validation accuracy.

    Training runs silently for at most EPOCHS epochs with the shared
    `early_stop` callback, validating on (X_val, y_val). The returned value is
    element 1 of `model.evaluate` on the validation split, which is the
    'acc' metric for models compiled with metrics=['acc'].
    """
    fit_options = dict(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(X_val, y_val),
        validation_batch_size=BATCH_SIZE,
        callbacks=[early_stop],
        verbose=0,
    )
    model.fit(X_train, y_train, **fit_options)
    scores = model.evaluate(X_val, y_val, batch_size=BATCH_SIZE, verbose=0)
    return scores[1]

## Perform Grid Search

In [36]:
# Index into ALL_PARAMS of the next configuration to evaluate. The search loop
# increments it after each configuration, so re-running that loop resumes
# where it stopped.
LAST_STATE = 0
# Exclusive upper index of the slice to evaluate (the whole grid by default).
STOP_STATE = len(ALL_PARAMS)

In [37]:
# Walk the not-yet-evaluated slice of the grid. LAST_STATE advances after every
# configuration, so re-running this cell after an interruption resumes where
# it stopped instead of starting over.
for params in ALL_PARAMS[LAST_STATE:STOP_STATE]:
    conv, dense, optimiser, lr, drop, with_bn = params
    # 1-based model number. Computed before the try block so the except branch
    # always reports the number of the model that actually failed.
    m = LAST_STATE + 1
    # Release Keras global state left by the previous candidate so memory does
    # not keep growing while many models are built in a loop.
    tf.keras.backend.clear_session()
    try:
        model = build_cnn_model(conv, dense, optimiser, lr, drop, with_bn)
        acc = train_and_evaluate(model)
        # Strict '>' keeps the earliest configuration when accuracies tie.
        if acc > BEST_ACCURACY:
            BEST_PARAMS = {'OPTIMIZER': optimiser, 'LEARNING_RATE': lr,'CONV' : conv, 
                        'BATCH_NORM': with_bn, 'DENSE': dense, 'DROPOUT': drop}
            BEST_ACCURACY = acc
        
        print(f'\nMODEL {m}: ACCURACY: {acc:.5f}\n\tOPTIMIZER: {optimiser}(learning_rate={lr}) \
                \n\tCONV : {conv}\tBATCH_NORM: {with_bn}\n\tDENSE: {dense}\tDROPOUT: {drop}')
    except ValueError:
        # Presumably raised when a conv/pool stack cannot be applied to a
        # 28x28 input -- TODO confirm; such configurations are reported and skipped.
        print(f'\nMODEL {m}: NON-FEASIBLE')
    LAST_STATE += 1

2022-03-13 19:35:58.498656: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-13 19:36:03.207578: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


KeyboardInterrupt: 

In [None]:
# Best validation accuracy recorded by the grid search so far (0.0 if none finished).
BEST_ACCURACY

0.0

In [39]:
# Hyper-parameters of the configuration that produced BEST_ACCURACY.
BEST_PARAMS

## Build Best Model

In [40]:
# BEST_PARAMS is only filled in by the grid search. If that search was skipped
# or interrupted before any model finished, BEST_PARAMS is still empty and the
# lookups below would raise KeyError. Fall back to the best configuration
# recorded in the summary at the top of this notebook so the cell still runs
# on a fresh kernel.
if not BEST_PARAMS:
    BEST_PARAMS = {'OPTIMIZER': 'sgd', 'LEARNING_RATE': 0.09, 'CONV': [(64, 7), (128, 4)],
                   'BATCH_NORM': True, 'DENSE': [1024, 256], 'DROPOUT': 0.2}

# Rebuild an untrained model with the winning hyper-parameters.
best_model = build_cnn_model(conv_layers=BEST_PARAMS['CONV'], 
                             dense_layers=BEST_PARAMS['DENSE'],
                             optimizer=BEST_PARAMS['OPTIMIZER'], 
                             lr=BEST_PARAMS['LEARNING_RATE'], 
                             dropout=BEST_PARAMS['DROPOUT'], 
                             with_batch_norm=BEST_PARAMS['BATCH_NORM'])

In [41]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
best_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 22, 22, 64)        3200      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 11, 11, 64)       0         
 2D)                                                             
                                                                 
 batch_normalization_4 (Batc  (None, 11, 11, 64)       256       
 hNormalization)                                                 
                                                                 
 conv2d_5 (Conv2D)           (None, 8, 8, 128)         131200    
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 4, 4, 128)        0         
 2D)                                                             
                                                      

### Retrain

In [42]:
# Train the freshly built best model on the training split, using the same
# epoch budget, batch size and early-stopping callback as the search.
best_model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

Epoch 1/20
  1/378 [..............................] - ETA: 1:36 - loss: 2.8046 - acc: 0.1100

2022-03-13 19:36:33.644082: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2022-03-13 19:36:41.384423: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


<keras.callbacks.History at 0x16bee5ee0>

In [43]:
# Score the retrained model on the held-out validation split; returns [loss, acc].
best_model.evaluate(X_val, y_val, batch_size=BATCH_SIZE)



[0.020185550674796104, 0.9952380657196045]

### Predict on Test data

In [48]:
# Display the unlabelled test frame (pixel columns only, no 'label' column).
mnist_test

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [49]:
# Reshape the test pixels the same way as the training data:
# one 28x28 single-channel image per row, shape (n_samples, 28, 28, 1).
X_test = mnist_test.to_numpy().reshape(-1, 28, 28, 1)
X_test.shape

(28000, 28, 28, 1)

In [50]:
# Softmax outputs for the test images: one row per image, one column per class.
y_pred = best_model.predict(X_test)
y_pred

array([[3.10608261e-10, 1.27968594e-10, 1.00000000e+00, ...,
        4.82168576e-11, 1.15056284e-10, 1.57078343e-13],
       [9.99998927e-01, 2.96706282e-09, 1.00896713e-07, ...,
        2.99831342e-08, 1.33479594e-09, 4.64673029e-08],
       [9.87519615e-12, 4.80399227e-11, 8.45413930e-12, ...,
        3.79033749e-11, 5.37021982e-09, 9.99999046e-01],
       ...,
       [8.67900750e-19, 5.27182752e-17, 2.03704902e-15, ...,
        4.83783478e-14, 1.52993361e-14, 1.48440254e-14],
       [1.36805817e-10, 1.75050599e-11, 1.00222380e-12, ...,
        6.45813014e-10, 4.32612002e-11, 9.99999762e-01],
       [1.84120991e-12, 3.50754083e-12, 1.00000000e+00, ...,
        8.52272680e-13, 1.73090597e-10, 2.84664205e-15]], dtype=float32)

In [51]:
# Collapse each row of class probabilities to the index of its largest entry,
# i.e. the predicted digit.
predictions = np.argmax(y_pred, axis=-1)
predictions

array([2, 0, 9, ..., 3, 9, 2])

## Create Submission

In [52]:
# One row per test image: a 1-based ImageId alongside the predicted digit.
submission = pd.DataFrame(
    dict(
        ImageId=range(1, len(y_pred) + 1),
        Label=predictions,
    )
)
submission

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [54]:
# Write the submission without the DataFrame index, so the file contains only
# the ImageId and Label columns.
submission.to_csv('data/submission.csv', index=False)