# Score: 0.9919

In [None]:
import numpy as np
import pandas as pd

### Load MNIST Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
mnist_full = pd.read_csv('/content/drive/MyDrive/train.csv')
mnist_full

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
mnist_full['label'].value_counts()

1    4684
7    4401
3    4351
9    4188
2    4177
6    4137
0    4132
4    4072
8    4063
5    3795
Name: label, dtype: int64

In [None]:
y = mnist_full['label'].to_numpy()
y

array([1, 0, 1, ..., 7, 6, 9])

In [None]:
X = mnist_full.drop(columns='label').to_numpy().reshape(-1, 28, 28, 1)
X.shape

(42000, 28, 28, 1)

### Split Data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)

In [None]:
X_train.shape

(37800, 28, 28, 1)

## Build Model

In [None]:
import tensorflow as tf
from tensorflow.keras import *

In [None]:
def build_cnn_model(conv_layers, dense_layers, optimizer='adam', lr=0.001, dropout=0, with_batch_norm=False):
    model = Sequential([layers.Input(shape=(28, 28, 1))])
    for layer in conv_layers:
        filters, kernel_size = layer
        model.add(layers.Conv2D(filters, kernel_size, activation='relu'))
        model.add(layers.MaxPooling2D())
        if with_batch_norm:
            model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    for units in dense_layers:
        model.add(layers.Dense(units, activation='relu'))
        if dropout > 0:
            model.add(layers.Dropout(dropout))
    model.add(layers.Dense(10, activation='softmax'))
    
    select = {'adam': optimizers.Adam, 'sgd': optimizers.SGD, 'rmsprop': optimizers.RMSprop}
    model.compile(optimizer=select[optimizer](lr), loss='sparse_categorical_crossentropy', metrics=['acc'])
    return model

## Define Parameters

In [31]:
conv_blocks = [
    [(32, 7), (64, 4)],
    [(64, 7), (128, 4)],
    [(32, 7), (128, 4)], 
    [(64, 7), (256, 4)],
    [(32, 7)], 
    [(64, 7)]
]
dense_blocks = [
    [1024, 256, 64],
    [512, 256, 32],
    [512, 128, 32],
    [1024, 256],
    [512, 64],
    [256, 32],
]
optimisers = ['adam', 'sgd']
learning_rates = [0.009, 0.03, 0.09]

ALL_PARAMS = []
for optimiser in optimisers:
    for lr in learning_rates:
        for conv in conv_blocks:
            for dense in dense_blocks:
                ALL_PARAMS.append((conv, dense, optimiser, lr, 0.2, True))
len(ALL_PARAMS) 

216

In [None]:
BEST_ACCURACY = 0.0
BEST_PARAMS = {}

## Training

In [32]:
EPOCHS = 20
BATCH_SIZE = 100

In [33]:
def train_and_evaluate(model):
    early_stop = callbacks.EarlyStopping(monitor='val_acc', patience=3, restore_best_weights=True)
    model.fit(X_train, y_train, 
              epochs=EPOCHS, 
              batch_size=BATCH_SIZE, 
              validation_data=(X_val, y_val), 
              validation_batch_size=BATCH_SIZE, 
              callbacks=[early_stop],
              verbose=0)
    return model.evaluate(X_val, y_val, batch_size=BATCH_SIZE, verbose=0)[1]

## Grid Search

In [34]:
LAST_STATE = 170

In [35]:
for params in ALL_PARAMS[LAST_STATE:]:
    conv, dense, optimiser, lr, drop, with_bn = params
    try:
        model = build_cnn_model(conv, dense, optimiser, lr, drop, with_bn)
        acc = train_and_evaluate(model)
        if acc > BEST_ACCURACY:
            BEST_PARAMS = {'OPTIMIZER': optimiser, 'LEARNING_RATE': lr,'CONV' : conv, 
                        'BATCH_NORM': with_bn, 'DENSE': dense, 'DROPOUT': drop}
            BEST_ACCURACY = acc
        
        m = LAST_STATE + 1
        print(f'\nMODEL {m}: ACCURACY: {acc:.5f}\n\tOPTIMIZER: {optimiser}(learning_rate={lr}) \
                \n\tCONV : {conv}\tBATCH_NORM: {with_bn}\n\tDENSE: {dense}\tDROPOUT: {drop}')
    except ValueError:
        print(f'\nMODEL {m}: NON-FEASIBLE')
    LAST_STATE += 1                    


MODEL 171: ACCURACY: 0.99024
	OPTIMIZER: sgd(learning_rate=0.03)                 
	CONV : [(32, 7), (64, 4)]	BATCH_NORM: True
	DENSE: [512, 128, 32]	DROPOUT: 0

MODEL 172: ACCURACY: 0.99095
	OPTIMIZER: sgd(learning_rate=0.03)                 
	CONV : [(32, 7), (64, 4)]	BATCH_NORM: True
	DENSE: [1024, 256]	DROPOUT: 0

MODEL 173: ACCURACY: 0.98810
	OPTIMIZER: sgd(learning_rate=0.03)                 
	CONV : [(32, 7), (64, 4)]	BATCH_NORM: True
	DENSE: [512, 64]	DROPOUT: 0

MODEL 174: ACCURACY: 0.98857
	OPTIMIZER: sgd(learning_rate=0.03)                 
	CONV : [(32, 7), (64, 4)]	BATCH_NORM: True
	DENSE: [256, 32]	DROPOUT: 0

MODEL 175: ACCURACY: 0.98833
	OPTIMIZER: sgd(learning_rate=0.03)                 
	CONV : [(64, 7), (128, 4)]	BATCH_NORM: True
	DENSE: [1024, 256, 64]	DROPOUT: 0

MODEL 176: ACCURACY: 0.98952
	OPTIMIZER: sgd(learning_rate=0.03)                 
	CONV : [(64, 7), (128, 4)]	BATCH_NORM: True
	DENSE: [512, 256, 32]	DROPOUT: 0

MODEL 177: ACCURACY: 0.98905
	OPTIMIZER: sg

In [60]:
BEST_ACCURACY

0.9926190376281738

In [61]:
BEST_PARAMS

{'BATCH_NORM': True,
 'CONV': [(32, 7), (64, 4)],
 'DENSE': [512, 64],
 'DROPOUT': 0,
 'LEARNING_RATE': 0.09,
 'OPTIMIZER': 'sgd'}

## Build Best Model

In [65]:
best_model = build_cnn_model(conv_layers=BEST_PARAMS['CONV'], 
                             dense_layers=BEST_PARAMS['DENSE'],
                             optimizer=BEST_PARAMS['OPTIMIZER'], 
                             lr=BEST_PARAMS['LEARNING_RATE'], 
                             dropout=BEST_PARAMS['DROPOUT'], 
                             with_batch_norm=BEST_PARAMS['BATCH_NORM'])

In [66]:
model.summary()

Model: "sequential_262"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_644 (Conv2D)         (None, 22, 22, 64)        3200      
                                                                 
 max_pooling2d_644 (MaxPooli  (None, 11, 11, 64)       0         
 ng2D)                                                           
                                                                 
 batch_normalization_644 (Ba  (None, 11, 11, 64)       256       
 tchNormalization)                                               
                                                                 
 conv2d_645 (Conv2D)         (None, 8, 8, 128)         131200    
                                                                 
 max_pooling2d_645 (MaxPooli  (None, 4, 4, 128)        0         
 ng2D)                                                           
                                                    

### Retrain on entire data

In [67]:
model.fit(X, y, epochs=1, batch_size=BATCH_SIZE)



<keras.callbacks.History at 0x7f77cb7fb450>

### Predict on Test data

In [68]:
mnist_test = pd.read_csv('/content/drive/MyDrive/test.csv')

In [69]:
mnist_test

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
X_test = mnist_test.to_numpy().reshape(-1, 28, 28, 1)
X_test.shape

(28000, 28, 28, 1)

In [71]:
y_pred = model.predict(X_test)
y_pred

array([[9.8413659e-11, 9.5240350e-11, 1.0000000e+00, ..., 1.5715911e-09,
        1.2020991e-10, 2.0814264e-11],
       [9.9999881e-01, 2.4697608e-09, 3.8002278e-07, ..., 9.2707282e-09,
        1.2538391e-08, 9.6903641e-08],
       [5.1212288e-09, 5.9577931e-10, 1.8286896e-09, ..., 6.3441853e-08,
        6.1367251e-07, 9.9999905e-01],
       ...,
       [3.5802619e-16, 3.2854223e-11, 1.6710969e-13, ..., 1.2744457e-12,
        5.5484137e-12, 3.7563342e-13],
       [4.2102335e-09, 2.7802782e-10, 4.4047817e-11, ..., 1.1310145e-08,
        5.7293281e-10, 9.9999976e-01],
       [6.7125437e-12, 6.1599059e-12, 1.0000000e+00, ..., 3.3897213e-10,
        1.0278472e-10, 1.2197712e-12]], dtype=float32)

In [72]:
predictions = y_pred.argmax(axis=-1)
predictions

array([2, 0, 9, ..., 3, 9, 2])

## Create Submission

In [73]:
submission = pd.DataFrame({'ImageId': range(1, len(y_pred) + 1), 'Label': predictions})
submission

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [74]:
submission.to_csv('/submission.csv', index=False)