In [4]:
import numpy as np
import pandas as pd

### Load MNIST Data

In [5]:
# Load the Kaggle "digit-recognizer" CSVs. `train.csv` carries a 'label' column
# (used below as the target); `test.csv` is the set predicted for the submission.
mnist_full = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
mnist_test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
mnist_full

In [6]:
# Class-balance check: number of training rows per value of 'label'.
mnist_full['label'].value_counts()

In [7]:
# Target vector: the 'label' column as a NumPy array.
y = mnist_full['label'].to_numpy()
y

In [8]:
# Feature tensor: every non-label column, reshaped to (n_samples, 28, 28, 1) so each
# row becomes one 28x28 single-channel input matching the model's Input layer.
# The reshape only succeeds when each row has 28 * 28 = 784 feature values.
X = mnist_full.drop(columns='label').to_numpy().reshape(-1, 28, 28, 1)
X.shape

### Split Data

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
# Hold out 10% of the labelled data for validation.
# The split is seeded so that it is identical on every run: the grid search below can be
# resumed part-way (see LAST_STATE), and accuracies are only comparable across runs
# when every configuration is scored on the same validation set.
SPLIT_SEED = 42
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=SPLIT_SEED)

In [11]:
# Sanity check: shape of the training portion of the split.
X_train.shape

## Define Model-Building Function

In [12]:
import tensorflow as tf
from tensorflow.keras import *

In [13]:
def build_cnn_model(conv_layers, dense_layers, optimizer='adam', lr=0.001, dropout=0, with_batch_norm=False):
    """Assemble and compile a sequential CNN classifier for 28x28x1 inputs.

    Args:
        conv_layers: iterable of ``(filters, kernel_size)`` pairs. Each pair adds a
            ReLU ``Conv2D`` followed by ``MaxPooling2D`` and, when
            ``with_batch_norm`` is true, ``BatchNormalization``.
        dense_layers: iterable of unit counts. Each entry adds a ReLU ``Dense``
            layer, followed by ``Dropout(dropout)`` when ``dropout > 0``.
        optimizer: one of ``'adam'``, ``'sgd'`` or ``'rmsprop'``.
        lr: learning rate passed to the optimizer constructor.
        dropout: dropout rate after each hidden dense layer; 0 disables dropout.
        with_batch_norm: add batch normalisation after every pooling layer.

    Returns:
        The compiled model, ending in a 10-unit softmax layer, using
        sparse categorical cross-entropy loss and the ``'acc'`` metric.

    Raises:
        KeyError: if ``optimizer`` is not one of the supported names.
    """
    net = Sequential([layers.Input(shape=(28, 28, 1))])

    # Convolutional feature extractor: conv -> pool (-> batch norm) per entry.
    for filters, kernel_size in conv_layers:
        net.add(layers.Conv2D(filters, kernel_size, activation='relu'))
        net.add(layers.MaxPooling2D())
        if with_batch_norm:
            net.add(layers.BatchNormalization())

    # Classifier head: flatten, hidden dense layers, then the 10-way softmax.
    net.add(layers.Flatten())
    use_dropout = dropout > 0
    for units in dense_layers:
        net.add(layers.Dense(units, activation='relu'))
        if use_dropout:
            net.add(layers.Dropout(dropout))
    net.add(layers.Dense(10, activation='softmax'))

    # Map the optimizer name to its class; an unknown name raises KeyError here.
    optimizer_classes = {
        'adam': optimizers.Adam,
        'sgd': optimizers.SGD,
        'rmsprop': optimizers.RMSprop,
    }
    optimizer_cls = optimizer_classes[optimizer]
    net.compile(
        optimizer=optimizer_cls(lr),
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )
    return net

## Define Hyperparameter Search Space

In [21]:
# Candidate convolutional stacks; each inner list is a sequence of
# (filters, kernel_size) pairs consumed by build_cnn_model.
conv_blocks = [
    [(32, 7), (64, 4)],
    [(64, 7), (128, 4)],
    [(32, 7), (64, 4), (128, 2)],
    [(64, 7), (128, 4), (256, 2)],
]
# Candidate dense heads; each inner list gives the hidden-layer unit counts.
dense_blocks = [
    [1024, 256, 64],
    [512, 256, 32],
    [512, 128, 32],
    [1024, 256],
    [512, 64],
    [256, 32],
]
optimisers = ['adam', 'rmsprop', 'sgd']
learning_rates = [0.009, 0.03, 0.09]

# Full Cartesian grid, ordered optimiser -> learning rate -> conv stack -> dense head.
# Dropout (0.2) and batch normalisation (True) are held fixed for every configuration.
ALL_PARAMS = [
    (conv, dense, optimiser, lr, 0.2, True)
    for optimiser in optimisers
    for lr in learning_rates
    for conv in conv_blocks
    for dense in dense_blocks
]

# Running best, updated by the grid-search loop.
BEST_ACCURACY = 0.0
BEST_PARAMS = {}

len(ALL_PARAMS)

## Define Training and Evaluation Function

In [22]:
# Maximum number of epochs per fit; train_and_evaluate may stop earlier via early stopping.
EPOCHS = 20
# Mini-batch size used for fitting, validation and evaluation.
BATCH_SIZE = 100

In [23]:
def train_and_evaluate(model):
    """Fit ``model`` on the training split and return its validation accuracy.

    Training runs for at most ``EPOCHS`` epochs with batches of ``BATCH_SIZE``.
    It stops early once ``val_acc`` has not improved for 3 epochs, in which case
    the best weights seen are restored before scoring.

    Args:
        model: a compiled model whose second ``evaluate`` output is the accuracy
            metric (as produced by ``build_cnn_model``).

    Returns:
        The element at index 1 of ``model.evaluate(X_val, y_val)``.
    """
    stopper = callbacks.EarlyStopping(monitor='val_acc', patience=3, restore_best_weights=True)
    fit_options = dict(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(X_val, y_val),
        validation_batch_size=BATCH_SIZE,
        callbacks=[stopper],
        verbose=0,
    )
    model.fit(X_train, y_train, **fit_options)

    # evaluate() returns [loss, *metrics]; index 1 is the single compiled metric.
    scores = model.evaluate(X_val, y_val, batch_size=BATCH_SIZE, verbose=0)
    return scores[1]

## Perform Grid Search

In [24]:
# Index into ALL_PARAMS at which the grid search starts; configurations before it are
# skipped. The search loop advances this counter after every configuration.
# NOTE(review): with a non-zero start, BEST_ACCURACY / BEST_PARAMS only reflect the
# configurations evaluated from this index onwards - set to 0 to search the whole grid.
LAST_STATE = 170

In [25]:
# Resumable grid search over ALL_PARAMS, starting at index LAST_STATE.
# LAST_STATE is advanced after each configuration, so re-running this cell in the
# same kernel continues after the last completed configuration instead of restarting.
for params in ALL_PARAMS[LAST_STATE:]:
    conv, dense, optimiser, lr, drop, with_bn = params
    # 1-based model number. Computed before the try block so the NON-FEASIBLE message
    # always names the current configuration (it used to be assigned only after a
    # successful run, which printed a stale number or raised NameError).
    m = LAST_STATE + 1
    # Drop Keras global state left by the previous candidate so memory does not
    # keep growing across the many models built in this loop.
    tf.keras.backend.clear_session()
    try:
        model = build_cnn_model(conv, dense, optimiser, lr, drop, with_bn)
        acc = train_and_evaluate(model)
    except ValueError:
        # Presumably a conv/pool stack that shrinks the 28x28 input below the next
        # kernel size; such configurations are reported and skipped.
        print(f'\nMODEL {m}: NON-FEASIBLE')
    else:
        if acc > BEST_ACCURACY:
            BEST_PARAMS = {'OPTIMIZER': optimiser, 'LEARNING_RATE': lr, 'CONV': conv,
                           'BATCH_NORM': with_bn, 'DENSE': dense, 'DROPOUT': drop}
            BEST_ACCURACY = acc

        print(f'\nMODEL {m}: ACCURACY: {acc:.5f}\n\tOPTIMIZER: {optimiser}(learning_rate={lr}) \
                \n\tCONV : {conv}\tBATCH_NORM: {with_bn}\n\tDENSE: {dense}\tDROPOUT: {drop}')
    LAST_STATE += 1

In [26]:
# Highest validation accuracy recorded by the grid-search loop.
BEST_ACCURACY

In [27]:
# Hyperparameters of the configuration that achieved BEST_ACCURACY.
BEST_PARAMS

## Build Best Model

In [28]:
# Rebuild a fresh, untrained model from the winning grid-search configuration.
# Arguments are passed positionally in build_cnn_model's parameter order:
# conv_layers, dense_layers, optimizer, lr, dropout, with_batch_norm.
best_model = build_cnn_model(
    BEST_PARAMS['CONV'],
    BEST_PARAMS['DENSE'],
    BEST_PARAMS['OPTIMIZER'],
    BEST_PARAMS['LEARNING_RATE'],
    BEST_PARAMS['DROPOUT'],
    BEST_PARAMS['BATCH_NORM'],
)

In [29]:
model.summary()

### Retrain on entire data

In [30]:
model.fit(X, y, epochs=EPOCHS, batch_size=BATCH_SIZE)

### Predict on Test data

In [31]:
mnist_test

In [32]:
X_test = mnist_test.to_numpy().reshape(-1, 28, 28, 1)
X_test.shape

In [33]:
y_pred = model.predict(X_test)
y_pred

In [34]:
# Predicted class per sample: index of the largest value along the last axis of y_pred.
predictions = y_pred.argmax(axis=-1)
predictions

## Create Submission

In [35]:
# Submission table: 1-based ImageId in test-set row order, paired with the predicted Label.
submission = pd.DataFrame({'ImageId': range(1, len(y_pred) + 1), 'Label': predictions})
submission

In [37]:
# Write the submission to the Kaggle working directory, omitting the DataFrame index column.
submission.to_csv('/kaggle/working/submission.csv', index=False)