In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input,Dense
from tensorflow.keras.models import Sequential

In [2]:
# Load the MNIST training table from a CSV located relative to the notebook's
# working directory.
data = pd.read_csv("mnist_train.csv")
# Preview the first rows. Besides 'label' and the 1x1 ... 28x28 columns, the
# frame also carries an 'Unnamed: 0' column holding a running row number.
data.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Target vector: the digit class stored in the 'label' column.
y = data['label']

# Feature matrix: every column except the target. The CSV also carries a
# leftover row-index column ('Unnamed: 0', a running 0, 1, 2, ... counter); it
# is not a pixel, so it must not be fed to the network as an input feature.
# errors='ignore' keeps this cell working if the file is re-exported without
# that column.
X = data.drop(columns=['label']).drop(columns=['Unnamed: 0'], errors='ignore')

# NOTE(review): the remaining columns look like raw pixel intensities and are
# passed on unscaled -- consider normalising before training; confirm the range.

# Kept as the last expression so the preview is actually rendered.
X.head()

In [4]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode both label sets into 10 columns each.
y_train, y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

In [5]:
# Fully connected classifier: one input per feature column, two ReLU hidden
# layers (64 then 32 units) and a 10-unit softmax output.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax'),
])

In [7]:
# Training configuration: Adam optimiser, categorical cross-entropy loss, and
# accuracy reported as the metric.
compile_settings = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_settings)


In [9]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation loss has gone 3 epochs without improving, and hand back
# the weights from the best epoch instead of the last one.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [10]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Write the model to disk only when validation accuracy reaches a new maximum.
checkpoint_options = {
    'filepath': 'best_model.keras',
    'monitor': 'val_accuracy',
    'save_best_only': True,
    'mode': 'max',
}
checkpoint = ModelCheckpoint(**checkpoint_options)

In [11]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Halve the learning rate after 2 epochs without a better validation loss,
# never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)

In [12]:
from tensorflow.keras.callbacks import CSVLogger
# Log training results to a CSV file.
# NOTE(review): the file name is spelled "mnisst" -- looks like a typo for
# "mnist"; left unchanged here in case other code reads this exact path.
csv_logger = CSVLogger("mnisst_training_log.csv")

In [13]:
# TensorBoard callback for visualising the run: event files go to the 'logs'
# directory, with histograms computed every epoch.
from tensorflow.keras.callbacks import TensorBoard

tensorboard = TensorBoard(log_dir='logs', histogram_freq=1)

In [15]:
# Abort training as soon as a NaN loss is encountered.
from tensorflow.keras.callbacks import TerminateOnNaN
terminate = TerminateOnNaN()

In [16]:
# learning rate schedule (custom function)
from tensorflow.keras.callbacks import LearningRateScheduler
def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch.

    For epoch values below 5 the incoming rate is returned untouched; for any
    later epoch the function returns 90% of the rate it was handed, so the
    decay compounds when the result is fed back in on the next call.

    Args:
        epoch: Epoch number supplied by the caller.
        lr: Learning rate in effect before this epoch.

    Returns:
        The learning rate to apply for this epoch.
    """
    hold_epochs = 5   # epochs during which the rate is left alone
    decay = 0.9       # multiplicative factor applied afterwards
    return lr if epoch < hold_epochs else lr * decay
    
# Wrap the schedule function in a Keras callback so it can be passed to fit().
lr_scheduler = LearningRateScheduler(scheduler)

In [17]:
# Callbacks handed to fit(), in the order they are registered.
training_callbacks = [
    early_stop,
    checkpoint,
    reduce_lr,
    csv_logger,
    tensorboard,
    terminate,
    lr_scheduler,
]

# Train for at most 20 epochs in mini-batches of 32, evaluating on the
# held-out split after every epoch; the returned History object is kept.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
    callbacks=training_callbacks,
)


Epoch 1/20
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.6907 - loss: 1.8298 - val_accuracy: 0.8263 - val_loss: 0.7592 - learning_rate: 0.0010
Epoch 2/20
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.8584 - loss: 0.5372 - val_accuracy: 0.8832 - val_loss: 0.4149 - learning_rate: 0.0010
Epoch 3/20
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9047 - loss: 0.3496 - val_accuracy: 0.9151 - val_loss: 0.3190 - learning_rate: 0.0010
Epoch 4/20
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9286 - loss: 0.2706 - val_accuracy: 0.9347 - val_loss: 0.2498 - learning_rate: 0.0010
Epoch 5/20
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9415 - loss: 0.2204 - val_accuracy: 0.9413 - val_loss: 0.2297 - learning_rate: 0.0010
Epoch 6/20
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[