In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import random
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras import layers, models, backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


In [None]:
# Load the Kaggle "digit-recognizer" CSVs. The training file carries a "label"
# column next to the pixel columns; the test file is used as pixel data only.
train_data = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
test_data = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")
print(f"Train shape: {train_data.shape}, Test shape: {test_data.shape}")

# Split the training frame into pixel features and digit labels.
X_train_full = train_data.drop("label", axis=1)
y_train_full = train_data["label"]

# Scale pixels by 1/255 and reshape every row into a 28x28 single-channel image
# (presumably raw values are 0-255, which maps them to [0, 1] - verify against data).
# Convert to float32 *before* dividing: `DataFrame / 255.0` would materialise
# float64 copies of both tables, doubling memory for data that Keras handles as
# float32 by default anyway.
X_train = (X_train_full.to_numpy(dtype=np.float32) / 255.0).reshape(-1, 28, 28, 1)
X_test = (test_data.to_numpy(dtype=np.float32) / 255.0).reshape(-1, 28, 28, 1)
y_train = y_train_full.astype("int")

# Seed Python, NumPy and TensorFlow RNGs so that a fresh Restart & Run All
# starts from the same random state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [None]:
#  Define Swish activation
def swish(x):
    """Swish activation: the input scaled by its own sigmoid, ``x * sigmoid(x)``."""
    gate = K.sigmoid(x)
    return x * gate

#  Define Squeeze-and-Excitation block
def se_block(input_tensor, reduction=16):
    """Re-weight the channels of ``input_tensor`` with a squeeze-and-excitation gate.

    The input is globally average-pooled, passed through a ReLU bottleneck
    Dense layer and a sigmoid Dense layer with one unit per channel, reshaped
    to ``(1, 1, channels)`` and multiplied back onto the input.

    Args:
        input_tensor: Feature map whose last axis is the channel axis. The
            channel count is read from ``input_tensor.shape[-1]`` and must be
            a static integer.
        reduction: Divisor applied to the channel count to size the
            bottleneck layer.

    Returns:
        The element-wise product of ``input_tensor`` and the per-channel gate.
    """
    channels = input_tensor.shape[-1]
    # Clamp to at least one unit: when channels < reduction the integer
    # division yields 0, i.e. a zero-unit bottleneck, which is invalid or at
    # best produces a gate that no longer depends on the input.
    bottleneck_units = max(channels // reduction, 1)

    se = layers.GlobalAveragePooling2D()(input_tensor)
    se = layers.Dense(bottleneck_units, activation='relu')(se)
    se = layers.Dense(channels, activation='sigmoid')(se)
    # Shape the gate so it broadcasts over the two spatial axes.
    se = layers.Reshape((1, 1, channels))(se)
    return layers.Multiply()([input_tensor, se])

#  Define the CNN model architecture
def build_model():
    """Build and compile the digit-classification CNN.

    Architecture: two convolutional stages (32 then 64 filters), each made of
    two conv -> batch-norm -> swish units followed by a squeeze-and-excitation
    block, 2x2 max pooling and dropout; then a 128-unit dense head and a
    10-way softmax output.

    Returns:
        A model taking ``(28, 28, 1)`` inputs, compiled with the Adam
        optimizer, sparse categorical cross-entropy loss and an accuracy metric.
    """

    def conv_bn_swish(tensor, filters):
        # 3x3 same-padded convolution, then batch norm, then swish.
        tensor = layers.Conv2D(filters, (3, 3), padding='same')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Activation(swish)(tensor)

    inputs = layers.Input(shape=(28, 28, 1))

    # Convolutional feature extractor: one pass of the loop per stage.
    features = inputs
    for filters in (32, 64):
        features = conv_bn_swish(features, filters)
        features = conv_bn_swish(features, filters)
        features = se_block(features)
        features = layers.MaxPooling2D(2, 2)(features)
        features = layers.Dropout(0.3)(features)

    # Dense classification head.
    head = layers.Flatten()(features)
    head = layers.Dense(128)(head)
    head = layers.BatchNormalization()(head)
    head = layers.Activation(swish)(head)
    head = layers.Dropout(0.4)(head)

    outputs = layers.Dense(10, activation='softmax')(head)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
# Image data augmentation
# Random geometric jitter for training batches: rotation range of 10,
# and a range of 0.1 for zoom, horizontal shift and vertical shift.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
aug = ImageDataGenerator(**augmentation_options)

In [None]:
# Stratified K-Fold Cross-Validation
# Trains one freshly built model per fold on augmented data, keeps every
# trained model in `models_list` and every fold's validation accuracy in
# `fold_accuracies`.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
models_list = []
fold_accuracies = []

# StratifiedKFold yields *positional* indices. Indexing a pandas Series with
# them is label-based and only correct while the index is the default
# RangeIndex, so take a plain NumPy view of the labels first.
y_train_arr = np.asarray(y_train)

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train_arr)):
    print(f"\n Fold {fold + 1}")
    X_tr, X_val = X_train[train_idx], X_train[val_idx]
    y_tr, y_val = y_train_arr[train_idx], y_train_arr[val_idx]

    model = build_model()

    # ImageDataGenerator.fit() only computes statistics for the featurewise /
    # ZCA options; skip the full copy of the fold when none of them is enabled.
    if aug.featurewise_center or aug.featurewise_std_normalization or aug.zca_whitening:
        aug.fit(X_tr)

    # Stop once validation accuracy has not improved for 5 epochs.
    early_stop = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

    history = model.fit(
        aug.flow(X_tr, y_tr, batch_size=64),
        validation_data=(X_val, y_val),
        epochs=30,
        callbacks=[early_stop],
        verbose=1
    )

    #  Evaluate and store model
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    fold_accuracies.append(val_acc)
    models_list.append(model)
    print(f"Fold {fold + 1} Accuracy: {val_acc:.4f}")

    #  Plot training and validation accuracy per fold
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(history.history['accuracy'], label='Train')
    ax.plot(history.history['val_accuracy'], label='Validation')
    ax.set_title(f'Fold {fold + 1} Accuracy')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy')
    ax.legend()
    ax.grid(True)
    plt.show()
    # Release the figure explicitly so figures do not pile up across folds.
    plt.close(fig)

#  Average accuracy from all folds
mean_acc = np.mean(fold_accuracies)
print(f"\n Mean Accuracy: {mean_acc:.4f}")

In [None]:
# Ensemble prediction on test set
# Soft voting: sum the class-probability outputs of every fold model, divide
# by the number of models and take the highest-scoring class per test image.
print("\n Predicting using ensemble of models...")
all_preds = np.zeros((len(X_test), 10))

for member in models_list:
    all_preds = all_preds + member.predict(X_test, verbose=0)

ensemble_preds = (all_preds / len(models_list)).argmax(axis=1)


In [None]:
# Create submission file
# Two columns: a 1-based ImageId followed by the ensemble's predicted Label.
submission = pd.DataFrame({"Label": ensemble_preds})
submission.insert(0, "ImageId", np.arange(1, len(submission) + 1))
submission.to_csv("submission.csv", index=False)
print(" Submission saved as 'submission.csv'")

In [None]:
# Confusion Matrix using last fold validation set
# NOTE: relies on `X_val` / `y_val` left over from the final iteration of the
# cross-validation loop, paired with the last model stored in `models_list`.
last_model = models_list[-1]
y_val_preds = np.argmax(last_model.predict(X_val, verbose=0), axis=1)

# Pin the label set so the matrix is always 10x10 and lines up with the
# display labels, even if a digit were absent from this fold.
digit_labels = np.arange(10)
cm = confusion_matrix(y_val, y_val_preds, labels=digit_labels)

# Draw on an explicit Axes: ConfusionMatrixDisplay.plot() opens its own figure
# when no `ax` is given, which left a stray blank figure and ignored the figsize.
fig, ax = plt.subplots(figsize=(8, 6))
ConfusionMatrixDisplay(cm, display_labels=digit_labels).plot(cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix - Last Fold")
ax.grid(False)
plt.show()