In [1]:
# 1. Imports
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# 1. Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

# 2. Load Data

X = train_df.drop("label", axis=1).values.reshape(-1, 28, 28, 1) / 255.0
y = to_categorical(train_df["label"].values, num_classes=10)
X_test = test_df.values.reshape(-1, 28, 28, 1) / 255.0

# 3. Train/Validation Split
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.1, stratify=train_df["label"], random_state=42
)

# 4. Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)
datagen.fit(X_train)

# 5. Build CNN Model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(),
    Dropout(0.25),

    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

# 6. Compile
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# 7. Callbacks
callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ReduceLROnPlateau(patience=2, factor=0.5, verbose=1)
]

# 8. Train
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=64),
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=2
)

# 9. Evaluate
val_acc = model.evaluate(X_val, y_val, verbose=0)[1]
print(f"✅ Validation Accuracy: {val_acc:.4f}")

# 10. Predict Test Set
preds = model.predict(X_test)
labels = np.argmax(preds, axis=1)

# 11. Prepare Submission
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(labels) + 1),
    "Label": labels
})



Epoch 1/30


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


591/591 - 40s - 68ms/step - accuracy: 0.8595 - loss: 0.4642 - val_accuracy: 0.9762 - val_loss: 0.0831 - learning_rate: 0.0010
Epoch 2/30
591/591 - 37s - 63ms/step - accuracy: 0.9559 - loss: 0.1426 - val_accuracy: 0.9848 - val_loss: 0.0481 - learning_rate: 0.0010
Epoch 3/30
591/591 - 37s - 63ms/step - accuracy: 0.9680 - loss: 0.1033 - val_accuracy: 0.9883 - val_loss: 0.0364 - learning_rate: 0.0010
Epoch 4/30
591/591 - 38s - 65ms/step - accuracy: 0.9725 - loss: 0.0880 - val_accuracy: 0.9907 - val_loss: 0.0309 - learning_rate: 0.0010
Epoch 5/30
591/591 - 37s - 62ms/step - accuracy: 0.9752 - loss: 0.0802 - val_accuracy: 0.9864 - val_loss: 0.0406 - learning_rate: 0.0010
Epoch 6/30
591/591 - 39s - 65ms/step - accuracy: 0.9781 - loss: 0.0698 - val_accuracy: 0.9926 - val_loss: 0.0293 - learning_rate: 0.0010
Epoch 7/30
591/591 - 37s - 63ms/step - accuracy: 0.9798 - loss: 0.0649 - val_accuracy: 0.9921 - val_loss: 0.0253 - learning_rate: 0.0010
Epoch 8/30
591/591 - 38s - 65ms/step - accuracy: 0.9

OSError: Cannot save file into a non-existent directory: '../submissions'

In [11]:
submission.to_csv("cnn_submission.csv", index=False)
print("📁 Submission file saved.")

📁 Submission file saved.


In [12]:
!pip install tensorflow



In [15]:
# Imports
import numpy as np
import pandas as pd
import math
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Add, GlobalAveragePooling2D, Dense, BatchNormalization, ReLU
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.optimizers import Adam
import os

# Residual block
def residual_block(x, filters):
    shortcut = x
    x = Conv2D(filters, 3, padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters, 3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Add()([x, shortcut])
    x = ReLU()(x)
    return x

# Model builder
def build_resnet_mnist():
    inputs = Input((28, 28, 1))
    x = Conv2D(64, 3, padding='same')(inputs)
    x = residual_block(x, 64)
    x = residual_block(x, 64)
    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)
    model = Model(inputs, x)
    model.compile(optimizer=Adam(0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Cosine annealing schedule
def cosine_annealing(epoch, lr):
    return 0.001 * (math.cos(math.pi * epoch / 30) + 1) / 2

# Load data
train = pd.read_csv("../data/train.csv")
test = pd.read_csv("../data/test.csv")
X = train.drop("label", axis=1).values.reshape(-1, 28, 28, 1) / 255.0
y = to_categorical(train["label"], num_classes=10)
X_test = test.values.reshape(-1, 28, 28, 1) / 255.0

# Stratified 5-fold CV
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
preds = np.zeros((X_test.shape[0], 10))

for fold, (train_idx, val_idx) in enumerate(cv.split(X, np.argmax(y, axis=1))):
    print(f"🧠 Fold {fold + 1}")
    model = build_resnet_mnist()

    datagen = ImageDataGenerator(
        rotation_range=10,
        zoom_range=0.1,
        width_shift_range=0.1,
        height_shift_range=0.1
    )
    datagen.fit(X[train_idx])

    model.fit(
        datagen.flow(X[train_idx], y[train_idx], batch_size=64),
        epochs=30,
        validation_data=(X[val_idx], y[val_idx]),
        callbacks=[
            EarlyStopping(patience=5, restore_best_weights=True),
            ReduceLROnPlateau(patience=2, factor=0.5, verbose=1),
            LearningRateScheduler(cosine_annealing)
        ],
        verbose=2
    )

    preds += model.predict(X_test, verbose=0)

# Average ensemble predictions
final_preds = np.argmax(preds / 5, axis=1)

# Submission
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(final_preds) + 1),
    "Label": final_preds
})
submission.to_csv("cnn_resnet_ensemble_submission.csv", index=False)
print("✅ Submission saved: cnn_resnet_ensemble_submission.csv")


🧠 Fold 1
Epoch 1/30


  self._warn_if_super_not_called()


525/525 - 201s - 384ms/step - accuracy: 0.8753 - loss: 0.6253 - val_accuracy: 0.5421 - val_loss: 1.3207 - learning_rate: 0.0010
Epoch 2/30
525/525 - 238s - 453ms/step - accuracy: 0.9658 - loss: 0.1682 - val_accuracy: 0.9543 - val_loss: 0.1993 - learning_rate: 9.9726e-04
Epoch 3/30
525/525 - 217s - 413ms/step - accuracy: 0.9741 - loss: 0.1114 - val_accuracy: 0.8539 - val_loss: 0.4236 - learning_rate: 9.8907e-04
Epoch 4/30

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00048776413314044476.
525/525 - 196s - 374ms/step - accuracy: 0.9785 - loss: 0.0864 - val_accuracy: 0.5245 - val_loss: 1.8318 - learning_rate: 4.8776e-04
Epoch 5/30
525/525 - 191s - 364ms/step - accuracy: 0.9798 - loss: 0.0761 - val_accuracy: 0.3302 - val_loss: 5.9014 - learning_rate: 9.5677e-04
Epoch 6/30


KeyboardInterrupt: 