In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input mount recursively and echo the full path of every file found.
for folder, _, entries in os.walk('/kaggle/input'):
    for entry in entries:
        print(os.path.join(folder, entry))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


In [2]:
# =========================
# Digit Recognizer - Kaggle
# Baseline ML + CNN + Optuna
# =========================

# Install needed packages (if using Colab, Kaggle already has them)
!pip install optuna lightgbm xgboost -q


In [3]:

# -------------------------
# Imports
# -------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Dense, Dropout, Flatten, Conv2D, MaxPooling2D,
                                     BatchNormalization)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

import optuna

# -------------------------
# Load Data
# -------------------------
train = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
test = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")

# Split the labelled frame into targets and pixel features, dividing the
# pixel values by 255.0 on the way; the unlabelled test frame gets the
# same scaling.
y = train["label"].to_numpy()
X = train.drop(columns="label").to_numpy() / 255.0
test = test.div(255.0)

# Hold out 10% of the labelled rows for validation (seeded split).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# -------------------------
# Baseline ML Models
# -------------------------
# Classical baselines fitted on the flattened, scaled pixel vectors.
# Every estimator is seeded so the printed validation accuracies can be
# reproduced on a fresh run, in line with the seeded train/validation split.
BASELINE_SEED = 42

log_reg = LogisticRegression(max_iter=200, solver="saga", n_jobs=-1,
                             random_state=BASELINE_SEED)
rf = RandomForestClassifier(n_estimators=200, n_jobs=-1,
                            random_state=BASELINE_SEED)
xg = xgb.XGBClassifier(tree_method="hist", n_estimators=500, max_depth=8,
                       random_state=BASELINE_SEED)

# One fit/score routine for all baselines instead of three copy-pasted stanzas.
for banner, estimator in (
    ("=== Logistic Regression Baseline ===", log_reg),
    ("=== Random Forest ===", rf),
    ("=== XGBoost ===", xg),
):
    print(banner)
    estimator.fit(X_train, y_train)
    print("Validation Accuracy:", estimator.score(X_val, y_val))

# -------------------------
# Deep Learning (CNN)
# -------------------------
# Reshape each flat pixel row into a 28x28 single-channel image, the layout
# the convolutional model is built for.
IMAGE_BATCH_SHAPE = (-1, 28, 28, 1)
X_train_cnn, X_val_cnn = (
    pixels.reshape(IMAGE_BATCH_SHAPE) for pixels in (X_train, X_val)
)
test_cnn = test.to_numpy().reshape(IMAGE_BATCH_SHAPE)

# Data Augmentation: small random rotations, shifts and zooms.
augmentation_options = dict(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
# NOTE(review): fit() presumably only matters for the featurewise/ZCA options,
# none of which are enabled here, so this call looks redundant. Confirm before removing.
datagen.fit(X_train_cnn)

def build_cnn(dropout1=0.25, dropout2=0.5, dense_units=256, lr=1e-3):
    """Build and compile the digit-classification CNN.

    The network is two convolutional stages (2x Conv2D + BatchNormalization,
    then max-pooling and dropout; 32 filters in the first stage, 64 in the
    second) followed by a dense head ending in a 10-way softmax.

    Args:
        dropout1: Dropout rate applied after each convolutional stage.
        dropout2: Dropout rate applied after the dense hidden layer.
        dense_units: Width of the dense hidden layer.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model taking ``(28, 28, 1)`` inputs, using
        sparse categorical cross-entropy loss and tracking accuracy.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first Conv2D triggers a Keras warning.
        tf.keras.Input(shape=(28,28,1)),
        Conv2D(32, (3,3), activation="relu"),
        BatchNormalization(),
        Conv2D(32, (3,3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D(2,2),
        Dropout(dropout1),

        Conv2D(64, (3,3), activation="relu"),
        BatchNormalization(),
        Conv2D(64, (3,3), activation="relu"),
        BatchNormalization(),
        MaxPooling2D(2,2),
        Dropout(dropout1),

        Flatten(),
        Dense(dense_units, activation="relu"),
        BatchNormalization(),
        Dropout(dropout2),
        Dense(10, activation="softmax")
    ])

    # Labels are integer class ids, hence the *sparse* cross-entropy loss.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# -------------------------
# Hyperparameter Tuning with Optuna
# -------------------------
def objective(trial):
    """Optuna objective: train one CNN configuration and score it.

    Samples the two dropout rates, the dense width, the learning rate
    (log-uniformly) and the batch size, trains a fresh model on augmented
    batches for up to 20 epochs with early stopping and learning-rate
    reduction, and reports the best validation accuracy seen.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The maximum ``val_accuracy`` over the epochs that were run.
    """
    # Release Keras state left over from the previous trial so memory does
    # not keep growing across the study.
    tf.keras.backend.clear_session()

    dropout1 = trial.suggest_float("dropout1", 0.2, 0.4)
    dropout2 = trial.suggest_float("dropout2", 0.3, 0.6)
    dense_units = trial.suggest_categorical("dense_units", [128, 256, 512])
    # suggest_loguniform is deprecated; suggest_float(log=True) is its replacement.
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

    model = build_cnn(dropout1, dropout2, dense_units, lr)

    es = EarlyStopping(patience=5, restore_best_weights=True)
    rlrop = ReduceLROnPlateau(patience=3, factor=0.5, verbose=0)

    history = model.fit(
        datagen.flow(X_train_cnn, y_train, batch_size=batch_size),
        validation_data=(X_val_cnn, y_val),
        epochs=20,
        callbacks=[es, rlrop],
        verbose=0
    )

    val_acc = max(history.history["val_accuracy"])
    return val_acc

# ⚡ Search budget: 20 trials (raise N_TRIALS if GPU/time allows)
N_TRIALS = 20

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=N_TRIALS)

print("Best Params:", study.best_params)

# -------------------------
# Train Final CNN with Best Params
# -------------------------
best = study.best_params

# batch_size is a fit-time setting, so only the remaining four tuned values
# are handed to the model builder.
final_model = build_cnn(
    dropout1=best["dropout1"],
    dropout2=best["dropout2"],
    dense_units=best["dense_units"],
    lr=best["lr"],
)

# Callbacks: stop early and restore the best weights, halve the learning
# rate on plateaus, and keep the best model seen so far on disk.
es = EarlyStopping(restore_best_weights=True, patience=7)
rlrop = ReduceLROnPlateau(factor=0.5, patience=3)
checkpoint = ModelCheckpoint("best_model.h5", save_best_only=True)

augmented_batches = datagen.flow(
    X_train_cnn, y_train, batch_size=best["batch_size"]
)
final_model.fit(
    augmented_batches,
    epochs=40,
    validation_data=(X_val_cnn, y_val),
    callbacks=[es, rlrop, checkpoint],
)

# -------------------------
# Make Submission
# -------------------------
# Take the highest-scoring class for every test image; ImageId is 1-based.
class_scores = final_model.predict(test_cnn)
predictions = class_scores.argmax(axis=1)

image_ids = np.arange(len(predictions)) + 1
submission = pd.DataFrame({"ImageId": image_ids, "Label": predictions})

submission.to_csv("submission.csv", index=False)
print("✅ Submission file saved!")


2025-09-04 22:14:25.860217: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757024066.123371      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757024066.203538      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


=== Logistic Regression Baseline ===




Validation Accuracy: 0.9128571428571428
=== Random Forest ===
Validation Accuracy: 0.965
=== XGBoost ===


[I 2025-09-04 22:32:56,041] A new study created in memory with name: no-name-c9fb82e8-59f4-4e4d-a98f-8150dddf0ca6


Validation Accuracy: 0.9766666666666667


  lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-09-04 22:32:56.056813: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  self._warn_if_super_not_called()
[I 2025-09-04 22:54:16,958] Trial 0 finished with value: 0.9952380657196045 and parameters: {'dropout1': 0.22335580816706768, 'dropout2': 0.5720402733115313, 'dense_units': 512, 'lr': 0.003605100072593643, 'batch_size': 64}. Best is trial 0 with value: 0.9952380657196045.
[I 2025-09-04 23:17:02,891] Trial 1 finished with value: 0.9952380657196045 and parameters: {'dropout1': 0.2913542678131094, 'dropout2': 0.44715108689524546, 'dense_units': 128, 'lr': 0.000984879651317627, 'batch_size': 32}. Best is trial 0 with value: 0.9952380657196045.
[I 2025-09-04 23:39:01,045] Trial 2 finished with value: 0.9938095211982727 and parameters: {'dropout1': 0.23051890

Best Params: {'dropout1': 0.3348594091594718, 'dropout2': 0.47728390110604013, 'dense_units': 512, 'lr': 0.0013562036996338472, 'batch_size': 64}
Epoch 1/40
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 127ms/step - accuracy: 0.7712 - loss: 0.7907 - val_accuracy: 0.9852 - val_loss: 0.0508 - learning_rate: 0.0014
Epoch 2/40
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 127ms/step - accuracy: 0.9558 - loss: 0.1400 - val_accuracy: 0.9874 - val_loss: 0.0375 - learning_rate: 0.0014
Epoch 3/40
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 131ms/step - accuracy: 0.9673 - loss: 0.1085 - val_accuracy: 0.9921 - val_loss: 0.0234 - learning_rate: 0.0014
Epoch 4/40
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 134ms/step - accuracy: 0.9705 - loss: 0.0947 - val_accuracy: 0.9914 - val_loss: 0.0242 - learning_rate: 0.0014
Epoch 5/40
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 139ms/step - accurac