In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

# Test whether Kaggle's GPU is using
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv
Using device: cuda


In [2]:
train_set = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
test_set = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")

In [3]:
pixels = train_set.drop('label', axis=1).values.astype(np.float32) / 255.0
labels = train_set['label'].values
data_mean = np.mean(pixels)
data_std = np.std(pixels)
print(f"Dataset Mean: {data_mean:.4f}, Std: {data_std:.4f}")

Dataset Mean: 0.1310, Std: 0.3085


In [4]:
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(10),      
    transforms.RandomAffine(0, translate=(0.1,0.1)), 
    transforms.Normalize((data_mean,), (data_std,))
])

val_test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((data_mean,), (data_std,))
])

In [5]:
X_train, X_val, y_train, y_val = train_test_split(
    pixels.reshape(-1, 28, 28), labels, 
    test_size=0.2, random_state=42, stratify=labels
)

In [6]:
class DigitDataset(Dataset):
    def __init__(self, images, labels=None, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        if self.transform:
            img = self.transform(img)
        if self.labels is not None:
            return img, self.labels[idx]
        else:
            return img

In [7]:
#dataloader
batch_size = 128
train_dataset = DigitDataset(X_train, y_train, transform=train_transform)
val_dataset = DigitDataset(X_val, y_val, transform=val_test_transform)
test_dataset = DigitDataset(
    test_set.values.reshape(-1, 28, 28).astype(np.float32) / 255.0,
    transform=val_test_transform
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
class EnhancedCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),  # 28x28x32
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 14x14x32
            
            nn.Conv2d(32, 64, 3, padding=1),  # 14x14x64
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 7x7x64
        )
        
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(64*7*7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.conv_block(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)

model = EnhancedCNN().to(device)

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=2, factor=0.5)

In [10]:
num_epochs = 50
best_val_acc = 0.0
patience = 3
no_improve = 0

for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
    
    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    
    train_loss = train_loss / len(train_loader.dataset)
    val_loss = val_loss / len(val_loader.dataset)
    val_acc = correct / total
    scheduler.step(val_acc)
    
    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | LR: {scheduler.get_last_lr()}")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        no_improve = 0
    else:
        no_improve += 1
        if no_improve >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

Epoch 1/50
Train Loss: 0.2652 | Val Loss: 0.1000 | Val Acc: 0.9683 | LR: [0.001]
Epoch 2/50
Train Loss: 0.0941 | Val Loss: 0.0324 | Val Acc: 0.9896 | LR: [0.001]
Epoch 3/50
Train Loss: 0.0723 | Val Loss: 0.0481 | Val Acc: 0.9860 | LR: [0.001]
Epoch 4/50
Train Loss: 0.0646 | Val Loss: 0.0367 | Val Acc: 0.9886 | LR: [0.001]
Epoch 5/50
Train Loss: 0.0579 | Val Loss: 0.0275 | Val Acc: 0.9917 | LR: [0.001]
Epoch 6/50
Train Loss: 0.0539 | Val Loss: 0.0249 | Val Acc: 0.9923 | LR: [0.001]
Epoch 7/50
Train Loss: 0.0496 | Val Loss: 0.0244 | Val Acc: 0.9915 | LR: [0.001]
Epoch 8/50
Train Loss: 0.0484 | Val Loss: 0.0205 | Val Acc: 0.9943 | LR: [0.001]
Epoch 9/50
Train Loss: 0.0432 | Val Loss: 0.0190 | Val Acc: 0.9945 | LR: [0.001]
Epoch 10/50
Train Loss: 0.0396 | Val Loss: 0.0217 | Val Acc: 0.9932 | LR: [0.001]
Epoch 11/50
Train Loss: 0.0417 | Val Loss: 0.0176 | Val Acc: 0.9948 | LR: [0.001]
Epoch 12/50
Train Loss: 0.0401 | Val Loss: 0.0182 | Val Acc: 0.9949 | LR: [0.001]
Epoch 13/50
Train Loss: 0

In [11]:
model.load_state_dict(torch.load('best_model.pth'))
model.eval()

preds = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        preds.extend(predicted.cpu().numpy())


submission = pd.DataFrame({
    'ImageId': range(1, len(preds)+1),
    'Label': preds
})
submission.to_csv('submission.csv', index=False)
print("Submission file saved!")

  model.load_state_dict(torch.load('best_model.pth'))


Submission file saved!
