In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision.transforms as transforms
import torchvision.models as models

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
train_df = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test_df = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

In [3]:
y = train_df['label'].values
X = train_df.drop('label', axis=1).values

## Prepare dataset

In [4]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
X_train = torch.tensor(X_train, dtype=torch.float32) / 255.0
X_val = torch.tensor(X_val, dtype=torch.float32) / 255.0
X_test = torch.tensor(test_df.values, dtype=torch.float32) / 255.0

In [6]:
X_train = X_train.view(-1, 1, 28, 28)
X_val = X_val.view(-1, 1, 28, 28)
X_test = X_test.view(-1, 1, 28, 28)

In [7]:
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

In [8]:
# Needed for PyTorch, would've been easier to use PyTorch's own MNIST dataset in other circumstances
class MNISTDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [9]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])


In [10]:
train_dataset = MNISTDataset(X_train, y_train)
val_dataset = MNISTDataset(X_val, y_val)

In [11]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

## Models

### Functions

In [12]:
def prepare_densenet(densenet, num_classes=10):
    densenet.features.conv0 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)
    densenet.features.pool0 = nn.Identity()

    num_features = densenet.classifier.in_features
    densenet.classifier = nn.Linear(num_features, num_classes)
        
    densenet.to(device)
    
    return densenet

In [13]:
def define_hyperparameters(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.00025, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
    
    return criterion, optimizer, scheduler

In [14]:
def train_model(model_name: str, model, num_epochs = 20):
    best_val_acc = 0.0
    best_model_path = f'best_{model_name}.pth'
    
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_acc = correct_train / total_train

        model.eval()
        correct_val = 0
        total_val = 0
        val_loss = 0.0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_acc = correct_val / total_val
        val_loss /= len(val_loader)

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Train Acc: {train_acc:.4f}, "
              f"Val Loss: {val_loss:.4f}, "
              f"Val Acc: {val_acc:.4f}")

        train_losses.append(loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path) 
            print(f"New best {model_name} saved at epoch {epoch+1} with validation accuracy: {val_acc:.4f}")

        scheduler.step(val_loss)

### Parameters

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 10

### DenseNet

#### Prepare model

In [16]:
densenet = models.densenet121(pretrained=True)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 81.4MB/s]


In [17]:
densenet = prepare_densenet(densenet)

#### Train model

In [18]:
criterion, optimizer, scheduler = define_hyperparameters(densenet)

In [19]:
train_model('densenet', densenet, 25)

Epoch 1/25, Train Loss: 0.0839, Train Acc: 0.9760, Val Loss: 0.0294, Val Acc: 0.9912
New best densenet saved at epoch 1 with validation accuracy: 0.9912
Epoch 2/25, Train Loss: 0.0301, Train Acc: 0.9911, Val Loss: 0.0257, Val Acc: 0.9923
New best densenet saved at epoch 2 with validation accuracy: 0.9923
Epoch 3/25, Train Loss: 0.0228, Train Acc: 0.9937, Val Loss: 0.0181, Val Acc: 0.9949
New best densenet saved at epoch 3 with validation accuracy: 0.9949
Epoch 4/25, Train Loss: 0.0228, Train Acc: 0.9928, Val Loss: 0.0251, Val Acc: 0.9931
Epoch 5/25, Train Loss: 0.0163, Train Acc: 0.9948, Val Loss: 0.0196, Val Acc: 0.9940
Epoch 6/25, Train Loss: 0.0178, Train Acc: 0.9946, Val Loss: 0.0214, Val Acc: 0.9933
Epoch 7/25, Train Loss: 0.0157, Train Acc: 0.9953, Val Loss: 0.0194, Val Acc: 0.9937
Epoch 8/25, Train Loss: 0.0146, Train Acc: 0.9958, Val Loss: 0.0347, Val Acc: 0.9886
Epoch 9/25, Train Loss: 0.0143, Train Acc: 0.9958, Val Loss: 0.0235, Val Acc: 0.9927
Epoch 10/25, Train Loss: 0.0059

### Evaluate

#### Evaluate DenseNet

In [20]:
best_densenet = models.densenet121(pretrained=False)
best_densenet = prepare_densenet(best_densenet)
best_densenet.load_state_dict(torch.load('best_densenet.pth'))

  best_densenet.load_state_dict(torch.load('best_densenet.pth'))


<All keys matched successfully>

In [21]:
densenet_predictions = []

with torch.no_grad():
    for images, _ in val_loader:
        images = images.to(device)
        outputs = best_densenet(images)
        _, predictions = torch.max(outputs, 1)
        densenet_predictions.append(predictions.cpu())

In [22]:
densenet_predictions = np.concatenate(densenet_predictions)

densenet_predictions_list = densenet_predictions.tolist()

In [23]:
accuracy = accuracy_score(y_val, densenet_predictions_list)
print(f'Ensemble Accuracy: {accuracy:.4f}')

Ensemble Accuracy: 0.9960


## Get submission

In [24]:
test_dataset = TensorDataset(X_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [25]:
best_densenet.eval()

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): Identity()
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 128, kernel_s

In [26]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
best_model = best_densenet.to(device)

all_predictions = []

with torch.no_grad():
    for inputs in test_loader:
        inputs = inputs[0].to(device)
        outputs = best_densenet(inputs)
        _, predictions = torch.max(outputs, 1)
        all_predictions.append(predictions.cpu().numpy())

In [27]:
all_predictions = np.concatenate(all_predictions)
predictions_list = all_predictions.tolist()

In [28]:
submission = pd.DataFrame(predictions_list)
submission.index.name='ImageId'
submission.index+=1
submission.columns=['Label']

In [29]:
submission.to_csv('submission.csv', index="ImageId")