In [44]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision.transforms as transforms
import torchvision.models as models

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
train_df = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test_df = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

In [3]:
y = train_df['label'].values
X = train_df.drop('label', axis=1).values

## Prepare dataset

In [4]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
X_train = torch.tensor(X_train, dtype=torch.float32) / 255.0
X_val = torch.tensor(X_val, dtype=torch.float32) / 255.0
X_test = torch.tensor(test_df.values, dtype=torch.float32) / 255.0

In [6]:
X_train = X_train.view(-1, 1, 28, 28)
X_val = X_val.view(-1, 1, 28, 28)
X_test = X_test.view(-1, 1, 28, 28)

In [7]:
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

In [8]:
# Needed for PyTorch, would've been easier to use PyTorch's own MNIST dataset in other circumstances
class MNISTDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [10]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])


In [11]:
train_dataset = MNISTDataset(X_train, y_train)
val_dataset = MNISTDataset(X_val, y_val)

In [12]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

## Models

### Functions

In [13]:
def prepare_resnet(resnet):
    resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    resnet.fc = nn.Linear(resnet.fc.in_features, 10)
    
    resnet.to(device)

In [14]:
def prepare_densenet(densenet, num_classes=10):
    # Modify the initial convolution layer to accept single-channel input and adjust kernel size
    densenet.features.conv0 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)

    # Modify the pooling layers and transitions to ensure they do not reduce the feature map size too aggressively
    # For DenseNet, this often involves changing the stride or kernel size of the pooling layers
    densenet.features.pool0 = nn.Identity()  # Remove the initial pooling layer

    num_features = densenet.classifier.in_features
    densenet.classifier = nn.Linear(num_features, num_classes)
        
    densenet.to(device)
    
    return densenet

In [15]:
def define_hyperparameters(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.00025, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
    
    return criterion, optimizer, scheduler

In [16]:
def train_model(model_name: str, model, num_epochs = 20):
    best_val_acc = 0.0
    best_model_path = f'best_{model_name}.pth'
    
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_acc = correct_train / total_train

        model.eval()
        correct_val = 0
        total_val = 0
        val_loss = 0.0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_acc = correct_val / total_val
        val_loss /= len(val_loader)

        print(f"Epoch {epoch+1}/{num_epochs}, "
              f"Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Train Acc: {train_acc:.4f}, "
              f"Val Loss: {val_loss:.4f}, "
              f"Val Acc: {val_acc:.4f}")

        train_losses.append(loss)
        val_losses.append(val_loss)
        train_accuracies.append(train_acc)
        val_accuracies.append(val_acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path)  # Save the best model
            print(f"New best {model_name} saved at epoch {epoch+1} with validation accuracy: {val_acc:.4f}")

        scheduler.step(val_loss)

### Parameters

In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 10

### ResNet

#### Prepare model

In [18]:
resnet = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 213MB/s]


In [19]:
prepare_resnet(resnet)

#### Train model

In [20]:
criterion, optimizer, scheduler = define_hyperparameters(resnet)

In [21]:
train_model('resnet', resnet, 30)

Epoch 1/20, Train Loss: 0.2687, Train Acc: 0.9187, Val Loss: 0.0814, Val Acc: 0.9750
New best resnet saved at epoch 1 with validation accuracy: 0.9750
Epoch 2/20, Train Loss: 0.0899, Train Acc: 0.9719, Val Loss: 0.0625, Val Acc: 0.9798
New best resnet saved at epoch 2 with validation accuracy: 0.9798
Epoch 3/20, Train Loss: 0.0608, Train Acc: 0.9815, Val Loss: 0.0618, Val Acc: 0.9819
New best resnet saved at epoch 3 with validation accuracy: 0.9819
Epoch 4/20, Train Loss: 0.0438, Train Acc: 0.9866, Val Loss: 0.0519, Val Acc: 0.9839
New best resnet saved at epoch 4 with validation accuracy: 0.9839
Epoch 5/20, Train Loss: 0.0382, Train Acc: 0.9879, Val Loss: 0.0487, Val Acc: 0.9851
New best resnet saved at epoch 5 with validation accuracy: 0.9851
Epoch 6/20, Train Loss: 0.0329, Train Acc: 0.9900, Val Loss: 0.0548, Val Acc: 0.9845
Epoch 7/20, Train Loss: 0.0344, Train Acc: 0.9897, Val Loss: 0.0515, Val Acc: 0.9852
New best resnet saved at epoch 7 with validation accuracy: 0.9852
Epoch 8/2

### DenseNet

#### Prepare model

In [22]:
densenet = models.densenet121(pretrained=True)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 166MB/s]


In [23]:
densenet = prepare_densenet(densenet)

#### Train model

In [24]:
criterion, optimizer, scheduler = define_hyperparameters(densenet)

In [25]:
train_model('densenet', densenet, 15)

Epoch 1/20, Train Loss: 0.0899, Train Acc: 0.9749, Val Loss: 0.0281, Val Acc: 0.9913
New best densenet saved at epoch 1 with validation accuracy: 0.9913
Epoch 2/20, Train Loss: 0.0310, Train Acc: 0.9912, Val Loss: 0.0281, Val Acc: 0.9912
Epoch 3/20, Train Loss: 0.0242, Train Acc: 0.9927, Val Loss: 0.0187, Val Acc: 0.9942
New best densenet saved at epoch 3 with validation accuracy: 0.9942
Epoch 4/20, Train Loss: 0.0240, Train Acc: 0.9929, Val Loss: 0.0185, Val Acc: 0.9944
New best densenet saved at epoch 4 with validation accuracy: 0.9944
Epoch 5/20, Train Loss: 0.0160, Train Acc: 0.9957, Val Loss: 0.0189, Val Acc: 0.9943
Epoch 6/20, Train Loss: 0.0165, Train Acc: 0.9950, Val Loss: 0.0206, Val Acc: 0.9942
Epoch 7/20, Train Loss: 0.0181, Train Acc: 0.9944, Val Loss: 0.0283, Val Acc: 0.9921
Epoch 8/20, Train Loss: 0.0155, Train Acc: 0.9949, Val Loss: 0.0229, Val Acc: 0.9929
Epoch 9/20, Train Loss: 0.0154, Train Acc: 0.9954, Val Loss: 0.0188, Val Acc: 0.9943
Epoch 10/20, Train Loss: 0.0128

 ### Custom-made CNN

In [26]:
class CNN(nn.Module):
    def __init__(self, output_size=10):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        
        self.pool = nn.MaxPool2d(2, 2) 

        self.fc1 = nn.Linear(64 * 14 * 14, 128)
        self.bn_fc1 = nn.BatchNorm1d(128)
        
        self.fc2 = nn.Linear(128, output_size)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.6)
        
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        
        x = x.view(x.size(0), -1)
        
        x = self.relu(self.bn_fc1(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        
        return x

#### Prepare model

In [27]:
cnn = CNN().to(device)

#### Train model

In [28]:
criterion, optimizer, scheduler = define_hyperparameters(cnn)

In [29]:
train_model('custom_cnn', cnn, 40)

Epoch 1/40, Train Loss: 0.3614, Train Acc: 0.9374, Val Loss: 0.1014, Val Acc: 0.9798
New best custom_cnn saved at epoch 1 with validation accuracy: 0.9798
Epoch 2/40, Train Loss: 0.1200, Train Acc: 0.9759, Val Loss: 0.0586, Val Acc: 0.9850
New best custom_cnn saved at epoch 2 with validation accuracy: 0.9850
Epoch 3/40, Train Loss: 0.0816, Train Acc: 0.9813, Val Loss: 0.0508, Val Acc: 0.9852
New best custom_cnn saved at epoch 3 with validation accuracy: 0.9852
Epoch 4/40, Train Loss: 0.0643, Train Acc: 0.9844, Val Loss: 0.0403, Val Acc: 0.9877
New best custom_cnn saved at epoch 4 with validation accuracy: 0.9877
Epoch 5/40, Train Loss: 0.0490, Train Acc: 0.9884, Val Loss: 0.0393, Val Acc: 0.9883
New best custom_cnn saved at epoch 5 with validation accuracy: 0.9883
Epoch 6/40, Train Loss: 0.0440, Train Acc: 0.9887, Val Loss: 0.0380, Val Acc: 0.9882
Epoch 7/40, Train Loss: 0.0360, Train Acc: 0.9910, Val Loss: 0.0349, Val Acc: 0.9894
New best custom_cnn saved at epoch 7 with validation ac

### Evaluate

#### Evaluate ensemble

In [38]:
best_resnet = models.resnet18(pretrained=False)
best_densenet = models.densenet121(pretrained=False)
best_cnn = CNN().to(device)

In [39]:
prepare_resnet(best_resnet)
best_densenet = prepare_densenet(best_densenet)

In [40]:
best_resnet.load_state_dict(torch.load('best_resnet.pth'))
best_densenet.load_state_dict(torch.load('best_densenet.pth'))
best_cnn.load_state_dict(torch.load('best_custom_cnn.pth'))

  best_resnet.load_state_dict(torch.load('best_resnet.pth'))
  best_densenet.load_state_dict(torch.load('best_densenet.pth'))
  best_cnn.load_state_dict(torch.load('best_custom_cnn.pth'))


<All keys matched successfully>

In [81]:
def ensemble_predictions(models, data_loader, device):
    for model in models:
        model.eval()

    all_predictions = []
    with torch.no_grad():
        for data in data_loader:
            if isinstance(data, (tuple, list)) and len(data) == 2:
                images, _ = data
            else:
                images = data
                
            if isinstance(images, list):
                images = torch.cat(images, dim=0)

            if images.dim() == 3:
                images = images.unsqueeze(1)
                
            images = images.to(device)
            outputs_list = [model(images) for model in models]
            avg_outputs = torch.mean(torch.stack(outputs_list), dim=0)
            _, predictions = torch.max(avg_outputs, 1)
            all_predictions.extend(predictions.cpu().numpy())

    return np.array(all_predictions)

In [82]:
predictions = ensemble_predictions([best_resnet, best_densenet, best_cnn], val_loader, device)

In [83]:
accuracy = accuracy_score(y_val, predictions)
print(f'Ensemble Accuracy: {accuracy:.4f}')

Ensemble Accuracy: 0.9962


#### Evaluate DenseNet

In [51]:
densenet_predictions = []

with torch.no_grad():
    for images, _ in val_loader:
        images = images.to(device)
        outputs = best_densenet(images)
        _, predictions = torch.max(outputs, 1)
        densenet_predictions.append(predictions.cpu())

In [52]:
densenet_predictions = np.concatenate(densenet_predictions)

densenet_predictions_list = densenet_predictions.tolist()

In [53]:
accuracy = accuracy_score(y_val, densenet_predictions_list)
print(f'Ensemble Accuracy: {accuracy:.4f}')

Ensemble Accuracy: 0.9971


## Get submission

In [76]:
test_dataset = TensorDataset(X_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [84]:
predictions = ensemble_predictions([best_resnet, best_densenet, best_cnn], test_loader, device)

In [85]:
submission = pd.DataFrame(predictions)
submission.index.name='ImageId'
submission.index+=1
submission.columns=['Label']

In [None]:
submission.to_csv('submission.csv', index="ImageId")