In [1]:
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, models
import matplotlib.pyplot as plt
from tqdm import tqdm


import pandas as pd
import numpy as np

import os

In [2]:
# Checking GPU is available
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('Training on CPU...')
else:
    print('Training on GPU...')

Training on GPU...


In [3]:
# Dataset responsible for manipulating data for training as well as training tests.
class DatasetMNIST(torch.utils.data.Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        item = self.data.iloc[index]
                
        image = item[1:].values.astype(np.uint8).reshape((28, 28))
        label = item[0]

        
        if self.transform is not None:
            image = self.transform(image)
            
        
        if self.transform is not None:
            image = self.transform(image)
            
        return image, label

In [4]:
BATCH_SIZE = 100
VALID_SIZE = 0.15 # percentage of data for validation

transform_train = transforms.Compose([
    transforms.ToPILImage(),
   # transforms.RandomRotation(0, 0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])

transform_valid = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])

# Importing data that will be used for training and validation
dataset = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

# Creating datasets for training and validation
train_data = DatasetMNIST(dataset, transform=transform_train)
valid_data = DatasetMNIST(dataset, transform=transform_valid)

# Shuffling data and choosing data that will be used for training and validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(VALID_SIZE * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=BATCH_SIZE, sampler=valid_sampler)

print(f"Length train: {len(train_idx)}")
print(f"Length valid: {len(valid_idx)}")

Length train: 35700
Length valid: 6300


In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25)
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25)
        )
        
        
        self.fc = nn.Sequential(
            nn.Linear(3136, 100),
            nn.Linear(100, 10),
        )
                
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

model = CNN()
print(model)

if torch.cuda.is_available():
    model = model.cuda()

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.25, inplace=False)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.25, inplace=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=3136, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=10, bias=True)
  )
)


In [6]:
LEARNING_RATE = 0.001680

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [7]:
epochs = 10
valid_loss_min = np.Inf
train_losses, valid_losses = [], []
train_accuracies, valid_accuracies = [], []

for e in range(1, epochs+1):
    running_loss = 0
    correct_train = 0
    total_train = 0
    model.train()  # Set the model to training mode

    # Training with tqdm for progress bar
    for images, labels in tqdm(train_loader, desc=f"Epoch {e}/{epochs} - Training"):
        if train_on_gpu:
            images, labels = images.cuda(), labels.cuda()

        optimizer.zero_grad()  # Clear the gradients

        # Forward pass
        ps = model(images)
        loss = criterion(ps, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate training accuracy
        _, predicted = torch.max(ps, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_accuracy = correct_train / total_train
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    # Validation
    valid_loss = 0
    correct_valid = 0
    total_valid = 0
    model.eval()  # Set the model to evaluation mode

    # Validation with tqdm for progress bar
    for images, labels in tqdm(valid_loader, desc=f"Epoch {e}/{epochs} - Validation"):
        if train_on_gpu:
            images, labels = images.cuda(), labels.cuda()

        ps = model(images)
        loss = criterion(ps, labels)

        valid_loss += loss.item()

        # Calculate validation accuracy
        _, predicted = torch.max(ps, 1)
        total_valid += labels.size(0)
        correct_valid += (predicted == labels).sum().item()

    valid_loss /= len(valid_loader)
    valid_accuracy = correct_valid / total_valid
    valid_losses.append(valid_loss)
    valid_accuracies.append(valid_accuracy)

    # Print statistics
    print(f"Epoch: {e}/{epochs}.. \n"
          f"Training Loss: {train_loss:.3f}.. \t"
          f"Training Accuracy: {train_accuracy:.3f}.. \n"
          f"Validation Loss: {valid_loss:.3f}.. \t"
          f"Validation Accuracy: {valid_accuracy:.3f}")

    # Save the model if there's an improvement in validation loss
    if valid_loss < valid_loss_min:
        valid_loss_min = valid_loss
        torch.save(model.state_dict(), 'model_mtl_mnist.pt')
        print('Detected network improvement, saving current model')


  label = item[0]
Epoch 1/10 - Training: 100%|██████████| 357/357 [00:26<00:00, 13.62it/s]
Epoch 1/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.36it/s]


Epoch: 1/10.. 
Training Loss: 0.849.. 	Training Accuracy: 0.776.. 
Validation Loss: 0.437.. 	Validation Accuracy: 0.897
Detected network improvement, saving current model


Epoch 2/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.72it/s]
Epoch 2/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.32it/s]


Epoch: 2/10.. 
Training Loss: 0.360.. 	Training Accuracy: 0.906.. 
Validation Loss: 0.290.. 	Validation Accuracy: 0.927
Detected network improvement, saving current model


Epoch 3/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.69it/s]
Epoch 3/10 - Validation: 100%|██████████| 63/63 [00:03<00:00, 15.80it/s]


Epoch: 3/10.. 
Training Loss: 0.270.. 	Training Accuracy: 0.927.. 
Validation Loss: 0.231.. 	Validation Accuracy: 0.936
Detected network improvement, saving current model


Epoch 4/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.54it/s]
Epoch 4/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.49it/s]


Epoch: 4/10.. 
Training Loss: 0.225.. 	Training Accuracy: 0.937.. 
Validation Loss: 0.198.. 	Validation Accuracy: 0.945
Detected network improvement, saving current model


Epoch 5/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.59it/s]
Epoch 5/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.51it/s]


Epoch: 5/10.. 
Training Loss: 0.197.. 	Training Accuracy: 0.944.. 
Validation Loss: 0.175.. 	Validation Accuracy: 0.951
Detected network improvement, saving current model


Epoch 6/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.60it/s]
Epoch 6/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.20it/s]


Epoch: 6/10.. 
Training Loss: 0.177.. 	Training Accuracy: 0.949.. 
Validation Loss: 0.158.. 	Validation Accuracy: 0.956
Detected network improvement, saving current model


Epoch 7/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.62it/s]
Epoch 7/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.66it/s]


Epoch: 7/10.. 
Training Loss: 0.164.. 	Training Accuracy: 0.953.. 
Validation Loss: 0.146.. 	Validation Accuracy: 0.959
Detected network improvement, saving current model


Epoch 8/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.55it/s]
Epoch 8/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.28it/s]


Epoch: 8/10.. 
Training Loss: 0.151.. 	Training Accuracy: 0.956.. 
Validation Loss: 0.139.. 	Validation Accuracy: 0.959
Detected network improvement, saving current model


Epoch 9/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.78it/s]
Epoch 9/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.57it/s]


Epoch: 9/10.. 
Training Loss: 0.141.. 	Training Accuracy: 0.959.. 
Validation Loss: 0.130.. 	Validation Accuracy: 0.961
Detected network improvement, saving current model


Epoch 10/10 - Training: 100%|██████████| 357/357 [00:24<00:00, 14.65it/s]
Epoch 10/10 - Validation: 100%|██████████| 63/63 [00:04<00:00, 15.05it/s]

Epoch: 10/10.. 
Training Loss: 0.134.. 	Training Accuracy: 0.961.. 
Validation Loss: 0.123.. 	Validation Accuracy: 0.965
Detected network improvement, saving current model



