In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import torchvision.transforms as transforms
import torchvision.datasets as datasets

from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

import copy
import random
import time

In [2]:
ROOT = '.data'

train_data = datasets.MNIST(root = ROOT, 
                            train = True, 
                            download = True)

In [3]:
BIT_DEPTH = 8
MAX_VALUE = pow(2, BIT_DEPTH) - 1

mean = train_data.data.float().mean() / MAX_VALUE
std = train_data.data.float().std() / MAX_VALUE

In [4]:
train_transforms = transforms.Compose([
                            transforms.RandomRotation(5, fill=(0,)),
                            transforms.RandomCrop(28, padding = 2),
                            transforms.ToTensor(),
                            transforms.Normalize(mean = [mean], std = [std])
                                      ])

test_transforms = transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize(mean = [mean], std = [std])
                                     ])

In [5]:
train_data = datasets.MNIST(root = ROOT, 
                            train = True, 
                            download = True, 
                            transform = train_transforms)

test_data = datasets.MNIST(root = ROOT, 
                           train = False, 
                           download = True, 
                           transform = test_transforms)

In [6]:
VALID_RATIO = 0.9

n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = len(train_data) - n_train_examples

train_data, valid_data = data.random_split(train_data, 
                                           [n_train_examples, n_valid_examples])

In [7]:
valid_data = copy.deepcopy(valid_data)
valid_data.dataset.transform = test_transforms

In [8]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

Number of training examples: 54000
Number of validation examples: 6000
Number of testing examples: 10000


In [9]:
BATCH_SIZE = 1024

train_iterator = data.DataLoader(train_data, 
                                 shuffle = True, 
                                 batch_size = BATCH_SIZE)

valid_iterator = data.DataLoader(valid_data, 
                                 batch_size = BATCH_SIZE)

test_iterator = data.DataLoader(test_data, 
                                batch_size = BATCH_SIZE)

    CNN1 = [1,28,28] -> [6,24,24] -> [6,12,12] -> [16,8,8] -> [16,4,4] -> 256 -> 120 -> 84 -> 10
    

In [10]:
OUTPUT_DIM = 10

class CNN(nn.Module):
    def __init__(self, output_dim):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels = 1, 
                               out_channels = 6, 
                               kernel_size = 5)

        self.conv2 = nn.Conv2d(in_channels = 6, 
                               out_channels = 16, 
                               kernel_size = 5)

        self.fc_1 = nn.Linear(16 * 4 * 4, 120)
        self.fc_2 = nn.Linear(120, 84)
        self.fc_3 = nn.Linear(84, output_dim)

    def forward(self, x):
        x = self.conv1(x)

        x = F.max_pool2d(x, kernel_size = 2)
        x = F.relu(x)

        x = self.conv2(x)
        
        x = F.max_pool2d(x, kernel_size = 2)
        x = F.relu(x)

        x = x.view(x.shape[0], -1)
        h = x

        x = self.fc_1(x)

        x = F.relu(x)
        x = self.fc_2(x)

        x = F.relu(x)
        x = self.fc_3(x)

        return x, h


model = CNN(OUTPUT_DIM)

    CNN2 = [1,28,28] -> [2,20,20] -> [2,10,10] -> [32,8,8] -> [32,4,4] -> 512 -> 200 -> 64 -> 10

In [None]:
OUTPUT_DIM = 10

class CNN(nn.Module):
    def __init__(self, output_dim):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels = 1, 
                               out_channels = 2, 
                               kernel_size = 9)

        self.conv2 = nn.Conv2d(in_channels = 2, 
                               out_channels = 32, 
                               kernel_size = 3)

        self.fc_1 = nn.Linear(512, 200)
        self.fc_2 = nn.Linear(200, 64)
        self.fc_3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = self.conv1(x)

        x = F.max_pool2d(x, kernel_size = 2)
        x = F.relu(x)

        x = self.conv2(x)
        
        x = F.max_pool2d(x, kernel_size = 2)
        x = F.relu(x)

        x = x.view(x.shape[0], -1)
        h = x

        x = self.fc_1(x)

        x = F.relu(x)
        x = self.fc_2(x)

        x = F.relu(x)
        x = self.fc_3(x)

        return x, h


model = CNN(OUTPUT_DIM)

    CNN3 = [1,28,28] -> [32,28,28] -> [32,14,14] -> [64,14,14] -> [64,7,7] -> 3136 -> 128 -> 10

In [None]:
OUTPUT_DIM = 10

class CNN(nn.Module):
    def __init__(self, output_dim):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels = 1, 
                               out_channels = 32, 
                               kernel_size = 3,
                               padding = 1)

        self.conv2 = nn.Conv2d(in_channels = 32, 
                               out_channels = 64, 
                               kernel_size = 3,
                               padding = 1)

        self.fc_1 = nn.Linear(3136, 128)
        self.fc_2 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = self.conv1(x)

        x = F.max_pool2d(x, kernel_size = 2)
        x = F.relu(x)

        x = self.conv2(x)
        
        x = F.max_pool2d(x, kernel_size = 2)
        x = F.relu(x)

        x = x.view(x.shape[0], -1)
        h = x

        x = self.fc_1(x)

        x = F.relu(x)
        x = self.fc_2(x)

        return x, h


model = CNN(OUTPUT_DIM)

In [11]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 44,426 trainable parameters


In [12]:
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim = True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [13]:
def train(model, iterator, optimizer, criterion, device):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for (x, y) in iterator:
        
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
                
        y_pred, _ = model(x)
        
        loss = criterion(y_pred, y)
        
        acc = calculate_accuracy(y_pred, y)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [14]:
def evaluate(model, iterator, criterion, device):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
        
        for (x, y) in iterator:

            x = x.to(device)
            y = y.to(device)

            y_pred, _ = model(x)

            loss = criterion(y_pred, y)

            acc = calculate_accuracy(y_pred, y)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [15]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [16]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
criterion = criterion.to(device)

EPOCHS = 20

best_valid_loss = float('inf')

for epoch in range(EPOCHS):
    
    # start_time = time.monotonic()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'model.pt')
    
    # end_time = time.monotonic()

    # epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    # print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    # print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
    print(f'{epoch+1},{train_loss},{train_acc},{valid_loss},{valid_acc}')

1,1.344431795039267,0.6021928944677677,0.39730053146680194,0.8906101981798807
2,0.47182117992976924,0.8568702789972413,0.19119842102130255,0.9455788334210714
3,0.2709628197382081,0.9188551430432301,0.12968300779660544,0.9616062939167023
4,0.20340423089153362,0.9399184719571527,0.11259398609399796,0.9664328793684641
5,0.1735230377822552,0.9473640255208285,0.09150555233160655,0.9725349148114523
6,0.14920124397525247,0.9561879792303409,0.08924427255988121,0.9723691940307617
7,0.1363324260093131,0.958395927582147,0.08066091934839885,0.9752988815307617
8,0.1300280754858593,0.9611311656124187,0.07648349304993947,0.9768998523553213
9,0.11938929389107902,0.9633453870719334,0.08047020559509595,0.9761955440044403
10,0.10703897223157703,0.9674990109677585,0.06402550265192986,0.9810221294562022
11,0.09983321180883444,0.9693502075267288,0.06613363822301228,0.9799627065658569
12,0.0956974282579602,0.9699986352110809,0.06361116593082745,0.9809954961140951
13,0.09131351197665592,0.9716820390719287,0.0

In [17]:
model.load_state_dict(torch.load('model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion, device)

# print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
print(f'{test_loss},{test_acc}')

0.04085181159898639,0.9868482947349548
