In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
PATH = "data/"
device = "cuda"
seed = 42
file_name = PATH + "train.csv"

In [3]:
class MNIST(Dataset):
    def __init__(self, X, y):
        self.y = torch.from_numpy(y).long().to(device)
        X = X.reshape(X.shape[0], 1, 28, 28)
        X = X / 255.0
        self.X = torch.from_numpy(X).float().to(device)
    
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return (self.X[idx,:,:,:], self.y[idx])

In [4]:
df = pd.read_csv(file_name)
y = df["label"].values
X = df.drop("label", axis=1).values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

In [5]:
batch_size = 256
train_dataset = MNIST(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

test_dataset = MNIST(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# ResNet

![title](data/ResNet.png)

Note that this is an implemenation of the "basic block" (i.e. the left one). The "bottleneck block" (i.e. right one) is mostly used for deeper ResNet models (ResNet50, ResNet101 etc).

In [6]:
class BnLayer(nn.Module):
    def __init__(self, in_channels, out_channels, stride=2, kernel_size=3, padding=1):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,
                              bias=False, padding=padding)
        
        self.bn = nn.BatchNorm2d(out_channels)
        
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x

In [7]:
class ResNetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=2, kernel_size=3):
        super().__init__()
        self.bn_layer_1 = BnLayer(in_channels, out_channels, kernel_size=kernel_size, stride=stride)
        self.bn_layer_2 = BnLayer(in_channels, out_channels, kernel_size=kernel_size, stride=stride)
    
    def forward(self, x):
        h = self.bn_layer_1(x)
        h = F.relu(h)
        h = self.bn_layer_2(h)
        x = F.relu(x + h)
        return x

In [8]:
class ResNet(nn.Module):
    def __init__(self, layers, num_classes=10):
        super().__init__()
        self.init_layer = BnLayer(1, layers[0], kernel_size=5, stride=1, padding=2)
        self.bn_layers = nn.ModuleList(BnLayer(layers[i], layers[i + 1], kernel_size=3, stride=2, padding=1)
                                            for i in range(len(layers) - 1))
        
        self.res_layers = nn.ModuleList([ResNetBlock(layers[i + 1], layers[i + 1], stride=1)
                                     for i in range(len(layers) - 1)])
        
        self.dropout = nn.Dropout(p=0.5)
        self.out = nn.Linear(layers[-1], num_classes)
    
    def forward(self, x):
        x = F.relu(self.init_layer(x))

        for bn, res in zip(self.bn_layers, self.res_layers):
            x = F.relu(bn(x))
            x = res(x)
            
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        x = self.out(x)
        return F.log_softmax(x, dim=-1)

In [9]:
n_epochs = 50
lr = 1e-3
weight_decay = 1e-6
layers = [8, 16, 32, 64]

torch.manual_seed(seed)
model = ResNet(layers).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

In [10]:
torch.manual_seed(seed)
for epoch in range(0, n_epochs):
    model.train()
    train_loss = 0
    for _, (x, y) in enumerate(train_loader):
            model.zero_grad()
            output = model(x)
            batch_loss = F.nll_loss(output, y)
            
            batch_loss.backward()
            optimizer.step()
            train_loss += batch_loss / x.shape[0]
    
    train_loss = np.round(train_loss.cpu().detach().numpy() / len(train_loader), 6)
    
    model.eval()
    test_loss = 0
    test_acc = 0
    for _, (x, y) in enumerate(test_loader):
        output = model(x)
        batch_loss = F.nll_loss(output, y)
        test_loss += batch_loss / x.shape[0]
        
        y_hat = torch.argmax(output, dim=1)
        acc = (y_hat == y).sum().float() / x.shape[0]
        acc = acc.cpu().detach().numpy()
        test_acc += acc
    
    test_loss = np.round(test_loss.cpu().detach().numpy() / len(test_loader), 6)
    test_acc = np.round(test_acc / len(test_loader), 4)
        
    if epoch % 5 == 0:
        print("Epoch {}: train loss {}, test loss {}, test accuracy {}".format(epoch,
                                                                               train_loss,
                                                                               test_loss,
                                                                               test_acc))
    
    if epoch % 10 == 0 and epoch != 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group["lr"] * 0.5
        print("-----------------------------")
        print("Decreasing learning rate to: {}".format(param_group["lr"]))
        print("-----------------------------")

Epoch 0: train loss 0.00325, test loss 0.000548, test accuracy 0.9656
Epoch 5: train loss 0.000167, test loss 0.000217, test accuracy 0.9849
Epoch 10: train loss 9.2e-05, test loss 0.000199, test accuracy 0.9867
-----------------------------
Decreasing learning rate to: 0.0005
-----------------------------
Epoch 15: train loss 2e-05, test loss 0.000144, test accuracy 0.9914
Epoch 20: train loss 1.7e-05, test loss 0.000219, test accuracy 0.9879
-----------------------------
Decreasing learning rate to: 0.00025
-----------------------------
Epoch 25: train loss 5e-06, test loss 0.000153, test accuracy 0.9914
Epoch 30: train loss 4e-06, test loss 0.000163, test accuracy 0.9916
-----------------------------
Decreasing learning rate to: 0.000125
-----------------------------
Epoch 35: train loss 4e-06, test loss 0.00016, test accuracy 0.9921
Epoch 40: train loss 5e-06, test loss 0.000177, test accuracy 0.9919
-----------------------------
Decreasing learning rate to: 6.25e-05
--------------