In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split

In [2]:
PATH = "data/"
device = "cuda"
seed = 42
file_name = PATH + "train.csv"

In [3]:
class MNIST(Dataset):
    def __init__(self, X, y):
        self.y = torch.from_numpy(y).long().to(device)
        X = X.reshape(X.shape[0], 1, 28, 28)
        X = X / 255.0
        self.X = torch.from_numpy(X).float().to(device)
    
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return (self.X[idx,:,:,:], self.y[idx])

In [4]:
df = pd.read_csv(file_name)
y = df["label"].values
X = df.drop("label", axis=1).values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

In [5]:
batch_size = 256
train_dataset = MNIST(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

test_dataset = MNIST(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# DenseNet

![title](data/DenseNet.png)

In [6]:
class DenseLayer(nn.Module):
    def __init__(self, in_channels, between_channels, growth_rate):
        super().__init__()

        layers = []
        layers.append(nn.BatchNorm2d(in_channels))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(in_channels, between_channels, kernel_size=1,
                                stride=1, bias=False))
        layers.append(nn.BatchNorm2d(between_channels))
        layers.append(nn.ReLU(inplace=True))
        layers.append(nn.Conv2d(between_channels, growth_rate, kernel_size=3,
                                stride=1, padding=1, bias=False))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        out = self.layers(x)
        return torch.cat([x, out], dim=1)

class DenseBlock(nn.Module):
    def __init__(self, in_channels, between_channels, growth_rate, num_layers):
        super().__init__()

        block = []
        for i in range(num_layers):
            block.append(DenseLayer(in_channels + i * growth_rate, between_channels, growth_rate))
        self.block = nn.Sequential(*block)

    def forward(self, x):
        return self.block(x)

class Transition(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()

        self.bn = nn.BatchNorm2d(in_channels)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                              stride=1, bias=False)
        self.pool = nn.AvgPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        x = self.bn(x)
        x = self.conv(x)
        x = self.pool(x)
        return x

class DenseNet(nn.Module):
    def __init__(self, growth_rate=4, init_channels=32, num_layers=3):
        super().__init__()

        block_output = [init_channels + i * num_layers * growth_rate for i in range(1, 4)]

        self.conv_1 = nn.Conv2d(1, init_channels, kernel_size=5,
                                stride=1, bias=False)

        self.block_1 = DenseBlock(init_channels, 4 * growth_rate, growth_rate, num_layers)
        self.transition_1 = Transition(block_output[0], block_output[0])

        self.block_2 = DenseBlock(block_output[0], 4 * growth_rate, growth_rate, num_layers)
        self.transition_2 = Transition(block_output[1], block_output[1])

        self.block_3 = DenseBlock(block_output[1], 4 * growth_rate, growth_rate, num_layers)

        self.out = nn.Linear(block_output[-1], 10)

    def forward(self, x):
        x = self.conv_1(x)

        x = self.block_1(x)
        x = self.transition_1(x)

        x = self.block_2(x)
        x = self.transition_2(x)

        x = self.block_3(x)

        x = F.adaptive_avg_pool2d(x, 1)
        x = x.view(len(x), -1)

        x = self.out(x)
        return F.log_softmax(x, dim=1)

In [7]:
n_epochs = 50
lr = 1e-3
weight_decay = 1e-5

torch.manual_seed(seed)
model = DenseNet().to(device)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
lr_sceduler = StepLR(optimizer, gamma=0.5, step_size=10)

In [8]:
torch.manual_seed(seed)
for epoch in range(0, n_epochs):
    lr_sceduler.step()
    model.train()
    train_loss = 0
    for _, (x, y) in enumerate(train_loader):
            model.zero_grad()
            output = model(x)
            batch_loss = F.nll_loss(output, y)

            batch_loss.backward()
            optimizer.step()
            train_loss += batch_loss.cpu().detach().numpy() / x.shape[0]
    train_loss = np.round(train_loss / len(train_loader), 6)
    
    model.eval()
    test_loss = 0
    test_acc = 0
    for _, (x, y) in enumerate(test_loader):
        output = model(x)
        batch_loss = F.nll_loss(output, y)
        test_loss += batch_loss.cpu().detach().numpy() / x.shape[0]
        
        y_hat = torch.argmax(output, dim=1)
        acc = (y_hat == y).sum().float() / x.shape[0]
        acc = acc.cpu().detach().numpy()
        test_acc += acc

    test_loss = np.round(test_loss / len(test_loader), 6)
    test_acc = np.round(test_acc / len(test_loader), 4)

    if epoch % 5 == 0:
        print("Epoch {}: train loss {}, test loss {}, test accuracy {}".format(epoch,
                                                                               train_loss,
                                                                               test_loss,
                                                                               test_acc))

Epoch 0: train loss 0.005685, test loss 0.003544, test accuracy 0.7025
Epoch 5: train loss 0.00033, test loss 0.000652, test accuracy 0.9545
Epoch 10: train loss 0.000155, test loss 0.000266, test accuracy 0.9804
Epoch 15: train loss 0.000114, test loss 0.000259, test accuracy 0.9818
Epoch 20: train loss 6.4e-05, test loss 0.000199, test accuracy 0.9854
Epoch 25: train loss 4.8e-05, test loss 0.000186, test accuracy 0.9871
Epoch 30: train loss 3e-05, test loss 0.000197, test accuracy 0.9869
Epoch 35: train loss 2.4e-05, test loss 0.000197, test accuracy 0.987
Epoch 40: train loss 1.9e-05, test loss 0.000204, test accuracy 0.9866
Epoch 45: train loss 1.5e-05, test loss 0.000201, test accuracy 0.987
