In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
PATH = "data/"
device = "cuda"
seed = 42
file_name = PATH + "train.csv"

In [3]:
class MNIST(Dataset):
    def __init__(self, X, y):
        self.y = torch.from_numpy(y).long().to(device)
        X = X.reshape(X.shape[0], 1, 28, 28)
        X = X / 255.0
        self.X = torch.from_numpy(X).float().to(device)
    
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return (self.X[idx,:,:,:], self.y[idx])

In [4]:
df = pd.read_csv(file_name)
y = df["label"].values
X = df.drop("label", axis=1).values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

In [5]:
batch_size = 256
train_dataset = MNIST(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

test_dataset = MNIST(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# Inception V1

![title](data/Inception.png)

In [6]:
class BnLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride,
                              padding=padding, bias=False)
        
        self.bn = nn.BatchNorm2d(out_channels)
        
    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = F.relu(x)
        return x

In [7]:
class InceptionBlock(nn.Module):
    def __init__(self, in_channels, channels_1x1, channels_1x1_3x3, channels_3x3,
                 channels_1x1_5x5, channels_5x5, channels_1x1_pool):
                                                
        super().__init__()
        
        # 1x1 conv
        self.b1 = nn.Sequential(BnLayer(in_channels, channels_1x1, kernel_size=1))
        
        # 1x1 conv -> 3x3 conv
        self.b2 = nn.Sequential(
            BnLayer(in_channels, channels_1x1_3x3, kernel_size=1),
            BnLayer(channels_1x1_3x3, channels_3x3, kernel_size=3, padding=1)
        )
        
        # 1x1 conv -> 5x5 conv
        self.b3 = nn.Sequential(
            BnLayer(in_channels, channels_1x1_5x5, kernel_size=1),
            BnLayer(channels_1x1_5x5, channels_5x5, kernel_size=5, padding=2)
        )    
                 
        # Max pooling -> 1x1 conv
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            BnLayer(in_channels, channels_1x1_pool, kernel_size=1)
        )  
    
    def forward(self, x):
        x1 = self.b1(x)
        x2 = self.b2(x)
        x3 = self.b3(x)
        x4 = self.b4(x)
        x_cat = torch.cat([x1, x2, x3, x4], dim=1)         
        return x_cat

In [8]:
class InceptionNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.init_layer = BnLayer(1, 16, kernel_size=5, stride=2, padding=2)
        
        self.module_1 = InceptionBlock(16, 8, 4, 8, 4, 8, 8)
        self.module_2 = InceptionBlock(32, 16, 4, 16, 4, 16, 16)
        
        self.dropout = nn.Dropout(p=0.5)
        self.out = nn.Linear(64, num_classes)
    
    def forward(self, x):
        x = self.init_layer(x)
        x = F.max_pool2d(x, 2)
        x = self.module_1(x)
        x = F.max_pool2d(x, 2)
        x = self.module_2(x)            
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        
        x = self.dropout(x)
        x = self.out(x)
        return F.log_softmax(x, dim=-1)

In [9]:
n_epochs = 50
lr = 1e-3
weight_decay = 1e-5

torch.manual_seed(seed)
model = InceptionNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

In [10]:
torch.manual_seed(seed)
for epoch in range(0, n_epochs):
    model.train()
    train_loss = 0
    for _, (x, y) in enumerate(train_loader):
            model.zero_grad()
            output = model(x)
            batch_loss = F.nll_loss(output, y)
            
            batch_loss.backward()
            optimizer.step()
            train_loss += batch_loss / x.shape[0]
    
    train_loss = np.round(train_loss.cpu().detach().numpy() / len(train_loader), 6)
    
    model.eval()
    test_loss = 0
    test_acc = 0
    for _, (x, y) in enumerate(test_loader):
        output = model(x)
        batch_loss = F.nll_loss(output, y)
        test_loss += batch_loss / x.shape[0]
        
        y_hat = torch.argmax(output, dim=1)
        acc = (y_hat == y).sum().float() / x.shape[0]
        acc = acc.cpu().detach().numpy()
        test_acc += acc
    
    test_loss = np.round(test_loss.cpu().detach().numpy() / len(test_loader), 6)
    test_acc = np.round(test_acc / len(test_loader), 4)
        
    if epoch % 5 == 0:
        print("Epoch {}: train loss {}, test loss {}, test accuracy {}".format(epoch,
                                                                               train_loss,
                                                                               test_loss,
                                                                               test_acc))
    
    if epoch % 10 == 0 and epoch != 0:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group["lr"] * 0.5
        print("-----------------------------")
        print("Decreasing learning rate to: {}".format(param_group["lr"]))
        print("-----------------------------")

Epoch 0: train loss 0.006775, test loss 0.003857, test accuracy 0.8346
Epoch 5: train loss 0.000818, test loss 0.000516, test accuracy 0.9664
Epoch 10: train loss 0.000529, test loss 0.000356, test accuracy 0.9753
-----------------------------
Decreasing learning rate to: 0.0005
-----------------------------
Epoch 15: train loss 0.000431, test loss 0.000309, test accuracy 0.9785
Epoch 20: train loss 0.000389, test loss 0.000294, test accuracy 0.9791
-----------------------------
Decreasing learning rate to: 0.00025
-----------------------------
Epoch 25: train loss 0.000341, test loss 0.000279, test accuracy 0.9792
Epoch 30: train loss 0.000328, test loss 0.000273, test accuracy 0.9793
-----------------------------
Decreasing learning rate to: 0.000125
-----------------------------
Epoch 35: train loss 0.000316, test loss 0.000265, test accuracy 0.9804
Epoch 40: train loss 0.000309, test loss 0.000266, test accuracy 0.9802
-----------------------------
Decreasing learning rate to: 6.25