In [4]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import torchvision.transforms as transforms

In [14]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5), (0.5))])

#Load train and test data
train_dataset = MNIST(root='./mnist_data', train=True, download=True, transform=transform)
test_dataset = MNIST(root='./mnist_data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(train_dataset, batch_size=128)


tensor([[ 0.5843,  0.9765,  0.9843,  0.9843,  0.9843],
        [ 0.9843,  0.9843,  0.9843,  0.9843,  0.9843],
        [ 0.9843,  0.9843,  0.9843,  0.9843,  0.8431],
        [ 0.9843,  0.9843,  0.1216, -0.2235, -0.7255],
        [ 0.9843,  0.2157, -0.9922, -1.0000, -1.0000]])


In [27]:
#Model
class Model(nn.Module):
    def __init__(self, use_learnable_skip=False):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 3, padding='same')
        self.act1 = nn.LeakyReLU()
        self.conv2 = nn.Conv2d(6, 12, 3, padding='same')
        self.act2 = nn.LeakyReLU()
        self.conv3 = nn.Conv2d(12, 24, 7) #output of size Nx24x22x22
        self.act3 = nn.LeakyReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2) #output of size Nx24x11x11
        self.conv4 = nn.Conv2d(24, 24, 5) #output of size Nx24x7x7
        self.act4 = nn.LeakyReLU()
        self.fc = nn.Linear(24*7*7, 10)
        self.act_final = nn.LogSoftmax()

        self.use_learnable_skip = use_learnable_skip
        self.t = nn.Parameter(torch.ones(1) * 0.5)

    def forward(self, input):
        x = self.conv1(input)
        x = self.act1(x)
        x = self.conv2(x)
        if self.use_learnable_skip:
            x = self.act2(x) * self.t + input * (1-self.t)
        else:
            x = self.act2(x) + input
        x = self.conv3(x)
        x = self.act3(x)
        x = self.pool(x)
        x = self.conv4(x)
        x = self.act4(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        return self.act_final(x)
        

In [21]:
model = Model()
optimizer = Adam(model.parameters())
loss_fct = nn.CrossEntropyLoss()

for epoch in range(3):
    running_loss = 0.

    for i, batch in enumerate(train_loader):
        X, y = batch
        pred = model(X)
        loss = loss_fct(pred, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

  return self._call_impl(*args, **kwargs)


[1,   100] loss: 0.421
[1,   200] loss: 0.105
[1,   300] loss: 0.085
[1,   400] loss: 0.069
[2,   100] loss: 0.049
[2,   200] loss: 0.048
[2,   300] loss: 0.048
[2,   400] loss: 0.046
[3,   100] loss: 0.033
[3,   200] loss: 0.036
[3,   300] loss: 0.035
[3,   400] loss: 0.033


In [24]:
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        X, y = data
        pred = model(X)

        _, predicted = torch.max(pred.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()

print(f"Accuracy on test set: {correct / total * 100}%")

  return self._call_impl(*args, **kwargs)


Accuracy on test set: 99.20833333333333%


In [28]:
# This cell and the next one are copies of the previous ones but with the extra learnable parameter.

model2 = Model(use_learnable_skip=True)
optimizer2 = Adam(model2.parameters())
loss_fct2 = nn.CrossEntropyLoss()

for epoch in range(3):
    running_loss = 0.

    for i, batch in enumerate(train_loader):
        X, y = batch
        pred = model2(X)
        loss = loss_fct2(pred, y)
        
        optimizer2.zero_grad()
        loss.backward()
        optimizer2.step()

        running_loss += loss.item()
        if i % 100 == 99:    # print every 100 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

  return self._call_impl(*args, **kwargs)


[1,   100] loss: 0.485
[1,   200] loss: 0.121
[1,   300] loss: 0.089
[1,   400] loss: 0.074
[2,   100] loss: 0.056
[2,   200] loss: 0.053
[2,   300] loss: 0.048
[2,   400] loss: 0.047
[3,   100] loss: 0.039
[3,   200] loss: 0.036
[3,   300] loss: 0.029
[3,   400] loss: 0.038


In [29]:
correct2 = 0
total2 = 0
with torch.no_grad():
    for data in test_loader:
        X, y = data
        pred = model2(X)

        _, predicted = torch.max(pred.data, 1)
        total2 += y.size(0)
        correct2 += (predicted == y).sum().item()

print(f"Accuracy on test set USING OUR AMAZING IDEA: {correct2 / total2 * 100}%")

  return self._call_impl(*args, **kwargs)


Accuracy on test set USING OUR AMAZING IDEA: 99.32166666666666%
