In [146]:
import dlc_practical_prologue as prologue
import torch
from torch import  nn
from torch.nn import functional as F
import math
from torch import optim
from torch import Tensor
from torch import nn

In [147]:
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(1000)

In [148]:
IMAGE_SIZE = 196
NUM_CLASSES = 10

In [149]:
train_input_1 = train_input[:, 0, :, :]
train_input_2 = train_input[:, 1, :, :]
test_input_1 = test_input[:, 0, :, :]
test_input_2 = test_input[:, 1, :, :]
train_classes_1 = train_classes[:, 0]
train_classes_2 = train_classes[:, 1]
test_classes_1 = test_classes[:, 0]
test_classes_2 = test_classes[:, 1]


In [321]:
class ShallowFullyConncectedNet(nn.Module):
    def __init__(self, nb_hidden):
        super(ShallowFullyConncectedNet, self).__init__()
        self.fc1 = nn.Linear(IMAGE_SIZE, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, NUM_CLASSES)
        self.name = f"ShallowFullyConncectedNet({nb_hidden})"

    def forward(self, x):
        x = F.relu(self.fc1(x.view(-1, IMAGE_SIZE)))
        x = self.fc2(x)
        return x

In [322]:
class DeepFullyConncectedNet(nn.Module):
    def __init__(self, nb_layers):
        super(DeepFullyConncectedNet, self).__init__()
        self.layers = nn.ModuleList([])
        self.name = f"DeepFullyConncectedNet({nb_layers})"
        acc = IMAGE_SIZE
        if nb_layers % 2 !=0:
            nb_layers = nb_layers - 1
        for l in range(nb_layers):
            if l < nb_layers/2:
                self.layers.append(nn.Linear(acc, acc*2))
                acc = acc*2
            else:
                self.layers.append(nn.Linear(acc, int(acc/2)))
                acc = int(acc/2)
        self.layers.append(nn.Linear(IMAGE_SIZE, 10))

        

    def forward(self, x):
        acc = IMAGE_SIZE
        for l in range(len(self.layers)-1):
            x = F.relu(self.layers[l](x.view(-1, acc)))
            if l < (len(self.layers)-1)/2:
                acc = acc*2
            else:
                acc = int(acc/2)
        x = self.layers[len(self.layers)-1](x)

       
        return x

In [323]:
# TODO
# class BasicCNN(nn.Module):
#     def __init__(self):
#         super(Net2, self).__init__()
#         nb_hidden = 200
#         self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
#         self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
#         self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
#         self.fc1 = nn.Linear(9 * 64, nb_hidden)
#         self.fc2 = nn.Linear(nb_hidden, 10)

#     def forward(self, x):
#         x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
#         x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
#         x = F.relu(self.conv3(x))
#         x = F.relu(self.fc1(x.view(-1, 9 * 64)))
#         x = self.fc2(x)
#         return x

In [324]:
# TODO 
# class ResNetBlock(nn.Module):
#     def __init__(self, nb_channels, kernel_size,
#                  skip_connections = True, batch_normalization = True):
#         super(ResNetBlock, self).__init__()

#         self.conv1 = nn.Conv2d(nb_channels, nb_channels,
#                                kernel_size = kernel_size,
#                                padding = (kernel_size - 1) // 2)

#         self.bn1 = nn.BatchNorm2d(nb_channels)

#         self.conv2 = nn.Conv2d(nb_channels, nb_channels,
#                                kernel_size = kernel_size,
#                                padding = (kernel_size - 1) // 2)

#         self.bn2 = nn.BatchNorm2d(nb_channels)

#         self.skip_connections = skip_connections
#         self.batch_normalization = batch_normalization
        

#     def forward(self, x):
#         y = self.conv1(x)
#         if self.batch_normalization: y = self.bn1(y)
#         y = F.relu(y)
#         y = self.conv2(y)
#         if self.batch_normalization: y = self.bn2(y)
#         if self.skip_connections: y = y + x
#         y = F.relu(y)

#         return y
    

# class ResNet(nn.Module):

#     def __init__(self, nb_residual_blocks, nb_channels,
#                  kernel_size = 3, nb_classes = 10,
#                  skip_connections = True, batch_normalization = True):
#         super(ResNet, self).__init__()

#         self.conv = nn.Conv2d(3, nb_channels,
#                               kernel_size = kernel_size,
#                               padding = (kernel_size - 1) // 2)
#         self.bn = nn.BatchNorm2d(nb_channels)

#         self.resnet_blocks = nn.Sequential(
#             *(ResNetBlock(nb_channels, kernel_size, skip_connections, batch_normalization)
#               for _ in range(nb_residual_blocks))
#         )

#         self.fc = nn.Linear(nb_channels, nb_classes)

#     def forward(self, x):
#         x = F.relu(self.bn(self.conv(x)))
#         x = self.resnet_blocks(x)
#         x = F.avg_pool2d(x, 32).view(x.size(0), -1)
#         x = self.fc(x)
#         return x


In [325]:
def train_model(model, train_input, train_target, mini_batch_size, eta, criterion = nn.CrossEntropyLoss(), nb_epochs = 25):
   
    optimizer = optim.SGD(model.parameters(), eta)
    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward()
            optimizer.step()

In [326]:
def compute_nb_errors(model, input1, input2, classes1, classes2, target, mini_batch_size):
    nb_errors = 0
    for b in range(0, input1.size(0), mini_batch_size):
        output1 = model(input1.narrow(0, b, mini_batch_size))
        output2 = model(input2.narrow(0, b, mini_batch_size))
        _, predicted_classes1 = output1.max(1)
        _, predicted_classes2 = output2.max(1)
        for k in range(mini_batch_size):
            if target[b + k] != (predicted_classes1[k] <= predicted_classes2[k]):
                nb_errors = nb_errors + 1

    return nb_errors

In [327]:
def train_test(model, mini_batch_size, eta, criterion, nb_epochs):
    train = torch.cat((train_input_1, train_input_2), 0)
    target = torch.cat((train_classes_1, train_classes_2), 0)

    train_model(model, train, target, mini_batch_size, eta, criterion, nb_epochs)
    train_error = compute_nb_errors(model, train_input_1, train_input_2, train_classes_1, train_classes_2, train_target, mini_batch_size) / target.size(0) * 100
    test_error = compute_nb_errors(model, test_input_1, test_input_2, test_classes_1, test_classes_2, test_target, mini_batch_size) / target.size(0) * 100
    print(f'MODEL: {model.name}, BATCH_SIZE: { mini_batch_size}, CRITERION: {str(criterion)}, EPOCHS: {nb_epochs}, train_error:{train_error}%, test_error: {test_error}%')

In [317]:
train_test(ShallowFullyConncectedNet(1000), mini_batch_size=100, eta = 1e-1, criterion =nn.CrossEntropyLoss(), nb_epochs = 25)

MODEL: ShallowFullyConncectedNet, BATCH_SIZE: 100, CRITERION: CrossEntropyLoss(), EPOCHS: 25, train_error:21.95%, test_error: 23.3%


In [328]:
train_test(DeepFullyConncectedNet(7), 100, eta = 1e-1, criterion =nn.CrossEntropyLoss(), nb_epochs = 25)

MODEL: DeepFullyConncectedNet(7), BATCH_SIZE: 100, CRITERION: CrossEntropyLoss(), EPOCHS: 25, train_error:22.05%, test_error: 23.25%
