In [382]:
import torch
import data_loader
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

In [422]:
# Check if GPU is available
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [383]:
# helper.py

# Count the number of parameters
def count_param(model):
    return sum([torch.numel(param) for param in model.parameters()])

In [466]:
train_loader, test_loader = load_data(N=1000, batch_size=50, seed=42)

In [445]:
def compute_nb_errors(model, data_loader):

    nb_data_errors = 0

    for data_input, data_target, data_classes in data_loader:
        data_target = torch.nn.functional.one_hot(data_target)
        output = model(data_input)
        nb_error = torch.sum(torch.argmax(output, dim=1, keepdim=True) != torch.argmax(data_target, dim=1, keepdim=True))
        nb_data_errors += nb_error
        
    return nb_data_errors

In [496]:
def compute_nb_errors_siamese(model, data_loader):

    nb_data_errors = 0
    for data_input, data_target, data_classes in data_loader:
        data_1, data_2 = data_input.unbind(1)               
        output = model(data_1.unsqueeze(1), data_2.unsqueeze(1))
        data_target = torch.nn.functional.one_hot(data_target)
        nb_error = torch.sum(torch.argmax(output, dim=1, keepdim=True) != torch.argmax(data_target, dim=1, keepdim=True))
        nb_data_errors += nb_error
        
    return nb_data_errors

In [458]:
class BaseNet(nn.Module):
    def __init__(self):
        super(BaseNet, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=5)    # size [nb, 32, 10, 10]
        self.conv2 = nn.Conv2d(32, 64, kernel_size=2)   # size [nb, 64, 4, 4]
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        self.fc3 = nn.Linear(10, 2)
        
    def forward(self, x):        
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2)) # size [nb, 32, 5, 5]      
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)) # size [nb, 64, 2, 2]
        x = x.view(-1, 256) # size [nb, 256]
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

In [536]:
class SiameseBaseNet(nn.Module):
    def __init__(self):
        super(SiameseBaseNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)    # size [nb, 32, 10, 10]
        self.conv2 = nn.Conv2d(32, 64, kernel_size=2)   # size [nb, 64, 4, 4]
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        self.fc3 = nn.Linear(20, 2)
        
    def convs(self, x):        
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2)) # size [nb, 32, 5, 5]      
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)) # size [nb, 64, 2, 2]
        return x
    
    def forward(self, x1, x2):
        x1 = self.convs(x1)
        x1 = x1.view(-1, 256)
        x1 = F.relu((self.fc1(x1)))
        x1 = F.relu(self.fc2(x1))
        
        x2 = self.convs(x2)
        x2 = x2.view(-1, 256)
        x2 = F.relu(self.fc1(x2))
        x2 = F.relu(self.fc2(x2))
        
        x = torch.cat([x1, x2], dim=1)
        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))
        
        x = torch.sigmoid(self.fc3(x))
        
        return x

<torch.utils.data.dataloader.DataLoader at 0x7fbcde9fd970>

In [544]:
model = BaseNet()

# Calculate the number of parameters in the model
count_param(model)

63320

In [543]:
model = SiameseBaseNet()

# Calculate the number of parameters in the model
count_param(model)

62540

In [530]:
def train(model, train_loader, eta, decay, n_epochs=25, verbose=False, siamese=False):

    binary_crit = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=eta, weight_decay=decay)

    tr_losses = []
    tr_accuracies = []

    for e in range(n_epochs):
        # Reset training/validation loss
        tr_loss = 0

        # Training model
        model.train()

        for train_input, train_target, train_classes in iter(train_loader):
            # Forward pass
            if siamese == True:
                train_1, train_2 = train_input.unbind(1)               
                output = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
            else:
                output = model(train_input)
            # Binary classification loss
            binary_loss = binary_crit(output, train_target)
        
            # Total loss = Binary loss
            tr_loss += binary_loss

            # Backward pass
            optimizer.zero_grad()
            binary_loss.backward()
            optimizer.step()

        # Collect accuracy data
        # tr_accuracies.append(compute_nb_errors_siamese(model, train_loader)/1000)

        # Collect loss data
        tr_losses.append(tr_loss)

        if verbose:
            print('Epoch %d/%d, Binary loss: %.3f' %
                  (e+1, n_epochs, tr_loss))

In [538]:
import time

time1 = time.perf_counter()
model = SiameseBaseNet()
#model = BaseNet()
train(model, train_loader, 0.001, 0, 25, verbose=True, siamese=True)
time2 = time.perf_counter()
print('Spend {:e} s'.format(time2 - time1))

tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
print(tr_accuracy, te_accuracy)

Epoch 1/25, Binary loss: 13.699
Epoch 2/25, Binary loss: 12.367
Epoch 3/25, Binary loss: 10.409
Epoch 4/25, Binary loss: 9.507
Epoch 5/25, Binary loss: 8.995
Epoch 6/25, Binary loss: 8.528
Epoch 7/25, Binary loss: 8.152
Epoch 8/25, Binary loss: 7.807
Epoch 9/25, Binary loss: 7.594
Epoch 10/25, Binary loss: 7.456
Epoch 11/25, Binary loss: 7.214
Epoch 12/25, Binary loss: 7.017
Epoch 13/25, Binary loss: 6.907
Epoch 14/25, Binary loss: 6.809
Epoch 15/25, Binary loss: 6.725
Epoch 16/25, Binary loss: 6.666
Epoch 17/25, Binary loss: 6.637
Epoch 18/25, Binary loss: 6.610
Epoch 19/25, Binary loss: 6.598
Epoch 20/25, Binary loss: 6.581
Epoch 21/25, Binary loss: 6.572
Epoch 22/25, Binary loss: 6.570
Epoch 23/25, Binary loss: 6.565
Epoch 24/25, Binary loss: 6.562
Epoch 25/25, Binary loss: 6.559
Spend 9.707771e+00 s
tensor(0.9860) tensor(0.8370)


In [539]:
accuracies = []
times = []

for i in range(10):
    time1 = time.perf_counter()
    model = SiameseBaseNet()
    #model = BaseNet()
    train(model, train_loader, 0.001, 0, 25, verbose=False, siamese=True)
    time2 = time.perf_counter()
    times.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies.append(te_accuracy)

tensor(0.9830) tensor(0.8430)
tensor(0.9760) tensor(0.8370)
tensor(0.9700) tensor(0.8480)
tensor(0.9810) tensor(0.8440)
tensor(0.9750) tensor(0.8470)
tensor(0.9750) tensor(0.8400)
tensor(0.9750) tensor(0.8380)
tensor(0.9870) tensor(0.8560)
tensor(0.9840) tensor(0.8390)
tensor(0.9880) tensor(0.8410)


In [540]:
times

[11.573083244002191,
 10.595573466998758,
 10.637857167006587,
 9.038797736007837,
 9.342131892000907,
 9.268388575990684,
 9.249736006007879,
 9.293062070006272,
 9.045610155997565,
 9.073594603003585]

In [541]:
accuracies1 = []
times1 = []

for i in range(10):
    time1 = time.perf_counter()
    #model = SiameseBaseNet()
    model = BaseNet()
    train(model, train_loader, 0.001, 0, 25, verbose=False, siamese=False)
    time2 = time.perf_counter()
    times1.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies1.append(te_accuracy)

tensor(0.9690) tensor(0.8130)
tensor(0.9760) tensor(0.8160)
tensor(0.9800) tensor(0.8150)
tensor(0.9730) tensor(0.8150)
tensor(0.9770) tensor(0.8200)
tensor(0.9740) tensor(0.8160)
tensor(0.9820) tensor(0.8010)
tensor(0.9840) tensor(0.8230)
tensor(0.9680) tensor(0.8330)
tensor(0.9810) tensor(0.8240)


In [542]:
times1

[5.620303037008853,
 4.7123231459991075,
 4.686643656998058,
 4.621934707000037,
 4.853875696004252,
 5.68401227099821,
 5.850134880995029,
 5.8803767320059706,
 5.72162740699423,
 5.136375185000361]

In [419]:
gammas = torch.logspace(start=-4, end=-2, steps=5)
decays = torch.logspace(start=-13, end=-8, steps=6)
accuracies = torch.empty((len(gammas), len(decays)))
model = BaseNet()
for j in range(len(gammas)):
    for k in range(len(decays)):
        accurate = []
        for i in range(10):
            train_loader, test_loader = load_data(N=1000, batch_size=50, seed=42)
            train(model, train_loader, gammas[j], decays[k], 25, verbose=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            accurate.append(te_accuracy)
        accuracies[j,k] = torch.Tensor(accurate).mean()

KeyboardInterrupt: 

In [420]:
accuracies

tensor([[0.8026, 0.8156, 0.8151, 0.8154, 0.8163, 0.8181],
        [0.8172, 0.8175, 0.8170, 0.8170, 0.8168, 0.8193],
        [0.8100, 0.8058, 0.8081, 0.8109, 0.8148, 0.8128],
        [0.7738, 0.7307, 0.7360, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]])