In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import dlc_practical_prologue as prologue
import time

In [2]:
from MLP_models import *
from CNN_models import *
from Resnet_models import *
from helpers import *
from data_loader import *

In [3]:
# Default compute device; the following cell replaces it with CUDA when available.
device = torch.device("cpu")

In [3]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#### Number of parameters of each model

In [4]:
# Build one instance of every architecture (default constructor arguments),
# in the order MLP / CNN / ResNet x {plain, siamese, aux, aux+siamese}.
models = [
    MLP(), SiameseMLP(), AuxMLP(), AuxsiameseMLP(),
    CNN(), SiameseCNN(), AuxCNN(), AuxsiameseCNN(),
    ResNet(), SiameseResNet(), AuxResNet(), AuxsiameseResNet(),
]

# Keep the individual model_N names available for later cells.
(model_1, model_2, model_3, model_4,
 model_5, model_6, model_7, model_8,
 model_9, model_10, model_11, model_12) = models

# Report the size of each network as computed by count_param.
for position, net in enumerate(models, start=1):
    print('The number of parameters in model_%d is %d' %
                  (position, count_param(net)))

The number of parameters in model_1 is 73314
The number of parameters in model_2 is 33172
The number of parameters in model_3 is 66302
The number of parameters in model_4 is 33172
The number of parameters in model_5 is 72536
The number of parameters in model_6 is 72268
The number of parameters in model_7 is 144494
The number of parameters in model_8 is 72268
The number of parameters in model_9 is 75746
The number of parameters in model_10 is 77812
The number of parameters in model_11 is 152692
The number of parameters in model_12 is 77812


#### Training function

In [23]:
def train(model, train_loader, eta, decay, n_epochs=25, verbose=False, siamese=False, aux=False, alpha=0.0):
    '''Train `model` with Adam and return the accumulated loss of every epoch.

    Parameters
    ----------
    model: network to optimise. It is called as `model(x)` by default, as
        `model(x1, x2)` when `siamese` is set, and must return
        `(output, aux1, aux2)` when `aux` is set.
    train_loader: iterable yielding `(train_input, train_target, train_classes)`
        batches. In siamese/aux mode `train_input` is split along dim 1 into
        the two inputs; `train_classes[:, 0]` / `train_classes[:, 1]` are the
        targets of the two auxiliary outputs.
    eta: Adam learning rate.
    decay: Adam weight decay.
    n_epochs: number of passes over `train_loader`.
    verbose: print the accumulated loss after every epoch.
    siamese: feed the two halves of the input separately to the model.
    aux: add the auxiliary cross-entropy losses; takes priority over
        `siamese` because it is tested first.
    alpha: weight of the auxiliary loss in the total loss.

    Returns
    -------
    list of float: for each epoch, the sum of the per-batch total losses.
    '''
    binary_crit = torch.nn.BCELoss()
    aux_crit = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=eta, weight_decay=decay)
    tr_losses = []

    for e in range(n_epochs):
        # Running sum of the batch losses for this epoch (plain Python float).
        tr_loss = 0.0

        model.train()

        for train_input, train_target, train_classes in train_loader:
            # Forward pass
            if aux:
                train_1, train_2 = train_input.unbind(1)
                output, aux1, aux2 = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
            elif siamese:
                train_1, train_2 = train_input.unbind(1)
                output = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
            else:
                output = model(train_input)

            # Binary classification loss
            total_loss = binary_crit(output, train_target.float())

            if aux:
                # Total loss = binary loss + alpha * (sum of both auxiliary losses)
                aux_loss = (aux_crit(aux1, train_classes[:, 0])
                            + aux_crit(aux2, train_classes[:, 1]))
                total_loss = total_loss + aux_loss * alpha

            # .item() detaches the value: accumulating the tensor itself would
            # keep every batch's autograd graph alive for the whole epoch.
            tr_loss += total_loss.item()

            # Backward pass
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Collect loss data
        tr_losses.append(tr_loss)

        if verbose:
            # The value printed is the total loss (binary + weighted auxiliary).
            print('Epoch %d/%d, Binary loss: %.3f' %
                  (e+1, n_epochs, tr_loss))
    return tr_losses

#### Accuracy function

In [6]:
def accu(model, train_loader, test_loader, siamese = False, aux = False):
    """Return `(train_accuracy, test_accuracy)` for `model`.

    The error counter is chosen from the flags: `aux` wins over `siamese`,
    and the plain counter is used when neither is set. Accuracy is computed
    as `1 - errors / 1000`, so it is only meaningful when each loader holds
    1000 samples.
    """
    if aux == True:
        count_errors = compute_nb_errors_auxsiamese
    elif siamese == True:
        count_errors = compute_nb_errors_siamese
    else:
        count_errors = compute_nb_errors

    tr_accuracy = 1 - count_errors(model, train_loader)/1000
    te_accuracy = 1 - count_errors(model, test_loader)/1000
    return tr_accuracy, te_accuracy

#### Tune learning rate and batch size for each model

In [33]:
# One entry per architecture to tune: [model class, siamese flag, aux flag].
# The tuning loop instantiates entry[0] and forwards entry[1] / entry[2] as
# the `siamese` / `aux` arguments of train() and accu().
Train_model = [[MLP, False, False],
              [SiameseMLP, True, False],
              [AuxMLP, False, True],
              [AuxsiameseMLP, True, True],
              [CNN, False, False],
              [SiameseCNN, True, False],
              [AuxCNN, False, True],
              [AuxsiameseCNN, True, True],
              [ResNet, False, False],
              [SiameseResNet, True, False],
              [AuxResNet, False, True],
              [AuxsiameseResNet, True, True]]

In [34]:
# Grid search over learning rate x batch size for every entry of Train_model.
# Each configuration is trained 10 times (seeds 0-9, 25 epochs); the mean and
# standard deviation of the test accuracy are stored per model.
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
accuracy = []
std = []

# Unpack each entry instead of reusing the name `models`, which would
# overwrite the list of instantiated models from the parameter-count cell.
for model_number, (model_cls, is_siamese, has_aux) in enumerate(Train_model, start=1):
    test_accuracies = torch.empty((len(gammas), len(batch_sizes)))
    test_stds = torch.empty((len(gammas), len(batch_sizes)))

    for j, gamma in enumerate(gammas):
        for k, batch_size in enumerate(batch_sizes):
            accurate = []
            for i in range(10):
                model = model_cls()
                model.to(device)
                train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=i)
                train(model, train_loader, gamma, 0, 25, verbose=False, siamese=is_siamese, aux=has_aux)
                tr_accuracy, te_accuracy = accu(model, train_loader, test_loader, is_siamese, has_aux)
                # float() makes the score independent of the device it lives on.
                accurate.append(float(te_accuracy))
            scores = torch.tensor(accurate)
            test_accuracies[j, k] = scores.mean()
            test_stds[j, k] = scores.std()

    accuracy.append(test_accuracies)
    std.append(test_stds)

    # argmax() is a flat index into the (gammas x batch_sizes) grid.
    max_index = int(test_accuracies.argmax())
    best_gamma, best_batch = divmod(max_index, len(batch_sizes))
    print('The optimal parameters for model_%d: learning rate %.5f, batch size: %d' %(model_number, gammas[best_gamma], batch_sizes[best_batch]))

The optimal parameters for model_1: learning rate 0.00100, batch size: 16


KeyboardInterrupt: 

In [35]:
# Show the mean-test-accuracy grids (rows: learning rates, columns: batch
# sizes) collected so far; only models whose search finished are present.
accuracy

[tensor([[0.7940, 0.7972, 0.8029, 0.8010, 0.8049],
         [0.8049, 0.8090, 0.8051, 0.8017, 0.8021],
         [0.8054, 0.8039, 0.8035, 0.8017, 0.7962],
         [0.7956, 0.7924, 0.7865, 0.7810, 0.7699]])]

#### Tune alpha for models with auxiliary loss

In [19]:
# Auxiliary-loss models to tune alpha for:
# [model class, siamese flag, aux flag, learning rate, batch size].
# The alpha sweep passes entry[3] as `eta` to train() and entry[4] as
# `batch_size` to load_data().
Train_auxmodel = [[AuxMLP, False, True, 0.005, 32],
              [AuxsiameseMLP, True, True, 0.005, 32],
              [AuxCNN, False, True, 0.001, 32],
              [AuxsiameseCNN, True, True, 0.0001, 128],
              [AuxResNet, False, True, 0.001, 32],
              [AuxsiameseResNet, True, True, 0.001, 32]]

In [14]:
# Verbose alpha sweep restricted to the fourth aux entry (AuxsiameseCNN):
# alpha = 0.0, 0.1, ..., 1.0, ten seeds each, 25 epochs per run.
accuracy_aux, std_aux = [], []
model_number_aux = 0
for entry in [Train_auxmodel[3]]:
    print(entry)
    net_cls, is_siamese, has_aux, lr, batch_size = entry
    test_accuracies_aux = torch.empty((1, 11))
    test_stds_aux = torch.empty((1, 11))
    for j in range(11):
        accurate_aux = []
        for seed in range(10):
            model = net_cls()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, lr, 0, 25, verbose=True, siamese=is_siamese, aux=has_aux, alpha=j/10)
            tr_accuracy, te_accuracy = accu(model, train_loader, test_loader, siamese=is_siamese, aux=has_aux)
            accurate_aux.append(te_accuracy)
        run_scores = torch.FloatTensor(accurate_aux)
        test_accuracies_aux[0, j] = run_scores.mean()
        test_stds_aux[0, j] = run_scores.std()
    accuracy_aux.append(test_accuracies_aux)
    std_aux.append(test_stds_aux)
    max_index = test_accuracies_aux.argmax()
    model_number_aux += 1
    print('The optimal alpha for aux_model_%d is %.2f' %(model_number_aux, max_index/10))

[<class 'CNN_models.AuxsiameseCNN'>, True, True, 0.0001, 128]
Epoch 1/25, Binary loss: 5.569
Epoch 2/25, Binary loss: 5.521
Epoch 3/25, Binary loss: 5.493
Epoch 4/25, Binary loss: 5.470
Epoch 5/25, Binary loss: 5.442
Epoch 6/25, Binary loss: 5.423
Epoch 7/25, Binary loss: 5.399
Epoch 8/25, Binary loss: 5.370
Epoch 9/25, Binary loss: 5.335
Epoch 10/25, Binary loss: 5.296
Epoch 11/25, Binary loss: 5.242
Epoch 12/25, Binary loss: 5.183
Epoch 13/25, Binary loss: 5.119
Epoch 14/25, Binary loss: 5.038
Epoch 15/25, Binary loss: 4.949
Epoch 16/25, Binary loss: 4.880
Epoch 17/25, Binary loss: 4.791
Epoch 18/25, Binary loss: 4.705
Epoch 19/25, Binary loss: 4.616
Epoch 20/25, Binary loss: 4.530
Epoch 21/25, Binary loss: 4.438
Epoch 22/25, Binary loss: 4.361
Epoch 23/25, Binary loss: 4.280
Epoch 24/25, Binary loss: 4.180
Epoch 25/25, Binary loss: 4.120
Epoch 1/25, Binary loss: 5.565
Epoch 2/25, Binary loss: 5.522
Epoch 3/25, Binary loss: 5.476
Epoch 4/25, Binary loss: 5.434
Epoch 5/25, Binary loss

KeyboardInterrupt: 

In [21]:
# Alpha sweep for every auxiliary-loss model in Train_auxmodel.
# For each alpha the model is trained 10 times (seeds 0-9, 25 epochs) with the
# learning rate / batch size stored in its entry; mean and std of the test
# accuracy are kept per model.
alphas = [step / 10 for step in range(11)]   # 0.0, 0.1, ..., 1.0
accuracy_aux = []
std_aux = []

for model_number_aux, (model_cls, is_siamese, has_aux, lr, batch_size) in enumerate(Train_auxmodel, start=1):
    test_accuracies_aux = torch.empty((1, len(alphas)))
    test_stds_aux = torch.empty((1, len(alphas)))
    for j, alpha in enumerate(alphas):
        accurate_aux = []
        for i in range(10):
            model = model_cls()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=i)
            train(model, train_loader, lr, 0, 25, verbose=False, siamese=is_siamese, aux=has_aux, alpha=alpha)
            tr_accuracy, te_accuracy = accu(model, train_loader, test_loader, siamese=is_siamese, aux=has_aux)
            # float() makes the score independent of the device it lives on.
            accurate_aux.append(float(te_accuracy))
        scores = torch.tensor(accurate_aux)
        test_accuracies_aux[0, j] = scores.mean()
        test_stds_aux[0, j] = scores.std()
    accuracy_aux.append(test_accuracies_aux)
    std_aux.append(test_stds_aux)

    # Look the winning alpha up in the grid instead of re-deriving it as index/10.
    max_index = int(test_accuracies_aux.argmax())
    print('The optimal alpha for aux_model_%d is %.2f' %(model_number_aux, alphas[max_index]))

The optimal alpha for aux_model_1 is 0.80
The optimal alpha for aux_model_2 is 1.00
The optimal alpha for aux_model_3 is 0.80
The optimal alpha for aux_model_4 is 0.00
The optimal alpha for aux_model_5 is 0.90
The optimal alpha for aux_model_6 is 0.80


In [22]:
# Show the mean test accuracy per alpha (columns: alpha = 0.0 ... 1.0),
# one 1x11 tensor per auxiliary-loss model.
accuracy_aux

[tensor([[0.8170, 0.8248, 0.8303, 0.8349, 0.8381, 0.8411, 0.8430, 0.8432, 0.8488,
          0.8440, 0.8472]]),
 tensor([[0.8447, 0.8457, 0.8537, 0.8534, 0.8562, 0.8666, 0.8639, 0.8682, 0.8687,
          0.8722, 0.8752]]),
 tensor([[0.8365, 0.8355, 0.8364, 0.8414, 0.8436, 0.8490, 0.8479, 0.8525, 0.8616,
          0.8584, 0.8532]]),
 tensor([[0.7969, 0.7944, 0.7895, 0.7766, 0.7578, 0.7373, 0.7253, 0.7099, 0.6919,
          0.6775, 0.6624]]),
 tensor([[0.8493, 0.8560, 0.8571, 0.8596, 0.8652, 0.8759, 0.8703, 0.8724, 0.8765,
          0.8817, 0.8780]]),
 tensor([[0.8693, 0.8782, 0.8795, 0.8845, 0.8826, 0.8842, 0.8854, 0.8859, 0.8907,
          0.8897, 0.8900]])]

In [14]:
# Inspect the grid of the model whose search was last running.
# NOTE(review): the grid is allocated with torch.empty, so cells that were not
# reached before an interrupt hold uninitialised memory, not accuracies.
test_accuracies

tensor([[7.5550e-01, 8.2610e-01, 8.3650e-01, 8.1070e-01, 8.3090e-01],
        [1.0561e-38, 7.3470e-39, 1.0194e-38, 9.2755e-39, 1.0653e-38],
        [4.1327e-39, 8.9082e-39, 1.0102e-38, 7.3470e-39, 1.0194e-38],
        [9.2755e-39, 1.0653e-38, 4.1327e-39, 8.9082e-39, 1.0102e-38]])

In [7]:
# Final data

In [12]:
# NOTE(review): scratch cell. torch is already imported in the first cell and
# `x` is uninitialised memory (torch.empty) - candidate for removal.
import torch
x = torch.empty(4, 4)

In [9]:
# NOTE(review): scratch check of integer floor division - candidate for removal.
13//7

1

In [23]:
# NOTE(review): scratch cell; `x` is created with torch.empty, so the value
# shown depends on whatever memory / kernel state was present - verify or remove.
x.max()

tensor(2.)

In [8]:
# Final evaluation of MLP: 10 runs (seeds 10-19), batch size 8, lr 5e-4, 25 epochs.
losses8 = torch.empty((10, 25))
accuracies8, times8 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = MLP()
    model.to(device)
    epoch_losses = train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False)
    losses8[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times8.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader) / 1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader) / 1000
    print(tr_accuracy, te_accuracy)
    accuracies8.append(te_accuracy)

test_scores8 = torch.tensor(accuracies8)
print('Mean: %.3f, Std: %.3f' % (test_scores8.mean(), test_scores8.std()))

tensor(1.) tensor(0.8110)
tensor(1.) tensor(0.8030)
tensor(1.) tensor(0.8140)
tensor(1.) tensor(0.8200)
tensor(1.) tensor(0.8110)
tensor(1.) tensor(0.8140)
tensor(1.) tensor(0.8180)
tensor(1.) tensor(0.8200)
tensor(1.) tensor(0.8110)
tensor(1.) tensor(0.8110)
Mean: 0.813, Std: 0.005


In [None]:
# Mean / std of the wall-clock training time (seconds) over the MLP runs.
timing8 = torch.tensor(times8)
print('Mean: %.3f, Std: %.3f' % (timing8.mean(), timing8.std()))

In [None]:
# Final evaluation of SiameseMLP: 10 runs (seeds 10-19), batch size 8, lr 5e-3, 25 epochs.
losses9 = torch.empty((10, 25))
accuracies9, times9 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = SiameseMLP()
    model.to(device)
    epoch_losses = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True)
    losses9[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times9.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader) / 1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader) / 1000
    print(tr_accuracy, te_accuracy)
    accuracies9.append(te_accuracy)

test_scores9 = torch.tensor(accuracies9)
print('Mean: %.3f, Std: %.3f' % (test_scores9.mean(), test_scores9.std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the SiameseMLP runs.
timing9 = torch.tensor(times9)
print('Mean: %.3f, Std: %.3f' % (timing9.mean(), timing9.std()))

In [None]:
# Final evaluation of AuxMLP: 10 runs (seeds 10-19), batch size 16, lr 5e-3,
# auxiliary-loss weight alpha = 0.9, 25 epochs.
losses10 = torch.empty((10, 25))
accuracies10, times10 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = AuxMLP()
    model.to(device)
    epoch_losses = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha=0.9)
    losses10[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times10.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader) / 1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader) / 1000
    print(tr_accuracy, te_accuracy)
    accuracies10.append(te_accuracy)

test_scores10 = torch.tensor(accuracies10)
print('Mean: %.3f, Std: %.3f' % (test_scores10.mean(), test_scores10.std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the AuxMLP runs.
timing10 = torch.tensor(times10)
print('Mean: %.3f, Std: %.3f' % (timing10.mean(), timing10.std()))

In [None]:
# Final evaluation of AuxsiameseMLP: 10 runs (seeds 10-19), batch size 8, lr 5e-3,
# auxiliary-loss weight alpha = 0.7, 25 epochs.
losses11 = torch.empty((10, 25))
accuracies11, times11 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = AuxsiameseMLP()
    model.to(device)
    epoch_losses = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha=0.7)
    losses11[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times11.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader) / 1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader) / 1000
    print(tr_accuracy, te_accuracy)
    accuracies11.append(te_accuracy)

test_scores11 = torch.tensor(accuracies11)
print('Mean: %.3f, Std: %.3f' % (test_scores11.mean(), test_scores11.std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the AuxsiameseMLP runs.
timing11 = torch.tensor(times11)
print('Mean: %.3f, Std: %.3f' % (timing11.mean(), timing11.std()))

In [None]:
# Final evaluation of BaseNet: 10 runs (seeds 10-19), batch size 16, lr 5e-4, 25 epochs.
# NOTE(review): `BaseNet` does not appear in the parameter-count cell or in
# Train_model (those use `CNN`); presumably an older name for the same model -
# confirm it is still exported by the star-imports before running this cell.
accuracies = []
times = []
losses = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    # Time model construction + training only.
    time1 = time.perf_counter()

    model = BaseNet()
    model.to(device)
    losses[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False))
    time2 = time.perf_counter()
    times.append(time2 - time1)

    # Accuracy assumes 1000 samples per loader (matches N=1000 above).
    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies.append(te_accuracy)

print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies).mean(), torch.tensor(accuracies).std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the BaseNet runs.
timing = torch.tensor(times)
print('Mean: %.3f, Std: %.3f' % (timing.mean(), timing.std()))

In [None]:
# Final evaluation of SiameseBaseNet: 10 runs (seeds 10-19), batch size 16, lr 1e-3, 25 epochs.
# NOTE(review): `SiameseBaseNet` does not appear in the parameter-count cell or
# in Train_model (those use `SiameseCNN`); presumably an older name - confirm
# it is still exported by the star-imports before running this cell.
accuracies1 = []
times1 = []
losses1 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    # Time model construction + training only.
    time1 = time.perf_counter()
    model = SiameseBaseNet()
    model.to(device)

    losses1[i-10, :] = torch.tensor(train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=True))
    time2 = time.perf_counter()
    times1.append(time2 - time1)

    # Accuracy assumes 1000 samples per loader (matches N=1000 above).
    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies1.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies1).mean(), torch.tensor(accuracies1).std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the SiameseBaseNet runs.
timing1 = torch.tensor(times1)
print('Mean: %.3f, Std: %.3f' % (timing1.mean(), timing1.std()))

In [None]:
# Final evaluation of AuxBaseNet: 10 runs (seeds 10-19), batch size 8, lr 5e-4,
# auxiliary-loss weight alpha = 1.0, 25 epochs.
# NOTE(review): `AuxBaseNet` does not appear in the parameter-count cell or in
# Train_model (those use `AuxCNN`); presumably an older name - confirm it is
# still exported by the star-imports before running this cell.
accuracies2 = []
times2 = []
losses2 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    # Time model construction + training only.
    time1 = time.perf_counter()

    model = AuxBaseNet()
    model.to(device)
    losses2[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False, aux=True, alpha = 1.0))
    time2 = time.perf_counter()
    times2.append(time2 - time1)

    # Accuracy assumes 1000 samples per loader (matches N=1000 above).
    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies2.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies2).mean(), torch.tensor(accuracies2).std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the AuxBaseNet runs.
timing2 = torch.tensor(times2)
print('Mean: %.3f, Std: %.3f' % (timing2.mean(), timing2.std()))

In [None]:
# Final evaluation of AuxsiameseBaseNet: 10 runs (seeds 10-19), batch size 32,
# lr 5e-3, auxiliary-loss weight alpha = 0.6, 25 epochs.
# NOTE(review): `AuxsiameseBaseNet` does not appear in the parameter-count cell
# or in Train_model (those use `AuxsiameseCNN`); presumably an older name -
# confirm it is still exported by the star-imports before running this cell.
accuracies3 = []
times3 = []
losses3 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    # Time model construction + training only.
    time1 = time.perf_counter()
    
    model = AuxsiameseBaseNet()
    model.to(device)
    losses3[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = 0.6))
    time2 = time.perf_counter()
    times3.append(time2 - time1)

    # Accuracy assumes 1000 samples per loader (matches N=1000 above).
    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies3.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies3).mean(), torch.tensor(accuracies3).std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the AuxsiameseBaseNet runs.
timing3 = torch.tensor(times3)
print('Mean: %.3f, Std: %.3f' % (timing3.mean(), timing3.std()))

In [7]:
# Final evaluation of ResNet (4 residual blocks, 16 channels): 10 runs
# (seeds 10-19), batch size 32, lr 1e-3, 25 epochs, per-epoch loss printed.
losses4 = torch.empty((10, 25))
accuracies4, times4 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = ResNet(nb_residual_blocks=4, input_channels=2, nb_channels=16, kernel_size=3, nb_classes=2)
    model.to(device)
    epoch_losses = train(model, train_loader, 1e-3, 0, 25, verbose=True, siamese=False)
    losses4[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times4.append(time2 - time1)

    tr_accuracy, te_accuracy = accu(model, train_loader, test_loader, siamese=False, aux=False)
    print(tr_accuracy, te_accuracy)
    accuracies4.append(te_accuracy)

test_scores4 = torch.tensor(accuracies4)
print('Mean: %.3f, Std: %.3f' % (test_scores4.mean(), test_scores4.std()))

Epoch 1/25, Binary loss: 19.887
Epoch 2/25, Binary loss: 15.464
Epoch 3/25, Binary loss: 12.644
Epoch 4/25, Binary loss: 9.707
Epoch 5/25, Binary loss: 7.833
Epoch 6/25, Binary loss: 5.845
Epoch 7/25, Binary loss: 4.467
Epoch 8/25, Binary loss: 2.233
Epoch 9/25, Binary loss: 1.382
Epoch 10/25, Binary loss: 0.663
Epoch 11/25, Binary loss: 0.480
Epoch 12/25, Binary loss: 0.254
Epoch 13/25, Binary loss: 0.166
Epoch 14/25, Binary loss: 0.127
Epoch 15/25, Binary loss: 0.094
Epoch 16/25, Binary loss: 0.076
Epoch 17/25, Binary loss: 0.062
Epoch 18/25, Binary loss: 0.052
Epoch 19/25, Binary loss: 0.047
Epoch 20/25, Binary loss: 0.040
Epoch 21/25, Binary loss: 0.037
Epoch 22/25, Binary loss: 0.034
Epoch 23/25, Binary loss: 0.031
Epoch 24/25, Binary loss: 0.027
Epoch 25/25, Binary loss: 0.025
tensor(1., device='cuda:0') tensor(0.8400, device='cuda:0')
Epoch 1/25, Binary loss: 20.017
Epoch 2/25, Binary loss: 14.624
Epoch 3/25, Binary loss: 11.656
Epoch 4/25, Binary loss: 9.744
Epoch 5/25, Binary 

Epoch 17/25, Binary loss: 0.063
Epoch 18/25, Binary loss: 0.055
Epoch 19/25, Binary loss: 0.049
Epoch 20/25, Binary loss: 0.042
Epoch 21/25, Binary loss: 0.038
Epoch 22/25, Binary loss: 0.034
Epoch 23/25, Binary loss: 0.030
Epoch 24/25, Binary loss: 0.025
Epoch 25/25, Binary loss: 0.024
tensor(1., device='cuda:0') tensor(0.8060, device='cuda:0')
Mean: 0.825, Std: 0.011


In [None]:
# Mean / std of the wall-clock training time (seconds) over the ResNet runs.
timing4 = torch.tensor(times4)
print('Mean: %.3f, Std: %.3f' % (timing4.mean(), timing4.std()))

In [None]:
# Final evaluation of SiameseResNet (4 residual blocks, 32 channels): 10 runs
# (seeds 10-19), batch size 32, lr 5e-3, 25 epochs.
losses5 = torch.empty((10, 25))
accuracies5, times5 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = SiameseResNet(nb_residual_blocks=4, input_channels=1, nb_channels=32, kernel_size=3, nb_classes=2)
    model.to(device)
    epoch_losses = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True)
    losses5[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times5.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader) / 1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader) / 1000
    print(tr_accuracy, te_accuracy)
    accuracies5.append(te_accuracy)

test_scores5 = torch.tensor(accuracies5)
print('Mean: %.3f, Std: %.3f' % (test_scores5.mean(), test_scores5.std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the SiameseResNet runs.
timing5 = torch.tensor(times5)
print('Mean: %.3f, Std: %.3f' % (timing5.mean(), timing5.std()))

In [None]:
# Final evaluation of AuxResNet (10 residual blocks, 32 channels): 10 runs
# (seeds 10-19), batch size 16, lr 1e-3, auxiliary-loss weight alpha = 0.6,
# 25 epochs.
losses6 = torch.empty((10, 25))
accuracies6, times6 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = AuxResNet(nb_residual_blocks=10, input_channels=1, nb_channels=32, kernel_size=3, nb_classes=2)
    model.to(device)
    epoch_losses = train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha=0.6)
    losses6[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times6.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader) / 1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader) / 1000
    print(tr_accuracy, te_accuracy)
    accuracies6.append(te_accuracy)

test_scores6 = torch.tensor(accuracies6)
print('Mean: %.3f, Std: %.3f' % (test_scores6.mean(), test_scores6.std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the AuxResNet runs.
timing6 = torch.tensor(times6)
print('Mean: %.3f, Std: %.3f' % (timing6.mean(), timing6.std()))

In [None]:
# Final evaluation of AuxsiameseResNet (4 residual blocks, 32 channels): 10 runs
# (seeds 10-19), batch size 32, lr 5e-3, auxiliary-loss weight alpha = 0.6,
# 25 epochs.
losses7 = torch.empty((10, 25))
accuracies7, times7 = [], []

for run, seed in enumerate(range(10, 20)):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=seed)

    # Time model construction + training only.
    time1 = time.perf_counter()
    model = AuxsiameseResNet(nb_residual_blocks=4, input_channels=1, nb_channels=32, kernel_size=3, nb_classes=2)
    model.to(device)
    epoch_losses = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha=0.6)
    losses7[run, :] = torch.tensor(epoch_losses)
    time2 = time.perf_counter()
    times7.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader) / 1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader) / 1000
    print(tr_accuracy, te_accuracy)
    accuracies7.append(te_accuracy)

test_scores7 = torch.tensor(accuracies7)
print('Mean: %.3f, Std: %.3f' % (test_scores7.mean(), test_scores7.std()))

In [None]:
# Mean / std of the wall-clock training time (seconds) over the AuxsiameseResNet runs.
timing7 = torch.tensor(times7)
print('Mean: %.3f, Std: %.3f' % (timing7.mean(), timing7.std()))

In [None]:
# Optimize learning rate and batch size

In [9]:
# Learning-rate x batch-size grid search for MLP: every configuration is
# trained 10 times (seeds 0-9, 25 epochs) and the mean test accuracy is stored
# at [learning-rate index, batch-size index].
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies1 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = MLP()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        test_accuracies1[j, k] = torch.tensor(accurate).mean()
print(test_accuracies1)

tensor([[0.8057, 0.7989, 0.8030, 0.8060, 0.8008],
        [0.8062, 0.8070, 0.8065, 0.8033, 0.8010],
        [0.8071, 0.8070, 0.8051, 0.8005, 0.7952],
        [0.7974, 0.7917, 0.7891, 0.7792, 0.7682]])


In [30]:
# Same MLP grid search as the previous cell, additionally printing the mean
# test accuracy of each configuration as soon as it is available.
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies1 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = MLP()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=False, aux=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        mean_accuracy = torch.tensor(accurate).mean()
        print(mean_accuracy)
        test_accuracies1[j, k] = mean_accuracy
print(test_accuracies1)

tensor(0.8033, device='cuda:0')
tensor(0.7989, device='cuda:0')
tensor(0.8030, device='cuda:0')
tensor(0.8060, device='cuda:0')
tensor(0.8008, device='cuda:0')
tensor(0.8062, device='cuda:0')
tensor(0.8070, device='cuda:0')
tensor(0.8065, device='cuda:0')
tensor(0.8033, device='cuda:0')
tensor(0.8010, device='cuda:0')
tensor(0.8071, device='cuda:0')
tensor(0.8070, device='cuda:0')
tensor(0.8051, device='cuda:0')
tensor(0.8005, device='cuda:0')
tensor(0.7952, device='cuda:0')
tensor(0.7974, device='cuda:0')
tensor(0.7917, device='cuda:0')
tensor(0.7891, device='cuda:0')
tensor(0.7792, device='cuda:0')
tensor(0.7682, device='cuda:0')
tensor([[0.8033, 0.7989, 0.8030, 0.8060, 0.8008],
        [0.8062, 0.8070, 0.8065, 0.8033, 0.8010],
        [0.8071, 0.8070, 0.8051, 0.8005, 0.7952],
        [0.7974, 0.7917, 0.7891, 0.7792, 0.7682]])


In [12]:
# Show which device the current kernel session is actually using.
device

device(type='cpu')

In [24]:
# Learning-rate x batch-size grid search for SiameseMLP: every configuration
# is trained 10 times (seeds 0-9, 25 epochs) and the mean test accuracy is
# stored at [learning-rate index, batch-size index].
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies2 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = SiameseMLP()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        test_accuracies2[j, k] = torch.tensor(accurate).mean()
print(test_accuracies2)

tensor([[0.8509, 0.8454, 0.8457, 0.8422, 0.8374],
        [0.8414, 0.8386, 0.8371, 0.8364, 0.8293],
        [0.8403, 0.8347, 0.8343, 0.8299, 0.8143],
        [0.8195, 0.8097, 0.7901, 0.7710, 0.7423]])


In [25]:
# Learning-rate x batch-size grid search for AuxMLP with the auxiliary loss
# switched off (alpha = 0.0): every configuration is trained 10 times
# (seeds 0-9, 25 epochs) and the mean test accuracy is stored at
# [learning-rate index, batch-size index].
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies3 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = AuxMLP()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=False, aux=True, alpha=0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        test_accuracies3[j, k] = torch.tensor(accurate).mean()
print(test_accuracies3)

tensor([[0.8190, 0.8242, 0.8229, 0.8176, 0.8115],
        [0.8141, 0.8120, 0.8107, 0.8063, 0.8106],
        [0.8127, 0.8092, 0.8097, 0.8116, 0.8016],
        [0.8058, 0.7968, 0.7874, 0.7730, 0.7571]])


In [None]:
# Learning-rate x batch-size grid search for BaseNet: every configuration is
# trained 10 times (seeds 0-9, 25 epochs) and the mean test accuracy is stored
# at [learning-rate index, batch-size index].
# NOTE(review): `BaseNet` is not among the models used in Train_model (which
# lists `CNN`); presumably an older name - confirm it is still importable.
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies5 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = BaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        test_accuracies5[j, k] = torch.tensor(accurate).mean()
print(test_accuracies5)

In [26]:
# Learning-rate x batch-size grid search for SiameseCNN: every configuration
# is trained 10 times (seeds 0-9, 25 epochs) and the mean test accuracy is
# stored at [learning-rate index, batch-size index].
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies6 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = SiameseCNN()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        test_accuracies6[j, k] = torch.tensor(accurate).mean()
print(test_accuracies6)

tensor([[0.8511, 0.8231, 0.8442, 0.8428, 0.8415],
        [0.8533, 0.8563, 0.8511, 0.8492, 0.8524],
        [0.8526, 0.8517, 0.8529, 0.8496, 0.8533],
        [0.8486, 0.8502, 0.8439, 0.8259, 0.7883]])


In [None]:
# Learning-rate x batch-size grid search for AuxBaseNet with the auxiliary
# loss switched off (alpha = 0.0): every configuration is trained 10 times
# (seeds 0-9, 25 epochs) and the mean test accuracy is stored at
# [learning-rate index, batch-size index].
# NOTE(review): `AuxBaseNet` is not among the models used in Train_model (which
# lists `AuxCNN`); presumably an older name - confirm it is still importable.
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies7 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = AuxBaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=False, aux=True, alpha=0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        test_accuracies7[j, k] = torch.tensor(accurate).mean()
print(test_accuracies7)

In [27]:
# Learning-rate x batch-size grid search for ResNet (4 residual blocks,
# 32 channels): every configuration is trained 10 times (seeds 0-9, 25 epochs)
# and the mean test accuracy is stored at [learning-rate index, batch-size index].
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies9 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = ResNet(nb_residual_blocks = 4, input_channels = 2, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            train(model, train_loader, gamma, 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            # float() works for CPU and CUDA scalars alike; the previous
            # torch.cuda.FloatTensor(...) crashed on CPU-only runtimes.
            accurate.append(float(te_accuracy))
        test_accuracies9[j, k] = torch.tensor(accurate).mean()
print(test_accuracies9)

tensor([[0.8227, 0.8301, 0.8310, 0.8322, 0.8278],
        [0.8387, 0.8398, 0.8384, 0.8336, 0.8317],
        [0.8332, 0.8305, 0.8267, 0.8279, 0.8229],
        [0.8151, 0.8161, 0.8175, 0.8149, 0.8153]])


In [28]:
# Learning-rate x batch-size grid search for SiameseResNet (4 residual blocks,
# 32 channels): 10 seeds per configuration, 25 epochs each; the mean test
# accuracy is stored at [learning-rate index, batch-size index].
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies10 = torch.empty((len(gammas), len(batch_sizes)))

for j, gamma in enumerate(gammas):
    for k, batch_size in enumerate(batch_sizes):
        accurate = []
        for seed in range(10):
            model = SiameseResNet(nb_residual_blocks=4, input_channels=1, nb_channels=32, kernel_size=3, nb_classes=2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=seed)
            loss = train(model, train_loader, gamma, 0, 25, verbose=False, siamese=True)
            n_errors = compute_nb_errors_siamese(model, test_loader)
            te_accuracy = 1 - n_errors/1000
            accurate.append(te_accuracy)
        config_scores = torch.FloatTensor(accurate)
        test_accuracies10[j, k] = config_scores.mean()
print(test_accuracies10)

KeyboardInterrupt: 

In [29]:
# Inspect the grid filled by the previous cell. If that cell was interrupted
# before finishing, only the entries assigned before the interrupt are results.
test_accuracies10

tensor([[0.8156, 0.8483, 0.8466, 0.8430, 0.8749],
        [0.8823, 0.8833, 0.8772, 0.8711, 0.8317],
        [0.8332, 0.8305, 0.8267, 0.8279, 0.8229],
        [0.8151, 0.8161, 0.8175, 0.8149, 0.8153]])

In [None]:
# Grid search over learning rates (gammas) x batch sizes for AuxResNet.
# Each grid entry stores the test accuracy averaged over 10 data seeds.
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
# Start from NaN instead of torch.empty: if the search is interrupted, entries
# that were never reached stay visibly unset rather than holding arbitrary
# leftover memory that can be mistaken for real accuracies.
test_accuracies11 = torch.full((len(gammas), len(batch_sizes)), float('nan'))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = AuxResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            # Positional arguments after the loader: learning rate, weight decay, epochs.
            loss = train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.0)
            # The error count is divided by 1000 -- presumably the test-set size (N=1000); confirm.
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies11[j,k] = torch.tensor(accurate).mean()
print(test_accuracies11)

In [None]:
# Optimize alpha for auxiliary loss

In [None]:
# Sweep the alpha argument of train() over 0.0, 0.1, ..., 1.0 for AuxMLP and
# report the mean / std of the test accuracy across 10 data seeds per value.
for step in range(11):
    alpha = step / 10
    accuracies100 = []

    for seed in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=16, seed=seed)

        model = AuxMLP()
        model.to(device)
        # Positional arguments after the loader: learning rate, weight decay, epochs.
        loss = train(
            model, train_loader, 5e-3, 0, 25,
            verbose=False, siamese=False, aux=True, alpha=alpha,
        )

        # The error count is divided by 1000 -- presumably the test-set size; confirm.
        n_errors = compute_nb_errors_auxsiamese(model, test_loader)
        accuracies100.append(1 - n_errors / 1000)

    scores = torch.tensor(accuracies100)
    print('Mean: %.3f, Std: %.3f' % (scores.mean(), scores.std()))

In [None]:
# Sweep the alpha argument of train() over 0.0, 0.1, ..., 1.0 for AuxsiameseMLP
# and report the mean / std of the test accuracy across 10 data seeds per value.
for step in range(11):
    alpha = step / 10
    accuracies101 = []

    for seed in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=8, seed=seed)

        model = AuxsiameseMLP()
        model.to(device)
        # Positional arguments after the loader: learning rate, weight decay, epochs.
        loss = train(
            model, train_loader, 5e-3, 0, 25,
            verbose=False, siamese=True, aux=True, alpha=alpha,
        )

        # The error count is divided by 1000 -- presumably the test-set size; confirm.
        n_errors = compute_nb_errors_auxsiamese(model, test_loader)
        accuracies101.append(1 - n_errors / 1000)

    scores = torch.tensor(accuracies101)
    print('Mean: %.3f, Std: %.3f' % (scores.mean(), scores.std()))

In [None]:
# Sweep the alpha argument of train() over 0.0, 0.1, ..., 1.0 for
# AuxsiameseBaseNet and report the mean / std of the test accuracy across
# 10 data seeds per value.
for step in range(11):
    alpha = step / 10
    accuracies102 = []

    for seed in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=32, seed=seed)

        model = AuxsiameseBaseNet()
        model.to(device)
        # Positional arguments after the loader: learning rate, weight decay, epochs.
        loss = train(
            model, train_loader, 5e-3, 0, 25,
            verbose=False, siamese=True, aux=True, alpha=alpha,
        )

        # The error count is divided by 1000 -- presumably the test-set size; confirm.
        n_errors = compute_nb_errors_auxsiamese(model, test_loader)
        accuracies102.append(1 - n_errors / 1000)

    scores = torch.tensor(accuracies102)
    print('Mean: %.3f, Std: %.3f' % (scores.mean(), scores.std()))

In [None]:
# Sweep the alpha argument of train() over 0.0, 0.1, ..., 1.0 for AuxBaseNet
# and report the mean / std of the test accuracy across 10 data seeds per value.
for step in range(11):
    alpha = step / 10
    accuracies103 = []

    for seed in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=8, seed=seed)

        model = AuxBaseNet()
        model.to(device)
        # Positional arguments after the loader: learning rate, weight decay, epochs.
        train(
            model, train_loader, 5e-4, 0, 25,
            verbose=False, siamese=False, aux=True, alpha=alpha,
        )

        # The error count is divided by 1000 -- presumably the test-set size; confirm.
        n_errors = compute_nb_errors_auxsiamese(model, test_loader)
        accuracies103.append(1 - n_errors / 1000)

    scores = torch.tensor(accuracies103)
    print('Mean: %.3f, Std: %.3f' % (scores.mean(), scores.std()))

In [None]:
# Sweep the alpha argument of train() over 0.0, 0.1, ..., 1.0 for
# AuxsiameseResNet and report the mean / std of the test accuracy across
# 10 data seeds per value.
for step in range(11):
    alpha = step / 10
    accuracies104 = []

    for seed in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=16, seed=seed)

        model = AuxsiameseResNet(
            nb_residual_blocks=4,
            input_channels=1,
            nb_channels=32,
            kernel_size=3,
            nb_classes=2,
        )
        model.to(device)
        # Positional arguments after the loader: learning rate, weight decay, epochs.
        train(
            model, train_loader, 1e-3, 0, 25,
            verbose=False, siamese=True, aux=True, alpha=alpha,
        )

        # The error count is divided by 1000 -- presumably the test-set size; confirm.
        n_errors = compute_nb_errors_auxsiamese(model, test_loader)
        accuracies104.append(1 - n_errors / 1000)

    scores = torch.tensor(accuracies104)
    print('Mean: %.3f, Std: %.3f' % (scores.mean(), scores.std()))

In [None]:
# Sweep the alpha argument of train() over 0.0, 0.1, ..., 1.0 for AuxResNet
# and report the mean / std of the test accuracy across 10 data seeds per value.
for step in range(11):
    alpha = step / 10
    accuracies105 = []

    for seed in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=64, seed=seed)

        model = AuxResNet(
            nb_residual_blocks=4,
            input_channels=1,
            nb_channels=32,
            kernel_size=3,
            nb_classes=2,
        )
        model.to(device)
        # Positional arguments after the loader: learning rate, weight decay, epochs.
        train(
            model, train_loader, 1e-3, 0, 25,
            verbose=False, siamese=False, aux=True, alpha=alpha,
        )

        # The error count is divided by 1000 -- presumably the test-set size; confirm.
        n_errors = compute_nb_errors_auxsiamese(model, test_loader)
        accuracies105.append(1 - n_errors / 1000)

    scores = torch.tensor(accuracies105)
    print('Mean: %.3f, Std: %.3f' % (scores.mean(), scores.std()))

In [10]:
# Scratch check of the Conv2d weight layout: 2 input channels, 3 output
# channels and a 3x3 kernel give a weight tensor of shape (3, 2, 3, 3).
# The pasted ">>>" REPL prompts were dropped so the cell is plain Python and
# does not depend on IPython stripping them.
import torch
from torch import nn
layer = nn.Conv2d(2, 3, kernel_size=3, stride=1, padding=0)
layer.weight

Parameter containing:
tensor([[[[-0.2021, -0.2152,  0.0717],
          [-0.0338,  0.1757,  0.1063],
          [-0.2043, -0.0632, -0.1443]],

         [[ 0.1711, -0.2197, -0.0794],
          [-0.0367, -0.1827,  0.1783],
          [-0.0457, -0.1880, -0.1476]]],


        [[[-0.1065,  0.2347,  0.0664],
          [ 0.0989,  0.2181, -0.2000],
          [-0.2090,  0.1658, -0.0477]],

         [[ 0.1767, -0.0043,  0.0595],
          [ 0.0525,  0.2171,  0.1257],
          [ 0.0687, -0.2002,  0.1470]]],


        [[[ 0.0411, -0.1230,  0.2138],
          [ 0.0970,  0.0338,  0.1252],
          [ 0.0936, -0.1127, -0.1848]],

         [[ 0.1284, -0.2110,  0.1211],
          [-0.1395,  0.1456, -0.2234],
          [-0.1436, -0.2100,  0.0385]]]], requires_grad=True)