In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import dlc_practical_prologue as prologue
import time

In [2]:
from MLP_models import *
from CNN_models import *
from Resnet_models import *
from helpers import *
from data_loader import *

  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# Check if GPU is available
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

#### Number of parameters of each model

In [4]:
model_1 = MLP()
model_2 = SiameseMLP()
model_3 = AuxMLP()
model_4 = AuxsiameseMLP()
model_5 = CNN()
model_6 = SiameseCNN()
model_7 = AuxCNN()
model_8 = AuxsiameseCNN()
model_9 = ResNet()
model_10 = SiameseResNet()
model_11 = AuxResNet()
model_12 = AuxsiameseResNet()

models = [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, model_10, model_11, model_12]
for i in range(len(models)):
    #print('The number of parameters in' count_param(i))
    print('The number of parameters in model_%d is %d' %
                  (i+1, count_param(models[i])))

The number of parameters in model_1 is 73314
The number of parameters in model_2 is 33172
The number of parameters in model_3 is 66302
The number of parameters in model_4 is 33172
The number of parameters in model_5 is 72536
The number of parameters in model_6 is 72268
The number of parameters in model_7 is 144494
The number of parameters in model_8 is 72268
The number of parameters in model_9 is 75746
The number of parameters in model_10 is 77812
The number of parameters in model_11 is 152692
The number of parameters in model_12 is 77812


#### Training function

In [5]:
def train(model, train_loader, eta, decay, n_epochs=25, verbose=False, siamese=False, aux=False, alpha = 0):
    '''
    model: learning model
    '''
    binary_crit = torch.nn.BCELoss()
    aux_crit = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=eta, weight_decay=decay)
    tr_losses = []
    tr_accuracies = []

    for e in range(n_epochs):
        # Reset training/validation loss
        tr_loss = 0

        # Training model
        model.train()

        for train_input, train_target, train_classes in iter(train_loader):
            # Forward pass
            
            if aux == True:
                train_1, train_2 = train_input.unbind(1)
                output, aux1, aux2 = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
                
            elif siamese == True:
                train_1, train_2 = train_input.unbind(1)
                output = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
                
            else:
                output = model(train_input)
                
            # Binary classification loss
            binary_loss = binary_crit(output, train_target.float())
            total_loss = binary_loss
            
            # Auxiliary loss
            if aux == True:

                aux_loss1 = aux_crit(aux1, train_classes[:,0])
                aux_loss2 = aux_crit(aux2, train_classes[:,1])
                aux_loss = aux_loss1 + aux_loss2
                
                # Total loss = Binary loss + aux loss * alpha
                total_loss = binary_loss + aux_loss * alpha
            
            tr_loss += total_loss

            # Backward pass
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Collect loss data
        tr_losses.append(tr_loss)

        if verbose:
            print('Epoch %d/%d, Binary loss: %.3f' %
                  (e+1, n_epochs, tr_loss))
    return tr_losses

In [6]:
def accu(model, train_loader, test_loader, siamese = False, aux = False):
    
    if siamese == False and aux == False:
        tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
        te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    elif aux == True:
        tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    else:
        tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
        te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            
    return tr_accuracy, te_accuracy

In [7]:
Train_model = [[MLP, False, False],
              [SiameseMLP, True, False],
              [AuxMLP, False, True],
              [AuxsiameseMLP, True, True],
              [CNN, False, False],
              [SiameseCNN, True, False],
              [AuxCNN, False, True],
              [AuxsiameseCNN, True, True],
              [ResNet, False, False],
              [SiameseResNet, True, False],
              [AuxResNet, False, True],
              [AuxsiameseResNet, True, True]]

In [8]:
# train function

#### Tune learning rate and batch size for each model

In [12]:
accuracy = []
std = []
model_number = 0
for models in Train_model:
    gammas = [5e-3, 1e-3, 5e-4, 1e-4]
    batch_sizes = [8, 16, 32, 64, 128]
    test_accuracies = torch.empty((len(gammas), len(batch_sizes)))
    test_stds = torch.empty((len(gammas), len(batch_sizes)))

    for j in range(len(gammas)):
        for k in range(len(batch_sizes)):
            accurate = []
            for i in range(10):
                model = models[0]()
                model.to(device)
                train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
                train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=models[1], aux=models[2])
                tr_accuracy, te_accuracy = accu(model, train_loader, test_loader, siamese=models[1], aux=models[2])
                accurate.append(te_accuracy)
            test_accuracies[j,k] =  torch.FloatTensor(accurate).mean()
            test_stds[j,k] =  torch.FloatTensor(accurate).std()
    accuracy.append(test_accuracies)
    std.append(test_stds)
    max_index = test_accuracies.argmax() 
    model_number += 1
    print('The optimal parameters for model_%d: learning rate %.5f, batch size: %d' %(model_number, gammas[(max_index+1)//5], batch_sizes[(max_index+1)%5-1]))

The optimal parameters for model_1: learning rate 0.00100, batch size: 16
The optimal parameters for model_2: learning rate 0.00500, batch size: 8
The optimal parameters for model_3: learning rate 0.00500, batch size: 32
The optimal parameters for model_4: learning rate 0.00500, batch size: 8
The optimal parameters for model_5: learning rate 0.00050, batch size: 8
The optimal parameters for model_6: learning rate 0.00010, batch size: 128
The optimal parameters for model_7: learning rate 0.00100, batch size: 32
The optimal parameters for model_8: learning rate 0.00010, batch size: 128
The optimal parameters for model_9: learning rate 0.00100, batch size: 32
The optimal parameters for model_10: learning rate 0.00500, batch size: 8


RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:73] data. DefaultCPUAllocator: not enough memory: you tried to allocate 31360000 bytes. Buy new RAM!

#### Tune alpha for models with auxiliary loss

In [14]:
test_accuracies

tensor([[7.5550e-01, 8.2610e-01, 8.3650e-01, 8.1070e-01, 8.3090e-01],
        [1.0561e-38, 7.3470e-39, 1.0194e-38, 9.2755e-39, 1.0653e-38],
        [4.1327e-39, 8.9082e-39, 1.0102e-38, 7.3470e-39, 1.0194e-38],
        [9.2755e-39, 1.0653e-38, 4.1327e-39, 8.9082e-39, 1.0102e-38]])

In [7]:
# Final data

In [12]:
import torch
x = torch.empty(4, 4)

In [21]:
5%3

2

In [23]:
x.max()

tensor(2.)

In [8]:
accuracies8 = []
times8 = []
losses8 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()
    #model = SiameseBaseNet()
    model = MLP()
    model.to(device)
    losses8[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False))
    time2 = time.perf_counter()
    times8.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies8.append(te_accuracy)

print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies8).mean(), torch.tensor(accuracies8).std()))

tensor(1.) tensor(0.8110)
tensor(1.) tensor(0.8030)
tensor(1.) tensor(0.8140)
tensor(1.) tensor(0.8200)
tensor(1.) tensor(0.8110)
tensor(1.) tensor(0.8140)
tensor(1.) tensor(0.8180)
tensor(1.) tensor(0.8200)
tensor(1.) tensor(0.8110)
tensor(1.) tensor(0.8110)
Mean: 0.813, Std: 0.005


In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times8).mean(), torch.tensor(times8).std()))

In [None]:
accuracies9 = []
times9 = []
losses9 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()
    
    model = SiameseMLP()
    model.to(device)

    losses9[i-10,:] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True))
    time2 = time.perf_counter()
    times9.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies9.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies9).mean(), torch.tensor(accuracies9).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times9).mean(), torch.tensor(times9).std()))

In [None]:
accuracies10 = []
times10 = []
losses10 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()

    model = AuxMLP()
    model.to(device)
    losses10[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.9))
    time2 = time.perf_counter()
    times10.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies10.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies10).mean(), torch.tensor(accuracies10).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times10).mean(), torch.tensor(times10).std()))

In [None]:
accuracies11 = []
times11 = []
losses11 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()

    model = AuxsiameseMLP()
    model.to(device)
    losses11[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = 0.7))
    time2 = time.perf_counter()
    times11.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies11.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies11).mean(), torch.tensor(accuracies11).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times11).mean(), torch.tensor(times11).std()))

In [None]:
accuracies = []
times = []
losses = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()

    model = BaseNet()
    model.to(device)
    losses[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False))
    time2 = time.perf_counter()
    times.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies.append(te_accuracy)

print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies).mean(), torch.tensor(accuracies).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times).mean(), torch.tensor(times).std()))

In [None]:
accuracies1 = []
times1 = []
losses1 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()
    model = SiameseBaseNet()
    model.to(device)

    losses1[i-10, :] = torch.tensor(train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=True))
    time2 = time.perf_counter()
    times1.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies1.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies1).mean(), torch.tensor(accuracies1).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times1).mean(), torch.tensor(times1).std()))

In [None]:
accuracies2 = []
times2 = []
losses2 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()

    model = AuxBaseNet()
    model.to(device)
    losses2[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False, aux=True, alpha = 1.0))
    time2 = time.perf_counter()
    times2.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies2.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies2).mean(), torch.tensor(accuracies2).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times2).mean(), torch.tensor(times2).std()))

In [None]:
accuracies3 = []
times3 = []
losses3 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()
    
    model = AuxsiameseBaseNet()
    model.to(device)
    losses3[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = 0.6))
    time2 = time.perf_counter()
    times3.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies3.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies3).mean(), torch.tensor(accuracies3).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times3).mean(), torch.tensor(times3).std()))

In [10]:
accuracies4 = []
times4 = []
losses4 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()

    model = ResNet(nb_residual_blocks = 4, input_channels = 2, nb_channels = 16, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses4[i-10, :] = torch.tensor(train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=False))
    time2 = time.perf_counter()
    times4.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies4.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies4).mean(), torch.tensor(accuracies4).std()))

tensor(1.) tensor(0.8200)
tensor(1.) tensor(0.8120)
tensor(1.) tensor(0.8090)
tensor(1.) tensor(0.8200)


KeyboardInterrupt: 

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times4).mean(), torch.tensor(times4).std()))

In [None]:
accuracies5 = []
times5 = []
losses5 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()
    
    model = SiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses5[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True))
    time2 = time.perf_counter()
    times5.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies5.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies5).mean(), torch.tensor(accuracies5).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times5).mean(), torch.tensor(times5).std()))

In [None]:
accuracies6 = []
times6 = []
losses6 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()
    
    model = AuxResNet(nb_residual_blocks = 10, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses6[i-10, :] = torch.tensor(train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.6))
    time2 = time.perf_counter()
    times6.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies6.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies6).mean(), torch.tensor(accuracies6).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times6).mean(), torch.tensor(times6).std()))

In [None]:
accuracies7 = []
times7 = []
losses7 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()
    #model = SiameseBaseNet()
    #model = BaseNet()
    model = AuxsiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses7[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = 0.6))
    time2 = time.perf_counter()
    times7.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies7.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies7).mean(), torch.tensor(accuracies7).std()))

In [None]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times7).mean(), torch.tensor(times7).std()))

In [None]:
# Optimize learning rate and batch size

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies1 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = MLP()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies1[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies1)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies2 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = SiameseMLP()
            model.to(device)
            #model = BaseNet()
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies2[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies2)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies3 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = AuxMLP()
            model.to(device)
            #model = BaseNet()
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies3[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies3)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies5 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = BaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies5[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies5)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies6 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = SiameseBaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies6[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies6)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies7 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = AuxBaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies7[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies7)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies9 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = ResNet(nb_residual_blocks = 4, input_channels = 2, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies9[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies9)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies10 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = SiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            loss = train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies10[j,k] =  torch.FloatTensor(accurate).mean()
print(test_accuracies10)

In [None]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies11 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = AuxResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            loss = train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies11[j,k] =  torch.FloatTensor(accurate).mean()
print(test_accuracies11)

In [None]:
# Optimize alpha for auxiliary loss

In [None]:
for j in range(11):
    accuracies100 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
        
        model = AuxMLP()
        model.to(device)
        loss = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies100.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies100).mean(), torch.tensor(accuracies100).std()))

In [None]:
for j in range(11):
    accuracies101 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)

        model = AuxsiameseMLP()
        model.to(device)
        loss = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies101.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies101).mean(), torch.tensor(accuracies101).std()))

In [None]:
for j in range(11):
    accuracies102 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)

        model = AuxsiameseBaseNet()
        model.to(device)
        loss = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies102.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies102).mean(), torch.tensor(accuracies102).std()))

In [None]:
for j in range(11):
    accuracies103 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)

        model = AuxBaseNet()
        model.to(device)
        train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies103.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies103).mean(), torch.tensor(accuracies103).std()))

In [None]:
for j in range(11):
    accuracies104 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)

        model = AuxsiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
        model.to(device)
        train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies104.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies104).mean(), torch.tensor(accuracies104).std()))

In [None]:
for j in range(11):
    accuracies105 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=64, seed=i)

        model = AuxResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
        model.to(device)
        train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies105.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies105).mean(), torch.tensor(accuracies105).std()))

In [10]:
>>> import torch
>>> from torch import nn
>>> layer = nn.Conv2d(2, 3, kernel_size=3, stride=1, padding=0)
>>> layer.weight

Parameter containing:
tensor([[[[-0.2021, -0.2152,  0.0717],
          [-0.0338,  0.1757,  0.1063],
          [-0.2043, -0.0632, -0.1443]],

         [[ 0.1711, -0.2197, -0.0794],
          [-0.0367, -0.1827,  0.1783],
          [-0.0457, -0.1880, -0.1476]]],


        [[[-0.1065,  0.2347,  0.0664],
          [ 0.0989,  0.2181, -0.2000],
          [-0.2090,  0.1658, -0.0477]],

         [[ 0.1767, -0.0043,  0.0595],
          [ 0.0525,  0.2171,  0.1257],
          [ 0.0687, -0.2002,  0.1470]]],


        [[[ 0.0411, -0.1230,  0.2138],
          [ 0.0970,  0.0338,  0.1252],
          [ 0.0936, -0.1127, -0.1848]],

         [[ 0.1284, -0.2110,  0.1211],
          [-0.1395,  0.1456, -0.2234],
          [-0.1436, -0.2100,  0.0385]]]], requires_grad=True)