In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import dlc_practical_prologue as prologue
import time

In [2]:
from MLP_models import *
from CNN_models import *
from Resnet_models import *
from helpers import *
from data_loader import *

  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# Run on the GPU when CUDA reports one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#### Number of parameters of each model

In [4]:
# Every architecture under comparison, in the order used for the model_N numbering
architectures = (
    MLP, SiameseMLP, AuxMLP, AuxsiameseMLP,
    CNN, SiameseCNN, AuxCNN, AuxsiameseCNN,
    ResNet, SiameseResNet, AuxResNet, AuxsiameseResNet,
)

# Build one instance of each and keep the individual model_N names available
models = [build() for build in architectures]
(model_1, model_2, model_3, model_4,
 model_5, model_6, model_7, model_8,
 model_9, model_10, model_11, model_12) = models

# Report what count_param returns for each model (numbering is 1-based)
for position, net in enumerate(models, start=1):
    print('The number of parameters in model_%d is %d' %
                  (position, count_param(net)))

The number of parameters in model_1 is 73314
The number of parameters in model_2 is 33172
The number of parameters in model_3 is 66302
The number of parameters in model_4 is 33172
The number of parameters in model_5 is 72536
The number of parameters in model_6 is 72268
The number of parameters in model_7 is 144494
The number of parameters in model_8 is 72268
The number of parameters in model_9 is 75746
The number of parameters in model_10 is 77812
The number of parameters in model_11 is 152692
The number of parameters in model_12 is 77812


#### Training function

In [5]:
def train(model, train_loader, eta, decay, n_epochs=25, verbose=False, siamese=False, aux=False, alpha = 0):
    '''
    Train `model` with Adam and return the training loss of every epoch.

    model: network to optimise. It is called as `model(x)` by default and as
        `model(x1, x2)` when `siamese` or `aux` is set. With `aux` set it must
        return `(output, aux1, aux2)`; otherwise it must return `output` only.
        `output` is passed to BCELoss, which requires values in [0, 1].
    train_loader: iterable yielding `(train_input, train_target, train_classes)`
        batches. `train_classes[:, 0]` / `train_classes[:, 1]` are the targets
        of the two auxiliary outputs.
    eta: learning rate given to Adam.
    decay: weight decay given to Adam.
    n_epochs: number of passes over `train_loader`.
    verbose: when true, print the loss after every epoch.
    siamese: unbind the input along dim 1 into two tensors and feed them to the
        model as separate arguments.
    aux: same input handling as `siamese`, plus the auxiliary cross-entropy
        losses. Takes precedence over `siamese`.
    alpha: weight of the summed auxiliary losses in the total loss.

    Returns a list with one float per epoch: the total loss summed over all
    batches of that epoch (a sum, not a mean).
    '''
    binary_crit = torch.nn.BCELoss()
    aux_crit = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=eta, weight_decay=decay)
    tr_losses = []

    for e in range(n_epochs):
        # Loss accumulated over the batches of this epoch
        tr_loss = 0

        model.train()

        for train_input, train_target, train_classes in train_loader:

            # Two-input models receive the two slices along dim 1 separately,
            # each with dim 1 restored as a singleton
            if aux or siamese:
                train_1, train_2 = train_input.unbind(1)
                train_1, train_2 = train_1.unsqueeze(1), train_2.unsqueeze(1)

            # Forward pass
            if aux:
                output, aux1, aux2 = model(train_1, train_2)
            elif siamese:
                output = model(train_1, train_2)
            else:
                output = model(train_input)

            # Binary classification loss
            total_loss = binary_crit(output, train_target.float())

            # Total loss = binary loss + alpha * (aux loss 1 + aux loss 2)
            if aux:
                aux_loss = (aux_crit(aux1, train_classes[:, 0])
                            + aux_crit(aux2, train_classes[:, 1]))
                total_loss = total_loss + aux_loss * alpha

            tr_loss += float(total_loss)

            # Backward pass
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Collect loss data
        tr_losses.append(tr_loss)

        if verbose:
            # The reported value includes the weighted auxiliary term when aux is set
            print('Epoch %d/%d, Total loss: %.3f' %
                  (e+1, n_epochs, tr_loss))
    return tr_losses

In [43]:
def accu(model, train_loader, test_loader, siamese = False, aux = False, n_samples = 1000):
    '''
    Return `(train_accuracy, test_accuracy)` for `model`.

    model: network to evaluate.
    train_loader, test_loader: loaders handed unchanged to the error counter.
    siamese: use `compute_nb_errors_siamese` as the error counter.
    aux: use `compute_nb_errors_auxsiamese` as the error counter; takes
        precedence over `siamese`. With both flags false, `compute_nb_errors`
        is used.
    n_samples: number of samples behind each loader, used to turn the error
        count into an accuracy. The default matches the `N=1000` used by the
        `load_data` calls in this notebook; pass the real size if it differs.

    Each accuracy is `1 - errors / n_samples`.
    '''
    # Pick the error counter matching the calling convention of the model
    if aux:
        count_errors = compute_nb_errors_auxsiamese
    elif siamese:
        count_errors = compute_nb_errors_siamese
    else:
        count_errors = compute_nb_errors

    tr_accuracy = 1 - count_errors(model, train_loader)/n_samples
    te_accuracy = 1 - count_errors(model, test_loader)/n_samples

    return tr_accuracy, te_accuracy

In [7]:
# train function

#### Tune learning rate and batch size for each model

In [8]:
# One row per architecture: [model class, siamese flag, aux flag]
Train_model = [
    [MLP,              False, False],
    [SiameseMLP,       True,  False],
    [AuxMLP,           False, True],
    [AuxsiameseMLP,    True,  True],
    [CNN,              False, False],
    [SiameseCNN,       True,  False],
    [AuxCNN,           False, True],
    [AuxsiameseCNN,    True,  True],
    [ResNet,           False, False],
    [SiameseResNet,    True,  False],
    [AuxResNet,        False, True],
    [AuxsiameseResNet, True,  True],
]

In [10]:
# Grid-search the learning rate and batch size of every model in Train_model.
# Each (learning rate, batch size) pair is scored by the mean test accuracy over
# n_runs trainings that call load_data with seed=0..n_runs-1.
# `alpha` is left at train()'s default of 0, so the auxiliary losses carry no
# weight during this search.
# NOTE(review): the best setting is chosen on test accuracy; a held-out
# validation split would avoid tuning on the test set.
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
n_runs = 10

accuracy = []   # per model: mean test accuracy, rows = gammas, columns = batch_sizes
std = []        # per model: std of the test accuracy, same layout
model_number = 0
for model_cls, is_siamese, use_aux in Train_model:
    test_accuracies = torch.empty((len(gammas), len(batch_sizes)))
    test_stds = torch.empty((len(gammas), len(batch_sizes)))

    for j, gamma in enumerate(gammas):
        for k, batch_size in enumerate(batch_sizes):
            accurate = []
            for i in range(n_runs):
                # Fresh model for every run
                model = model_cls()
                model.to(device)
                train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=i)
                train(model, train_loader, gamma, 0, 25, verbose=False, siamese=is_siamese, aux=use_aux)
                te_accuracy = accu(model, train_loader, test_loader, siamese=is_siamese, aux=use_aux)[1]
                accurate.append(te_accuracy)
            runs = torch.FloatTensor(accurate)
            test_accuracies[j, k] = runs.mean()
            test_stds[j, k] = runs.std()
    accuracy.append(test_accuracies)
    std.append(test_stds)

    # argmax() indexes the flattened grid; convert it back to (row, column)
    best_row, best_col = divmod(int(test_accuracies.argmax()), len(batch_sizes))
    model_number += 1
    print('The optimal parameters for model_%d: learning rate %.5f, batch size: %d' %(model_number, gammas[best_row], batch_sizes[best_col]))

The optimal parameters for model_1: learning rate 0.00100, batch size: 16
The optimal parameters for model_2: learning rate 0.00500, batch size: 8
The optimal parameters for model_3: learning rate 0.00500, batch size: 32
The optimal parameters for model_4: learning rate 0.00500, batch size: 8
The optimal parameters for model_5: learning rate 0.00050, batch size: 8
The optimal parameters for model_6: learning rate 0.00050, batch size: 128
The optimal parameters for model_7: learning rate 0.00100, batch size: 32
The optimal parameters for model_8: learning rate 0.00050, batch size: 128
The optimal parameters for model_9: learning rate 0.00100, batch size: 32
The optimal parameters for model_10: learning rate 0.00500, batch size: 8
The optimal parameters for model_11: learning rate 0.00100, batch size: 8
The optimal parameters for model_12: learning rate 0.00500, batch size: 8


In [13]:
# One tensor per model: mean test accuracy, rows = learning rates, columns = batch sizes
accuracy

[tensor([[0.8028, 0.8028, 0.8011, 0.7980, 0.8034],
         [0.8052, 0.8087, 0.8048, 0.8018, 0.8023],
         [0.8053, 0.8040, 0.8033, 0.8020, 0.7962],
         [0.7955, 0.7924, 0.7864, 0.7810, 0.7700]]),
 tensor([[0.8450, 0.8439, 0.8443, 0.8428, 0.8395],
         [0.8398, 0.8373, 0.8358, 0.8313, 0.8309],
         [0.8371, 0.8346, 0.8335, 0.8298, 0.8136],
         [0.8200, 0.8132, 0.8003, 0.7841, 0.7601]]),
 tensor([[0.8176, 0.8136, 0.8179, 0.8141, 0.8142],
         [0.8148, 0.8105, 0.8099, 0.8053, 0.8041],
         [0.8100, 0.8112, 0.8076, 0.8018, 0.7954],
         [0.7969, 0.7868, 0.7751, 0.7666, 0.7541]]),
 tensor([[0.8450, 0.8439, 0.8443, 0.8428, 0.8395],
         [0.8398, 0.8373, 0.8358, 0.8313, 0.8309],
         [0.8371, 0.8346, 0.8335, 0.8298, 0.8136],
         [0.8200, 0.8132, 0.8003, 0.7841, 0.7601]]),
 tensor([[0.7928, 0.8029, 0.7966, 0.7924, 0.8143],
         [0.8269, 0.8314, 0.8297, 0.8336, 0.8243],
         [0.8347, 0.8330, 0.8296, 0.8322, 0.8221],
         [0.8282, 0.821

In [15]:
# One tensor per model: std of the test accuracy, rows = learning rates, columns = batch sizes
std

[tensor([[0.0121, 0.0104, 0.0113, 0.0159, 0.0098],
         [0.0086, 0.0117, 0.0091, 0.0130, 0.0117],
         [0.0097, 0.0106, 0.0093, 0.0128, 0.0129],
         [0.0138, 0.0105, 0.0157, 0.0163, 0.0138]]),
 tensor([[0.0163, 0.0200, 0.0185, 0.0178, 0.0206],
         [0.0207, 0.0215, 0.0201, 0.0184, 0.0198],
         [0.0178, 0.0189, 0.0205, 0.0200, 0.0165],
         [0.0172, 0.0110, 0.0105, 0.0138, 0.0169]]),
 tensor([[0.0125, 0.0179, 0.0176, 0.0108, 0.0142],
         [0.0110, 0.0116, 0.0103, 0.0118, 0.0115],
         [0.0114, 0.0110, 0.0117, 0.0137, 0.0145],
         [0.0128, 0.0151, 0.0129, 0.0130, 0.0151]]),
 tensor([[0.0163, 0.0200, 0.0185, 0.0178, 0.0206],
         [0.0207, 0.0215, 0.0201, 0.0184, 0.0198],
         [0.0178, 0.0189, 0.0205, 0.0200, 0.0165],
         [0.0172, 0.0110, 0.0105, 0.0138, 0.0169]]),
 tensor([[0.0248, 0.0207, 0.0219, 0.0881, 0.0211],
         [0.0117, 0.0112, 0.0124, 0.0110, 0.0144],
         [0.0106, 0.0174, 0.0177, 0.0104, 0.0127],
         [0.0117, 0.009

#### Tune alpha for models with auxiliary loss

In [36]:
# One row per auxiliary-loss model:
# [model class, siamese flag, aux flag, learning rate, batch size]
Train_auxmodel = [
    [AuxMLP,           False, True, 5e-3, 16],
    [AuxsiameseMLP,    True,  True, 5e-3, 8],
    [AuxCNN,           False, True, 5e-4, 8],
    [AuxsiameseCNN,    True,  True, 5e-3, 32],
    [AuxResNet,        False, True, 1e-3, 8],
    [AuxsiameseResNet, True,  True, 5e-3, 8],
]

In [32]:
# Tune the auxiliary-loss weight alpha for every model in Train_auxmodel.
# Each alpha is scored by the mean test accuracy over n_runs trainings that
# call load_data with seed=0..n_runs-1.
alphas = [j / 10 for j in range(11)]   # 0.0, 0.1, ..., 1.0
n_runs = 10

accuracy_aux = []   # per model: (1, len(alphas)) tensor of mean test accuracies
std_aux = []        # per model: (1, len(alphas)) tensor of test accuracy stds
model_number_aux = 0
for model_cls, is_siamese, use_aux, eta, batch_size in Train_auxmodel:
    test_accuracies_aux = torch.empty((1, len(alphas)))
    test_stds_aux = torch.empty((1, len(alphas)))
    for j, alpha in enumerate(alphas):
        accurate_aux = []
        for i in range(n_runs):
            # Fresh model for every run
            model = model_cls()
            model.to(device)

            train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=i)
            train(model, train_loader, eta, 0, 25, verbose=False, siamese=is_siamese, aux=use_aux, alpha=alpha)
            te_accuracy = accu(model, train_loader, test_loader, siamese=is_siamese, aux=use_aux)[1]
            accurate_aux.append(te_accuracy)

        runs = torch.FloatTensor(accurate_aux)
        test_accuracies_aux[0, j] = runs.mean()
        test_stds_aux[0, j] = runs.std()
    accuracy_aux.append(test_accuracies_aux)
    std_aux.append(test_stds_aux)

    # The tensor has a single row, so the flat argmax is the index into alphas
    best_alpha = alphas[int(test_accuracies_aux.argmax())]
    model_number_aux += 1
    print('The optimal alpha for aux_model_%d is %.2f' %(model_number_aux, best_alpha))

The optimal alpha for aux_model_1 is 1.00


KeyboardInterrupt: 

#### Final results

In [38]:
# One row per model:
# [model class, siamese flag, aux flag, learning rate, batch size, alpha]
Train_final_model = [
    [MLP,              False, False, 5e-4, 8,  0],
    [SiameseMLP,       True,  False, 5e-3, 8,  0],
    [AuxMLP,           False, True,  5e-3, 16, 0.9],
    [AuxsiameseMLP,    True,  True,  5e-3, 8,  0.7],
    [CNN,              False, False, 5e-4, 16, 0],
    [SiameseCNN,       True,  False, 1e-3, 16, 0],
    [AuxCNN,           False, True,  5e-4, 8,  1.0],
    [AuxsiameseCNN,    True,  True,  5e-3, 32, 0.6],
    [ResNet,           False, False, 1e-3, 32, 0],
    [SiameseResNet,    True,  False, 5e-3, 32, 0],
    [AuxResNet,        False, True,  5e-3, 32, 0.6],
    [AuxsiameseResNet, True,  True,  5e-3, 32, 0.6],
]

In [42]:
# Final evaluation: train every model in Train_final_model n_runs times with
# its chosen settings, then report the mean/std of the test accuracy and of the
# training time.
n_runs = 10
n_epochs = 25   # also the width of each row of `losses`

loss_total = []   # per model: (n_runs, n_epochs) tensor of epoch losses
index = 0
for model_cls, is_siamese, use_aux, eta, batch_size, alpha in Train_final_model:
    times = []
    accuracies = []
    losses = torch.empty((n_runs, n_epochs))

    for i in range(n_runs):
        train_loader, test_loader = load_data(N=1000, batch_size=batch_size, seed=i)

        # The timed section covers model construction and training, not data loading
        time1 = time.perf_counter()

        model = model_cls()
        model.to(device)
        losses[i,:] = torch.tensor(train(model, train_loader, eta, 0, n_epochs, verbose=False, siamese=is_siamese, aux=use_aux, alpha=alpha))
        time2 = time.perf_counter()
        times.append(time2 - time1)

        te_accuracy = accu(model, train_loader, test_loader, siamese=is_siamese, aux=use_aux)[1]

        accuracies.append(te_accuracy)
    loss_total.append(losses)
    index += 1

    accuracy_stats = torch.tensor(accuracies)
    time_stats = torch.tensor(times)
    print('For optimal model_%d, Mean accuracy: %.3f, Std: %.3f, Mean time: %.3f, Std: %.3f' %(index, accuracy_stats.mean(), accuracy_stats.std(), time_stats.mean(), time_stats.std()))

For optimal model_1, Mean accuracy: 0.810, Std: 0.010, Mean time: 10.164, Std: 0.613
For optimal model_2, Mean accuracy: 0.814, Std: 0.101, Mean time: 6.424, Std: 0.637
For optimal model_3, Mean accuracy: 0.849, Std: 0.017, Mean time: 8.898, Std: 0.570
For optimal model_4, Mean accuracy: 0.876, Std: 0.021, Mean time: 6.860, Std: 0.685
For optimal model_5, Mean accuracy: 0.825, Std: 0.021, Mean time: 11.122, Std: 0.800
For optimal model_6, Mean accuracy: 0.823, Std: 0.098, Mean time: 16.943, Std: 1.064
For optimal model_7, Mean accuracy: 0.871, Std: 0.009, Mean time: 38.943, Std: 1.508
For optimal model_8, Mean accuracy: 0.903, Std: 0.017, Mean time: 9.981, Std: 0.968
For optimal model_9, Mean accuracy: 0.835, Std: 0.011, Mean time: 26.725, Std: 1.541
For optimal model_10, Mean accuracy: 0.807, Std: 0.137, Mean time: 47.060, Std: 2.568
For optimal model_11, Mean accuracy: 0.878, Std: 0.025, Mean time: 52.441, Std: 3.036
For optimal model_12, Mean accuracy: 0.905, Std: 0.016, Mean time: 

In [44]:
# NOTE(review): this cell repeats the preceding final-evaluation cell; consider
# keeping a single copy.
# For every entry of Train_final_model: 10 trainings of 25 epochs, then the
# mean/std of the test accuracy and of the wall-clock training time.
loss_total = []
index = 0
for index, models in enumerate(Train_final_model, start=1):
    net_cls, siamese_flag, aux_flag, step_size, batch, aux_weight = models
    times, accuracies = [], []
    losses = torch.empty((10, 25))

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=batch, seed=i)

        # Timed section: model construction, training and storing the loss curve
        time1 = time.perf_counter()
        model = net_cls()
        model.to(device)
        epoch_losses = train(model, train_loader, step_size, 0, 25, verbose=False,
                             siamese=siamese_flag, aux=aux_flag, alpha=aux_weight)
        losses[i, :] = torch.tensor(epoch_losses)
        time2 = time.perf_counter()
        times.append(time2 - time1)

        tr_accuracy, te_accuracy = accu(model, train_loader, test_loader,
                                        siamese=siamese_flag, aux=aux_flag)
        accuracies.append(te_accuracy)

    loss_total.append(losses)

    acc_tensor = torch.tensor(accuracies)
    time_tensor = torch.tensor(times)
    print('For optimal model_%d, Mean accuracy: %.3f, Std: %.3f, Mean time: %.3f, Std: %.3f' %(index, acc_tensor.mean(), acc_tensor.std(), time_tensor.mean(), time_tensor.std()))

For optimal model_1, Mean accuracy: 0.808, Std: 0.010, Mean time: 10.724, Std: 1.155
For optimal model_2, Mean accuracy: 0.814, Std: 0.101, Mean time: 6.483, Std: 0.751
For optimal model_3, Mean accuracy: 0.849, Std: 0.017, Mean time: 9.822, Std: 1.106
For optimal model_4, Mean accuracy: 0.876, Std: 0.021, Mean time: 6.507, Std: 0.149


KeyboardInterrupt: 