### Notebook to show grid search computation with the obtained values

In [None]:
import torch
from torch import nn
from torch.nn import functional as F
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt #just for plotting
import numpy as np #just for plotting
import dlc_practical_prologue as prologue
import warnings
warnings.filterwarnings('ignore')

In [None]:
N = 1000
#mini_batch_size = 100
rounds = 15

In [None]:
def normalize_data(train_data_input, test_data_input):
    """
    Scale data input to have zero mean and unit variance based on the train_data_input tensor.
    """
    mu, std = train_data_input.mean(), train_data_input.std()
    train_data_input.sub(mu).div(std)
    test_data_input.sub(mu).div(std)
    return train_data_input, test_data_input

In [None]:
def avg_acc_std(test_errors_list):
    """
    Get standard devation of the test errors during all the rounds and the best test error rate for one model.
    """
    std = torch.FloatTensor(test_errors_list).std().item()
    avg_err = torch.FloatTensor(test_errors_list).min().item()
    return std, avg_err

def perf_summary(test_errors_list, rounds=15):
    """
    Get mean and standard devation of the test errors during all the rounds for one model.
    """
    testErrorMean = torch.FloatTensor(test_errors_list).mean().item()
    testErrorStd = torch.FloatTensor(test_errors_list).std().item()
    print("Estimates of {} rounds:".format(rounds))
    print("Test error Average: {:3f};  Test error standard deviations: {:3f}".format(testErrorMean, testErrorStd))

def plot_err_evolution(test_errors_1, test_errors_2, test_errors_3):
    """
    Plot the evolution of the test error during the different rounds for each model
    """
    plt.figure(figsize=(10, 5))
    ax2 = plt.subplot(111)
    ax2.plot(range(1, len(test_errors_1)+1), test_errors_1, label='Baseline', marker='^')
    ax2.plot(range(1, len(test_errors_2)+1), test_errors_2, label='Siamese', marker='^')
    ax2.plot(range(1, len(test_errors_3)+1), test_errors_3, label='Siamese with aux. losses', marker='^')
    ax2.set_ylabel('Accuracy')
    ax2.set_xlabel('Round')
    ax2.set_title('Test error rate in function of rounds')
    ax2.legend()
    plt.savefig('Test__error_rate_over_rounds.png')
    
def plot_acc_std(avg_accs, stds):
    """
    Bar plot of the average test error rate with the corresponding standard deviation over all rounds for each model.
    """
    
    models = ['Baseline', 'Siamese', 'Siamese wit aux. losses']
    x_pos = np.arange(len(models))  #illegal use of numpy
    
    # Build the plot
    fig, ax = plt.subplots()
    ax.bar(x_pos, avg_accs, yerr=stds, align='center', alpha=0.5, ecolor='black', capsize=5, width=0.3, color=['pink', 'blue', 'cyan'])
    ax.set_ylabel('Average test error rate')
    ax.set_xticks(x_pos)
    ax.set_xticklabels(models)
    ax.set_title('Average test error rate and standard deviation for each model')
    ax.yaxis.grid(True)

    # Save the figure and show
    plt.tight_layout()
    plt.savefig('bar_plot_with_error_bars.png')
    plt.show()

In [None]:
class Base_net(nn.Module):
    """
    Baseline ConvNet similar to LeNet-5 architecture.
    """
    def __init__(self):
        super(Base_net, self).__init__()
        
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        
        self.bn1=nn.BatchNorm2d(32)
        self.bn2=nn.BatchNorm2d(64)
        self.bn3=nn.BatchNorm1d(128)
        self.bn4=nn.BatchNorm1d(90)
        
        self.fc1 = nn.Linear(64 * 2 * 2, 128)
        self.fc2 = nn.Linear(128, 90)
        self.fc3 = nn.Linear(90, 2)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.bn1(self.conv1(x))), kernel_size=2, stride=2)
        x = F.max_pool2d(F.relu(self.bn2(self.conv2(x))),  kernel_size=2, stride=2)
        
        x = F.relu(self.bn3(self.fc1(x.view(x.size(0), -1))))
        x = F.relu(self.bn4(self.fc2(x)))
        x = self.fc3(x)
        return x

In [None]:
class Siamese_net_ws(nn.Module):
    """
    Siamese ConvNet with weight sharing implementation, each of the two branch has the same architecture as the baseline
    ConvNet.
    """
    def __init__(self):
        super(Siamese_net_ws, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        
        self.bn1=nn.BatchNorm2d(32)
        self.bn2=nn.BatchNorm2d(64)
        self.bn3=nn.BatchNorm1d(128)
        self.bn4=nn.BatchNorm1d(90)
        self.bn5=nn.BatchNorm1d(10)
        self.bn6=nn.BatchNorm1d(90)
        
        self.fc1 = nn.Linear(64 * 2 * 2, 128)
        self.fc2 = nn.Linear(128, 90)
        self.fc3 = nn.Linear(90,10)
        self.fc4 = nn.Linear(20, 90)
        self.fc5 = nn.Linear(90, 2)
        
    def forward(self, x):
        x1 = F.max_pool2d(F.relu(self.bn1(self.conv1(x[:, 0].view(-1, 1, 14, 14)))), kernel_size=2, stride=2)
        x2 = F.max_pool2d(F.relu(self.bn1(self.conv1(x[:, 1].view(-1, 1, 14, 14)))), kernel_size=2, stride=2)

        x1 = F.max_pool2d(F.relu(self.bn2(self.conv2(x1))),  kernel_size=2, stride=2)
        x2 = F.max_pool2d(F.relu(self.bn2(self.conv2(x2))),  kernel_size=2, stride=2)
        
        x1 = F.relu(self.bn3(self.fc1(x1.view(x1.size(0), -1))))
        x2 = F.relu(self.bn3(self.fc1(x2.view(x2.size(0), -1))))
        
        x1 = F.relu(self.bn4(self.fc2(x1)))
        x2 = F.relu(self.bn4(self.fc2(x2)))
        
        x1 = F.relu(self.bn5(self.fc3(x1)))
        x2 = F.relu(self.bn5(self.fc3(x2)))
        
        x = F.relu(self.bn6(self.fc4(torch.cat((x1, x2), dim=1))))
        x = self.fc5(x)
        return x

In [None]:
class Siamese_net_ws_aux(nn.Module):
    """
    Siamese ConvNet with weight sharing implementation and auxiliary losses.
    """
    def __init__(self):
        super(Siamese_net_ws_aux, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        
        self.bn1=nn.BatchNorm2d(32)
        self.bn2=nn.BatchNorm2d(64)
        self.bn3=nn.BatchNorm1d(128)
        self.bn4=nn.BatchNorm1d(90)
        self.bn5=nn.BatchNorm1d(10)
        self.bn6=nn.BatchNorm1d(90)
        
        #self.dropout = nn.Dropout(0.25) dropout was also exp
        
        self.fc1 = nn.Linear(64 * 2 * 2, 128)
        self.fc2 = nn.Linear(128, 90)
        self.fc3 = nn.Linear(90,10)
        self.fc4 = nn.Linear(20, 90)
        self.fc5 = nn.Linear(90, 2)
        
    def forward(self, x):
        x1 = F.max_pool2d(F.relu(self.bn1(self.conv1(x[:, 0].view(-1, 1, 14, 14)))), kernel_size=2, stride=2)
        x2 = F.max_pool2d(F.relu(self.bn1(self.conv1(x[:, 1].view(-1, 1, 14, 14)))), kernel_size=2, stride=2)

        x1 = F.max_pool2d(F.relu(self.bn2(self.conv2(x1))),  kernel_size=2, stride=2)
        x2 = F.max_pool2d(F.relu(self.bn2(self.conv2(x2))),  kernel_size=2, stride=2)
        
        
        x1 = F.relu(self.bn3(self.fc1(x1.view(x1.size(0), -1))))
        x2 = F.relu(self.bn3(self.fc1(x2.view(x2.size(0), -1))))
        
        x1 = F.relu(self.bn4(self.fc2(x1)))
        x2 = F.relu(self.bn4(self.fc2(x2)))
        
        x1_aux = self.fc3(x1)
        x2_aux = self.fc3(x2)
        
        x1 = F.relu(self.bn5(x1_aux))
        x2 = F.relu(self.bn5(x2_aux))
        x = F.relu(self.bn6(self.fc4(torch.cat((x1, x2), dim=1))))
        x = self.fc5(x)
        
        return x, x1_aux, x2_aux

In [None]:
def train_model(model, train_input, train_target, mini_batch_size, nb_epochs, learning_rate, verbose=False):
    """
    Train ConvNet model without auxiliry losses.
    """
    model,criterion = model,nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    #move model and criterion to gpu if CUDA available
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
    
    losses = []
    for e in range(nb_epochs):
        
        for b in range(0, train_input.size(0), mini_batch_size):
            
            # forward pass: compute prediction
            output = model(train_input.narrow(0, b, mini_batch_size))
            
            #loss
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
                
            # backward pass
            model.zero_grad()
            loss.backward()
            
            #update weights
            optimizer.step()
            
            losses.append(loss.data.item())
            if torch.cuda.is_available():
                torch.cuda.empty_cache()         
            
        if verbose:
            print("Epoch: {} \t -> Loss: {} ".format(e, losses))
    return losses

def train_model_auxiliaryloss(model, train_input, train_class, train_target, mini_batch_size, nb_epochs, learning_rate, verbose=False):
    """
    Train ConvNet model with auxiliry losses.
    """
    model,criterion = model,nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    #move model and criterion to gpu if CUDA available
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        
    losses_aux = []
    for e in range(nb_epochs):
        
        for b in range(0, train_input.size(0), mini_batch_size):
            
            # Forward pass: compute prediction
            output_primary, output_aux1, output_aux2 = model(train_input.narrow(0, b, mini_batch_size))
            
            # Main loss + 2 auxiliary losses
            loss =  criterion(output_primary, train_target.narrow(0, b, mini_batch_size)) + criterion(output_aux1, train_class[:, 0].narrow(0, b, mini_batch_size)) + criterion(output_aux2, train_class[:, 1].narrow(0, b, mini_batch_size))
            
            # backward pass
            model.zero_grad()
            loss.backward()
            
            #update weights
            optimizer.step()
            
            losses_aux.append(loss.data.item())
            if torch.cuda.is_available():
                torch.cuda.empty_cache()         
            
        if verbose:
            print("Epoch: {} \t -> Loss: {} ".format(e, losses))
    return losses_aux

In [None]:
def compute_errors_aux(model, data_input, data_target, mini_batch_size):
    """
    Compute number of target errors of model with auxiliary losses.
    """
    nb_target_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        # Prediction
        output_primary, output_aux1, output_aux2 = model(data_input.narrow(0, b, mini_batch_size))
        highest_numbers_indices_main = output_primary.max(1)[1]
        highest_numbers_indices_aux1 = output_aux1.max(1)[1]
        highest_numbers_indices_aux2 = output_aux2.max(1)[1]
        for i in range(highest_numbers_indices_main.size(0)):
            if highest_numbers_indices_main[i] != data_target[b + i]:
                nb_target_errors += 1
    return nb_target_errors

def compute_errors(model, data_input, data_target, mini_batch_size):
    """
    Compute number of target errors of model with no auxiliary losses.
    """
    nb_target_errors = 0
    
    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        highest_numbers_indices_main = output.max(1)[1]
        for i in range(highest_numbers_indices_main.size(0)):
            if highest_numbers_indices_main[i] != data_target[b + i]:
                nb_target_errors += 1
    return nb_target_errors

In [None]:
def BasePipeline(model, mini_batch_size, rounds, N, learning_rate, nb_epochs, train_input, train_target, train_classes, test_input, test_target, test_classes):
    """
    Full pipeline with baseline model.
    """

    loss_per_round = []
    test_errors_list = []
    
    for k in range(rounds):
        
        print('Starting Round', k+1)
        
        #if cuda available move to gpu
        if torch.cuda.is_available():
            train_input, train_target, train_classes = train_input.cuda(), train_target.cuda(), train_classes.cuda()
            test_input, test_target, test_classes = test_input.cuda(), test_target.cuda(), test_classes.cuda()
        
        # Model training
        losses = train_model(model, train_input, train_target, mini_batch_size, nb_epochs, learning_rate)
        loss_per_round.append(losses)

        # Predict and compute error
        nb_test_errors = compute_errors(model, test_input, test_target, mini_batch_size)
        test_errors_list.append(nb_test_errors/test_input.size(0))

        print('Target error rate: {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                          nb_test_errors, test_input.size(0)))
        print("--------------------------------------------------\n")
    return model, loss_per_round, test_errors_list

In [None]:
def Ws_Pipeline(model, mini_batch_size, rounds, N, learning_rate, nb_epochs, train_input, train_target, train_classes, test_input, test_target, test_classes):
    """
    Full pipeline with weight sharing and now auxiliary losses, equal to the base pipeline function but wanted
    to separate the two pipeline names for additional clarity.
    """
    loss_per_round = []
    test_errors_list = []
    
    for k in range(rounds):
        
        print('Starting Round', k+1)
        
        if torch.cuda.is_available():
            train_input, train_target, train_classes = train_input.cuda(), train_target.cuda(), train_classes.cuda()
            test_input, test_target, test_classes = test_input.cuda(), test_target.cuda(), test_classes.cuda()
        
        # Model training
        losses = train_model(model, train_input, train_target, mini_batch_size, nb_epochs, learning_rate)
        loss_per_round.append(losses)

        # Predict and compute error
        nb_test_errors = compute_errors(model, test_input, test_target)
        test_errors_list.append(nb_test_errors/test_input.size(0))

        # Logging
        print('Target error rate: {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                          nb_test_errors, test_input.size(0)))
        print("--------------------------------------------------\n")
    return model, loss_per_round, test_errors_list

In [None]:
def Ws_aux_Pipeline(model, mini_batch_size, rounds, N, learning_rate, nb_epochs, train_input, train_target, train_classes, test_input, test_target, test_classes):
    """
    Full pipeline with weight sharing and auxiliary losses.
    """
    
    loss_per_round = []
    test_errors_list = []
    
    for k in range(rounds):
        
        print('Starting Round', k+1)
        
        if torch.cuda.is_available():
            train_input, train_target, train_classes = train_input.cuda(), train_target.cuda(), train_classes.cuda()
            test_input, test_target, test_classes = test_input.cuda(), test_target.cuda(), test_classes.cuda()
        
        # Model training
        losses = train_model_auxiliaryloss(model, train_input, train_classes, train_target, mini_batch_size, nb_epochs, learning_rate)
        loss_per_round.append(losses)

        # Predict and compute error
        nb_test_errors = compute_errors_aux(model, test_input, test_target, mini_batch_size)
        test_errors_list.append(nb_test_errors/test_input.size(0))

        # Logging
        print('Target error rate: {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                          nb_test_errors, test_input.size(0)))
        print("--------------------------------------------------\n")
    return model, loss_per_round, test_errors_list

In [None]:
#controlling sources of randomness
torch.manual_seed(0)

#setting number of rounds and total number of samples

train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

#check dimensions
print('The shape of inputA and B is : {}'.format(train_input.shape))
print('The shape of targetA and B is : {}'.format(train_target.shape))
print('The shape of classes A and B is : {}'.format(train_classes.shape))

#scale data
train_input, test_input = normalize_data(train_input, test_input)

#computation but in this notebook we just wanted to show the grid search computation
#print("--------------------------------------------------")
#print("ConvNet model with no weight sharing nor auxiliary losses")
#model_nows_noaux, loss_per_round_nows_noaux, test_errors_nows_noaux = BasePipeline(Base_net(), mini_batch_size, rounds, N, train_input, train_target, train_classes, test_input, test_target, test_classes)
#perf_summary(test_errors_nows_noaux)
#print("The total number of trainable parameters of this Model:", sum(p.numel() for p in model_nows_noaux.parameters() if p.requires_grad),'\n')

#print("--------------------------------------------------")
#print("Siamese ConvNet model with weight sharing and no auxiliary losses")
#model_ws_noaux, loss_per_round_ws_noaux, test_errors_ws_noaux = Ws_Pipeline(Siamese_net_ws(), mini_batch_size, rounds, N, train_input, train_target, train_classes, test_input, test_target, test_classes)
#perf_summary(test_errors_ws_noaux)
#print("The total number of trainable parameters of this Model:", sum(p.numel() for p in model_ws_noaux.parameters() if p.requires_grad),'\n')

#print("--------------------------------------------------")
#print("Siamese ConvNet model with weight sharing and auxiliary losses")
#model_ws_aux, loss_per_round_ws_aux, test_errors_ws_aux = Ws_aux_Pipeline(Siamese_net_ws_aux(), mini_batch_size, rounds, N, train_input, train_target, train_classes, test_input, test_target, test_classes)
#perf_summary(test_errors_ws_aux)
#print("The total number of trainable parameters of this Model:", sum(p.numel() for p in model_ws_aux.parameters() if p.requires_grad),'\n')

The three search grid had to be performed every time by restarting the kernel but they were all executed with the code above and these were the results

In [15]:
param_grid = [(int(nb_epochs), int(mini_batch_size), lr)
                  for nb_epochs in (25, 50, 100, 150)
                  for mini_batch_size in (25, 50, 100)  
                  for lr in (0.001, 0.005, 0.01, 0.1)]

#store mean and std of each trained model
base_model_scores = {}
base_model_stds = {}

#Test each hyper-parameter combination
for param_combo in param_grid:
    print("Validating parameter combination:", param_combo)

    base_model_scores[param_combo] = []

    nb_epochs, mini_batch_size, lr = param_combo


    model_nows_noaux, loss_per_round_nows_noaux, test_errors_nows_noaux = BasePipeline(Base_net(), mini_batch_size, 10, N, lr, nb_epochs, train_input, train_target, train_classes, test_input, test_target, test_classes)                                                 

        
    pair_model_scores[param_combo] = torch.FloatTensor(test_errors_nows_noaux).mean().item()
    pair_model_stds[param_combo] = torch.FloatTensor(test_errors_nows_noaux).std().item()

    grid_val_results = (pair_model_scores, pair_model_stds)

Validating parameter combination: (25, 25, 0.001)
Starting Round 1
Target error rate: 19.20% 192/1000
--------------------------------------------------

Starting Round 2
Target error rate: 17.80% 178/1000
--------------------------------------------------

Starting Round 3
Target error rate: 17.90% 179/1000
--------------------------------------------------

Starting Round 4
Target error rate: 18.20% 182/1000
--------------------------------------------------

Starting Round 5
Target error rate: 17.90% 179/1000
--------------------------------------------------

Starting Round 6
Target error rate: 17.80% 178/1000
--------------------------------------------------

Starting Round 7
Target error rate: 16.90% 169/1000
--------------------------------------------------

Starting Round 8
Target error rate: 16.00% 160/1000
--------------------------------------------------

Starting Round 9
Target error rate: 18.60% 186/1000
--------------------------------------------------

Starting Round

Target error rate: 18.10% 181/1000
--------------------------------------------------

Starting Round 7
Target error rate: 17.80% 178/1000
--------------------------------------------------

Starting Round 8
Target error rate: 18.30% 183/1000
--------------------------------------------------

Starting Round 9
Target error rate: 17.70% 177/1000
--------------------------------------------------

Starting Round 10
Target error rate: 17.40% 174/1000
--------------------------------------------------

f
Validating parameter combination: (25, 100, 0.001)
Starting Round 1
Target error rate: 20.00% 200/1000
--------------------------------------------------

Starting Round 2
Target error rate: 17.80% 178/1000
--------------------------------------------------

Starting Round 3
Target error rate: 17.80% 178/1000
--------------------------------------------------

Starting Round 4
Target error rate: 18.40% 184/1000
--------------------------------------------------

Starting Round 5
Target err

Target error rate: 17.80% 178/1000
--------------------------------------------------

Starting Round 2
Target error rate: 17.80% 178/1000
--------------------------------------------------

Starting Round 3
Target error rate: 17.50% 175/1000
--------------------------------------------------

Starting Round 4
Target error rate: 16.40% 164/1000
--------------------------------------------------

Starting Round 5
Target error rate: 15.90% 159/1000
--------------------------------------------------

Starting Round 6
Target error rate: 16.70% 167/1000
--------------------------------------------------

Starting Round 7
Target error rate: 16.90% 169/1000
--------------------------------------------------

Starting Round 8
Target error rate: 17.10% 171/1000
--------------------------------------------------

Starting Round 9
Target error rate: 16.90% 169/1000
--------------------------------------------------

Starting Round 10
Target error rate: 17.40% 174/1000
----------------------------

Target error rate: 16.80% 168/1000
--------------------------------------------------

Starting Round 8
Target error rate: 18.30% 183/1000
--------------------------------------------------

Starting Round 9
Target error rate: 17.10% 171/1000
--------------------------------------------------

Starting Round 10
Target error rate: 17.90% 179/1000
--------------------------------------------------

f
Validating parameter combination: (50, 100, 0.1)
Starting Round 1
Target error rate: 18.90% 189/1000
--------------------------------------------------

Starting Round 2
Target error rate: 19.70% 197/1000
--------------------------------------------------

Starting Round 3
Target error rate: 16.50% 165/1000
--------------------------------------------------

Starting Round 4
Target error rate: 18.10% 181/1000
--------------------------------------------------

Starting Round 5
Target error rate: 17.10% 171/1000
--------------------------------------------------

Starting Round 6
Target error

Target error rate: 18.90% 189/1000
--------------------------------------------------

Starting Round 3
Target error rate: 15.80% 158/1000
--------------------------------------------------

Starting Round 4
Target error rate: 17.30% 173/1000
--------------------------------------------------

Starting Round 5
Target error rate: 18.20% 182/1000
--------------------------------------------------

Starting Round 6
Target error rate: 18.20% 182/1000
--------------------------------------------------

Starting Round 7
Target error rate: 17.20% 172/1000
--------------------------------------------------

Starting Round 8
Target error rate: 17.50% 175/1000
--------------------------------------------------

Starting Round 9
Target error rate: 17.40% 174/1000
--------------------------------------------------

Starting Round 10
Target error rate: 17.60% 176/1000
--------------------------------------------------

f
Validating parameter combination: (100, 50, 0.1)
Starting Round 1
Target error

Target error rate: 18.30% 183/1000
--------------------------------------------------

Starting Round 9
Target error rate: 18.40% 184/1000
--------------------------------------------------

Starting Round 10
Target error rate: 19.10% 191/1000
--------------------------------------------------

f
Validating parameter combination: (150, 25, 0.01)
Starting Round 1
Target error rate: 18.90% 189/1000
--------------------------------------------------

Starting Round 2
Target error rate: 18.90% 189/1000
--------------------------------------------------

Starting Round 3
Target error rate: 20.90% 209/1000
--------------------------------------------------

Starting Round 4
Target error rate: 18.40% 184/1000
--------------------------------------------------

Starting Round 5
Target error rate: 19.30% 193/1000
--------------------------------------------------

Starting Round 6
Target error rate: 16.90% 169/1000
--------------------------------------------------

Starting Round 7
Target erro

Target error rate: 16.80% 168/1000
--------------------------------------------------

Starting Round 4
Target error rate: 15.10% 151/1000
--------------------------------------------------

Starting Round 5
Target error rate: 16.40% 164/1000
--------------------------------------------------

Starting Round 6
Target error rate: 17.60% 176/1000
--------------------------------------------------

Starting Round 7
Target error rate: 16.40% 164/1000
--------------------------------------------------

Starting Round 8
Target error rate: 17.10% 171/1000
--------------------------------------------------

Starting Round 9
Target error rate: 17.70% 177/1000
--------------------------------------------------

Starting Round 10
Target error rate: 17.70% 177/1000
--------------------------------------------------

f
Validating parameter combination: (150, 100, 0.01)
Starting Round 1
Target error rate: 17.50% 175/1000
--------------------------------------------------

Starting Round 2
Target err

In [16]:
tuning_val_results

({(25, 25, 0.001): 0.17720000445842743,
  (25, 25, 0.005): 0.17570000886917114,
  (25, 25, 0.01): 0.17069999873638153,
  (25, 25, 0.1): 0.19609999656677246,
  (25, 50, 0.001): 0.18559998273849487,
  (25, 50, 0.005): 0.1809999942779541,
  (25, 50, 0.01): 0.18160000443458557,
  (25, 50, 0.1): 0.17389999330043793,
  (25, 100, 0.001): 0.1833999902009964,
  (25, 100, 0.005): 0.17390000820159912,
  (25, 100, 0.01): 0.17480000853538513,
  (25, 100, 0.1): 0.1761999875307083,
  (50, 25, 0.001): 0.18050000071525574,
  (50, 25, 0.005): 0.17820000648498535,
  (50, 25, 0.01): 0.19249999523162842,
  (50, 25, 0.1): 0.170400008559227,
  (50, 50, 0.001): 0.195700004696846,
  (50, 50, 0.005): 0.17960000038146973,
  (50, 50, 0.01): 0.17309997975826263,
  (50, 50, 0.1): 0.16520000994205475,
  (50, 100, 0.001): 0.18999998271465302,
  (50, 100, 0.005): 0.1818000078201294,
  (50, 100, 0.01): 0.17410001158714294,
  (50, 100, 0.1): 0.1779000163078308,
  (100, 25, 0.001): 0.19290000200271606,
  (100, 25, 0.005)

### The computation took a long time for this dumb grid search therefore for the others two the search parameters were reduced trying to fine tune the hyper-parameter search space. Best hyper-parameters are nb_epochs=50, mini_batch_size=50, lr=0.1 with avg test error_rate of 0.165 and std 0.0076

In [15]:
param_grid = [(int(nb_epochs), int(mini_batch_size), lr)
                  for nb_epochs in (25, 50, 100)
                  for mini_batch_size in (50, 100)  
                  for lr in (0.001, 0.005, 0.01, 0.1)]

#store mean and std of each trained model
siam_model_scores = {}
siam_model_stds = {}

#Test each hyper-parameter combination
for param_combo in param_grid:
    print("Validating parameter combination:", param_combo)

    siam_model_scores[param_combo] = []

    nb_epochs, mini_batch_size, lr = param_combo

    model_ws_noaux, loss_per_round_ws_noaux, test_errors_ws_noaux = Ws_Pipeline(Siamese_net_ws(), mini_batch_size, 10, N, lr, nb_epochs, train_input, train_target, train_classes, test_input, test_target, test_classes)                                                 

        
    siam_model_scores[param_combo] = torch.FloatTensor(test_errors_ws_noaux).mean().item()
    siam_model_stds[param_combo] = torch.FloatTensor(test_errors_ws_noaux).std().item()

    tuning_val_results = (siam_model_scores, siam_model_stds)

Validating parameter combination: (25, 50, 0.001)
Starting Round 1
Target error rate: 13.20% 132/1000
--------------------------------------------------

Starting Round 2
Target error rate: 13.40% 134/1000
--------------------------------------------------

Starting Round 3
Target error rate: 13.80% 138/1000
--------------------------------------------------

Starting Round 4
Target error rate: 13.20% 132/1000
--------------------------------------------------

Starting Round 5
Target error rate: 14.00% 140/1000
--------------------------------------------------

Starting Round 6
Target error rate: 13.80% 138/1000
--------------------------------------------------

Starting Round 7
Target error rate: 13.50% 135/1000
--------------------------------------------------

Starting Round 8
Target error rate: 12.20% 122/1000
--------------------------------------------------

Starting Round 9
Target error rate: 13.80% 138/1000
--------------------------------------------------

Starting Round

Target error rate: 10.40% 104/1000
--------------------------------------------------

Starting Round 7
Target error rate: 9.60% 96/1000
--------------------------------------------------

Starting Round 8
Target error rate: 11.40% 114/1000
--------------------------------------------------

Starting Round 9
Target error rate: 11.30% 113/1000
--------------------------------------------------

Starting Round 10
Target error rate: 12.80% 128/1000
--------------------------------------------------

Validating parameter combination: (50, 50, 0.001)
Starting Round 1
Target error rate: 13.60% 136/1000
--------------------------------------------------

Starting Round 2
Target error rate: 14.10% 141/1000
--------------------------------------------------

Starting Round 3
Target error rate: 15.00% 150/1000
--------------------------------------------------

Starting Round 4
Target error rate: 13.20% 132/1000
--------------------------------------------------

Starting Round 5
Target error ra

Target error rate: 11.20% 112/1000
--------------------------------------------------

Starting Round 3
Target error rate: 10.60% 106/1000
--------------------------------------------------

Starting Round 4
Target error rate: 13.30% 133/1000
--------------------------------------------------

Starting Round 5
Target error rate: 10.30% 103/1000
--------------------------------------------------

Starting Round 6
Target error rate: 11.60% 116/1000
--------------------------------------------------

Starting Round 7
Target error rate: 9.70% 97/1000
--------------------------------------------------

Starting Round 8
Target error rate: 9.50% 95/1000
--------------------------------------------------

Starting Round 9
Target error rate: 11.00% 110/1000
--------------------------------------------------

Starting Round 10
Target error rate: 11.50% 115/1000
--------------------------------------------------

Validating parameter combination: (100, 50, 0.001)
Starting Round 1
Target error rat

Target error rate: 9.40% 94/1000
--------------------------------------------------

Starting Round 9
Target error rate: 10.90% 109/1000
--------------------------------------------------

Starting Round 10
Target error rate: 10.30% 103/1000
--------------------------------------------------

Validating parameter combination: (100, 100, 0.1)
Starting Round 1
Target error rate: 12.60% 126/1000
--------------------------------------------------

Starting Round 2
Target error rate: 11.30% 113/1000
--------------------------------------------------

Starting Round 3
Target error rate: 13.00% 130/1000
--------------------------------------------------

Starting Round 4
Target error rate: 12.70% 127/1000
--------------------------------------------------

Starting Round 5
Target error rate: 12.80% 128/1000
--------------------------------------------------

Starting Round 6
Target error rate: 12.80% 128/1000
--------------------------------------------------

Starting Round 7
Target error ra

In [16]:
tuning_val_results

({(25, 50, 0.001): 0.1347000002861023,
  (25, 50, 0.005): 0.10520000755786896,
  (25, 50, 0.01): 0.11440000683069229,
  (25, 50, 0.1): 0.1216999888420105,
  (25, 100, 0.001): 0.14400000870227814,
  (25, 100, 0.005): 0.10199999809265137,
  (25, 100, 0.01): 0.11500000953674316,
  (25, 100, 0.1): 0.11130000650882721,
  (50, 50, 0.001): 0.1371999979019165,
  (50, 50, 0.005): 0.10160000622272491,
  (50, 50, 0.01): 0.09620000422000885,
  (50, 50, 0.1): 0.11389999091625214,
  (50, 100, 0.001): 0.16249999403953552,
  (50, 100, 0.005): 0.1169000044465065,
  (50, 100, 0.01): 0.09479999542236328,
  (50, 100, 0.1): 0.10939999669790268,
  (100, 50, 0.001): 0.13259999454021454,
  (100, 50, 0.005): 0.11550001055002213,
  (100, 50, 0.01): 0.11469999700784683,
  (100, 50, 0.1): 0.11189999431371689,
  (100, 100, 0.001): 0.14790000021457672,
  (100, 100, 0.005): 0.1120000034570694,
  (100, 100, 0.01): 0.10429999977350235,
  (100, 100, 0.1): 0.12479998916387558},
 {(25, 50, 0.001): 0.005250395741313696,
 

### Best hyper-parameters are nb_epochs=50, mini_batch_size=100, lr=0.01 with avg test error_rate of 0.947 and std 0.0046

In [15]:
param_grid = [(int(nb_epochs), int(mini_batch_size), lr)
                  for nb_epochs in (25, 50, 100)
                  for mini_batch_size in (50, 100)  
                  for lr in (0.001, 0.005, 0.01, 0.1)]

#store mean and std of each trained model
siamaux_model_scores = {}
siamaux_model_stds = {}

#Test each hyper-parameter combination
for param_combo in param_grid:
    print("Validating parameter combination:", param_combo)

    siamaux_model_scores[param_combo] = []

    nb_epochs, mini_batch_size, lr = param_combo
    model_ws_aux, loss_per_round_ws_aux, test_errors_ws_aux = Ws_aux_Pipeline(Siamese_net_ws_aux(), mini_batch_size, 10, N, lr, nb_epochs, train_input, train_target, train_classes, test_input, test_target, test_classes)                                                 

        
    siamaux_model_scores[param_combo] = torch.FloatTensor(test_errors_ws_aux).mean().item()
    siamaux_model_stds[param_combo] = torch.FloatTensor(test_errors_ws_aux).std().item()

    tuning_val_results = (siamaux_model_scores, siamaux_model_stds)

Validating parameter combination: (25, 50, 0.001)
Starting Round 1
Target error rate: 6.90% 69/1000
--------------------------------------------------

Starting Round 2
Target error rate: 6.70% 67/1000
--------------------------------------------------

Starting Round 3
Target error rate: 5.70% 57/1000
--------------------------------------------------

Starting Round 4
Target error rate: 5.40% 54/1000
--------------------------------------------------

Starting Round 5
Target error rate: 5.40% 54/1000
--------------------------------------------------

Starting Round 6
Target error rate: 6.00% 60/1000
--------------------------------------------------

Starting Round 7
Target error rate: 5.50% 55/1000
--------------------------------------------------

Starting Round 8
Target error rate: 5.60% 56/1000
--------------------------------------------------

Starting Round 9
Target error rate: 6.30% 63/1000
--------------------------------------------------

Starting Round 10
Target error r

Target error rate: 5.20% 52/1000
--------------------------------------------------

Starting Round 9
Target error rate: 4.00% 40/1000
--------------------------------------------------

Starting Round 10
Target error rate: 4.60% 46/1000
--------------------------------------------------

Validating parameter combination: (50, 50, 0.001)
Starting Round 1
Target error rate: 6.10% 61/1000
--------------------------------------------------

Starting Round 2
Target error rate: 6.50% 65/1000
--------------------------------------------------

Starting Round 3
Target error rate: 6.10% 61/1000
--------------------------------------------------

Starting Round 4
Target error rate: 5.80% 58/1000
--------------------------------------------------

Starting Round 5
Target error rate: 5.40% 54/1000
--------------------------------------------------

Starting Round 6
Target error rate: 4.70% 47/1000
--------------------------------------------------

Starting Round 7
Target error rate: 5.50% 55/100

Target error rate: 3.70% 37/1000
--------------------------------------------------

Starting Round 6
Target error rate: 5.00% 50/1000
--------------------------------------------------

Starting Round 7
Target error rate: 3.10% 31/1000
--------------------------------------------------

Starting Round 8
Target error rate: 4.10% 41/1000
--------------------------------------------------

Starting Round 9
Target error rate: 4.40% 44/1000
--------------------------------------------------

Starting Round 10
Target error rate: 4.40% 44/1000
--------------------------------------------------

Validating parameter combination: (100, 50, 0.001)
Starting Round 1
Target error rate: 8.40% 84/1000
--------------------------------------------------

Starting Round 2
Target error rate: 7.30% 73/1000
--------------------------------------------------

Starting Round 3
Target error rate: 6.90% 69/1000
--------------------------------------------------

Starting Round 4
Target error rate: 6.70% 67/10

Target error rate: 5.30% 53/1000
--------------------------------------------------

Starting Round 3
Target error rate: 4.40% 44/1000
--------------------------------------------------

Starting Round 4
Target error rate: 4.20% 42/1000
--------------------------------------------------

Starting Round 5
Target error rate: 5.60% 56/1000
--------------------------------------------------

Starting Round 6
Target error rate: 3.80% 38/1000
--------------------------------------------------

Starting Round 7
Target error rate: 3.80% 38/1000
--------------------------------------------------

Starting Round 8
Target error rate: 3.80% 38/1000
--------------------------------------------------

Starting Round 9
Target error rate: 4.30% 43/1000
--------------------------------------------------

Starting Round 10
Target error rate: 4.70% 47/1000
--------------------------------------------------



In [16]:
tuning_val_results

({(25, 50, 0.001): 0.058800000697374344,
  (25, 50, 0.005): 0.04349999874830246,
  (25, 50, 0.01): 0.04639999940991402,
  (25, 50, 0.1): 0.054099999368190765,
  (25, 100, 0.001): 0.0567999966442585,
  (25, 100, 0.005): 0.03790000081062317,
  (25, 100, 0.01): 0.041099999099969864,
  (25, 100, 0.1): 0.040400005877017975,
  (50, 50, 0.001): 0.05689999461174011,
  (50, 50, 0.005): 0.050599999725818634,
  (50, 50, 0.01): 0.043800003826618195,
  (50, 50, 0.1): 0.05220000073313713,
  (50, 100, 0.001): 0.05429999902844429,
  (50, 100, 0.005): 0.0406000018119812,
  (50, 100, 0.01): 0.03790000081062317,
  (50, 100, 0.1): 0.04270000010728836,
  (100, 50, 0.001): 0.06629999727010727,
  (100, 50, 0.005): 0.04859999567270279,
  (100, 50, 0.01): 0.0463000051677227,
  (100, 50, 0.1): 0.048100002110004425,
  (100, 100, 0.001): 0.05630000680685043,
  (100, 100, 0.005): 0.03790000081062317,
  (100, 100, 0.01): 0.03590000420808792,
  (100, 100, 0.1): 0.04529999941587448},
 {(25, 50, 0.001): 0.005731007084

### Best hyper-parameters are nb_epochs=50, mini_batch_size=100, lr=0.01 with avg test error_rate of  0.038 and std 0.046 (over the 0.036 mean with 0.063 std)

### The models with the chosen hyper-parameters were then trained and tested on a freshly made train dataset with the same seed and prediction made on the test datasets. The source of randomness were tries to be kept to a minimum.