In [487]:
import torch
import data_loader
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

In [488]:
from torch.utils.data import TensorDataset, DataLoader

def load_data(N=1000, batch_size=50, seed=42):
    # Load data
    train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)
    train_target = torch.nn.functional.one_hot(train_target)
    test_target = torch.nn.functional.one_hot(test_target)
    
    train_input = train_input.to(device)
    train_target = train_target.to(device)
    train_classes = train_classes.to(device)
    test_input = test_input.to(device)
    test_target = test_target.to(device)
    test_classes = test_classes.to(device)
    # Normalize data
    mean, std = train_input.mean(), train_input.std()
    train_input.sub_(mean).div_(std)
    test_input.sub_(mean).div_(std)
    
    # Generate dataset
    train_data = TensorDataset(train_input, train_target, train_classes)
    test_data = TensorDataset(test_input, test_target, test_classes)
    
    # For reproducibility
    torch.manual_seed(seed)
    
    # Generate data loader
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size)
    
    return train_loader, test_loader

In [489]:
# Check if GPU is available
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [515]:
device = torch.device('cpu')

In [516]:
# helper.py

# Count the number of parameters
def count_param(model):
    return sum([torch.numel(param) for param in model.parameters()])

In [517]:
train_loader, test_loader = load_data(N=1000, batch_size=50, seed=42)

In [518]:
def compute_nb_errors(model, data_loader):

    nb_data_errors = 0

    for data_input, data_target, data_classes in data_loader:
        output = model(data_input)
        nb_error = torch.sum(torch.argmax(output, dim=1, keepdim=True) != torch.argmax(data_target, dim=1, keepdim=True))
        nb_data_errors += nb_error
        
    return nb_data_errors

In [519]:
def compute_nb_errors_siamese(model, data_loader):

    nb_data_errors = 0
    for data_input, data_target, data_classes in data_loader:
        data_1, data_2 = data_input.unbind(1)               
        output = model(data_1.unsqueeze(1), data_2.unsqueeze(1))
        nb_error = torch.sum(torch.argmax(output, dim=1, keepdim=True) != torch.argmax(data_target, dim=1, keepdim=True))
        nb_data_errors += nb_error
        
    return nb_data_errors

In [520]:
def compute_nb_errors_auxsiamese(model, data_loader):

    nb_data_errors = 0
    for data_input, data_target, data_classes in data_loader:
        data_1, data_2 = data_input.unbind(1)               
        output, aux1, aux2 = model(data_1.unsqueeze(1), data_2.unsqueeze(1))
        nb_error = torch.sum(torch.argmax(output, dim=1, keepdim=True) != torch.argmax(data_target, dim=1, keepdim=True))
        nb_data_errors += nb_error
        
    return nb_data_errors

In [521]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        
        self.fc1 = nn.Linear(392, 160)
        self.fc2 = nn.Linear(160, 64) 
        self.fc3 = nn.Linear(64, 2) 


    def forward(self, x):
        x = x.view(-1,392) # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        
        return x

In [522]:
class AuxMLP(nn.Module):
    def __init__(self):
        super(AuxMLP, self).__init__()
        
        self.fc11 = nn.Linear(196, 160)
        self.fc12 = nn.Linear(196, 160)
        self.fc21 = nn.Linear(160, 10)
        self.fc22 = nn.Linear(160, 10)
        self.fc3 = nn.Linear(20, 2) 


    def forward(self, x1, x2):
        x1 = x1.view(-1,196) # flatten
        x1 = F.relu(self.fc11(x1))
        x1 = self.fc21(x1)
        aux1 = F.softmax(x1)
        x1 = F.relu(x1)
        
        x2 = x2.view(-1,196) # flatten
        x2 = F.relu(self.fc12(x2))
        x2 = self.fc22(x2)
        aux2 = F.softmax(x2)
        x2 = F.relu(x2)
        
        x = torch.cat([x1, x2], dim=1)
        x = torch.sigmoid(self.fc3(x))

        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))
        
        return x, aux1, aux2

In [523]:
class SiameseMLP(nn.Module):
    def __init__(self):
        super(SiameseMLP, self).__init__()
        
        self.fc1 = nn.Linear(196, 160)
        self.fc2 = nn.Linear(160, 10)
        self.fc3 = nn.Linear(20, 2) 


    def forward(self, x1, x2):
        x1 = x1.view(-1,196) # flatten
        x1 = F.relu(self.fc1(x1))
        x1 = self.fc2(x1)
        x1 = F.relu(x1)
        
        x2 = x2.view(-1,196) # flatten
        x2 = F.relu(self.fc1(x2))
        x2 = self.fc2(x2)
        x2 = F.relu(x2)
        
        x = torch.cat([x1, x2], dim=1)
        x = torch.sigmoid(self.fc3(x))

        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))
        
        return x

In [524]:
class AuxsiameseMLP(nn.Module):
    def __init__(self):
        super(AuxsiameseMLP, self).__init__()
        
        self.fc1 = nn.Linear(196, 160)
        self.fc2 = nn.Linear(160, 10)
        self.fc3 = nn.Linear(20, 2) 


    def forward(self, x1, x2):
        x1 = x1.view(-1,196) # flatten
        x1 = F.relu(self.fc1(x1))
        x1 = self.fc2(x1)
        aux1 = F.softmax(x1)
        x1 = F.relu(x1)
        
        x2 = x2.view(-1,196) # flatten
        x2 = F.relu(self.fc1(x2))
        x2 = self.fc2(x2)
        aux2 = F.softmax(x2)
        x2 = F.relu(x2)
        
        x = torch.cat([x1, x2], dim=1)
        x = torch.sigmoid(self.fc3(x))

        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))
        
        return x, aux1, aux2

In [525]:
class BaseNet(nn.Module):
    def __init__(self):
        super(BaseNet, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)    # size [nb, 32, 12, 12]
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)   # size [nb, 64, 4, 4]
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        self.fc3 = nn.Linear(10, 2)
        
    def forward(self, x):        
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2)) # size [nb, 32, 6, 6]      
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)) # size [nb, 64, 2, 2]
        x = x.view(-1, 256) # size [nb, 256]
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

In [526]:
class SiameseBaseNet(nn.Module):
    def __init__(self):
        super(SiameseBaseNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)    # size [nb, 32, 10, 10]
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)   # size [nb, 64, 4, 4]
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        self.fc3 = nn.Linear(20, 2)
        
    def convs(self, x):        
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2)) # size [nb, 32, 5, 5]      
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)) # size [nb, 64, 2, 2]
        return x
    
    def forward(self, x1, x2):
        x1 = self.convs(x1)
        x1 = x1.view(-1, 256)
        x1 = F.relu((self.fc1(x1)))
        x1 = F.relu(self.fc2(x1))
        
        x2 = self.convs(x2)
        x2 = x2.view(-1, 256)
        x2 = F.relu(self.fc1(x2))
        x2 = F.relu(self.fc2(x2))
        
        x = torch.cat([x1, x2], dim=1)
        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))
        
        x = torch.sigmoid(self.fc3(x))
        
        return x

In [527]:
class AuxsiameseBaseNet(nn.Module):
    def __init__(self):
        super(AuxsiameseBaseNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)    # size [nb, 32, 10, 10]
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)   # size [nb, 64, 4, 4]
        self.fc1 = nn.Linear(256, 200)
        self.fc2 = nn.Linear(200, 10)
        self.fc3 = nn.Linear(20, 2)
        
    def convs(self, x):        
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2)) # size [nb, 32, 5, 5]      
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)) # size [nb, 64, 2, 2]
        return x
    
    def forward(self, x1, x2):
        x1 = self.convs(x1)
        x1 = x1.view(-1, 256)
        x1 = F.relu((self.fc1(x1)))
        x1 = self.fc2(x1)
        aux1 = F.softmax(x1)
        x1 = F.relu(x1)
        
        x2 = self.convs(x2)
        x2 = x2.view(-1, 256)
        x2 = F.relu(self.fc1(x2))
        x2 = self.fc2(x2)
        aux2 = F.softmax(x2)
        x2 = F.relu(x2)
        
        x = torch.cat([x1, x2], dim=1)
        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))
        
        x = torch.sigmoid(self.fc3(x))
        
        return x, aux1, aux2

In [528]:
class AuxBaseNet(nn.Module):
    def __init__(self):
        super(AuxBaseNet, self).__init__()
        self.conv11 = nn.Conv2d(1, 32, kernel_size=3)    # size [nb, 32, 10, 10]
        self.conv21 = nn.Conv2d(32, 64, kernel_size=3)   # size [nb, 64, 4, 4]
        self.fc11 = nn.Linear(256, 200)
        self.fc21 = nn.Linear(200, 10)
        self.conv12 = nn.Conv2d(1, 32, kernel_size=5)    # size [nb, 32, 10, 10]
        self.conv22 = nn.Conv2d(32, 64, kernel_size=2)   # size [nb, 64, 4, 4]
        self.fc12 = nn.Linear(256, 200)
        self.fc22 = nn.Linear(200, 10)
        self.fc3 = nn.Linear(20, 2)
        
    def convs(self, x):        
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2)) # size [nb, 32, 5, 5]      
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)) # size [nb, 64, 2, 2]
        return x
    
    def forward(self, x1, x2):
        x1 = F.relu(F.max_pool2d(self.conv11(x1), kernel_size=2)) # size [nb, 32, 5, 5]  
        x1 = F.relu(F.max_pool2d(self.conv21(x1), kernel_size=2)) # size [nb, 64, 2, 2]
        x1 = x1.view(-1, 256)
        x1 = F.relu((self.fc11(x1)))
        x1 = self.fc21(x1)
        aux1 = F.softmax(x1)
        x1 = F.relu(x1)
        
        x2 = F.relu(F.max_pool2d(self.conv12(x2), kernel_size=2)) # size [nb, 32, 5, 5]  
        x2 = F.relu(F.max_pool2d(self.conv22(x2), kernel_size=2)) # size [nb, 64, 2, 2]
        x2 = x2.view(-1, 256)
        x2 = F.relu((self.fc12(x2)))
        x2 = self.fc22(x2)
        aux2 = F.softmax(x2)
        x2 = F.relu(x2)
        
        x = torch.cat([x1, x2], dim=1)
        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))
        
        x = torch.sigmoid(self.fc3(x))
        
        return x, aux1, aux2

In [529]:
# ResNetBlock with skip-connection and batch normalization
class ResNetBlock(nn.Module):
    def __init__(self, nb_channels, kernel_size, dropout = 0):
        super().__init__()

        self.conv1 = nn.Conv2d(nb_channels, nb_channels,
                               kernel_size = kernel_size,
                               padding = (kernel_size - 1) // 2)

        self.bn1 = nn.BatchNorm2d(nb_channels)

        self.conv2 = nn.Conv2d(nb_channels, nb_channels,
                               kernel_size = kernel_size,
                               padding = (kernel_size - 1) // 2)

        self.bn2 = nn.BatchNorm2d(nb_channels)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        y = self.conv1(x)
        y = self.bn1(y)
        y = self.dropout(y)
        y = F.relu(y)
        y = self.conv2(y)
        y = self.bn2(y)
        y = self.dropout(y)
        y = y + x
        y = F.relu(y)

        return y

In [530]:
class ResNet(nn.Module):

    def __init__(self, nb_residual_blocks, input_channels, nb_channels,
                 kernel_size = 3, nb_classes = 10, dropout = 0):
        super().__init__()

        self.conv = nn.Conv2d(input_channels, nb_channels,
                              kernel_size = kernel_size,
                              padding = (kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2d(nb_channels)

        self.resnet_blocks = nn.Sequential(
            *(ResNetBlock(nb_channels, kernel_size, dropout)
              for _ in range(nb_residual_blocks))
        )

        self.fc = nn.Linear(288, nb_classes)
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x):
        x = F.relu(self.bn(self.conv(x)))
        #x = F.relu(self.dropout(self.bn(self.conv(x))))
        x = self.resnet_blocks(x)
        x = F.avg_pool2d(x, 4).view(x.size(0), -1)
        x = torch.sigmoid(self.fc(x))
        return x

In [549]:
class SiameseResNet(nn.Module):

    def __init__(self, nb_residual_blocks, input_channels, nb_channels,
                 kernel_size = 3, nb_classes = 10, dropout = 0):
        super().__init__()

        self.conv = nn.Conv2d(input_channels, nb_channels,
                              kernel_size = kernel_size,
                              padding = (kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2d(nb_channels)

        self.resnet_blocks = nn.Sequential(
            *(ResNetBlock(nb_channels, kernel_size, dropout)
              for _ in range(nb_residual_blocks))
        )

        self.fc = nn.Linear(20, nb_classes)
        self.fc1 = nn.Linear(288, 10)
        
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x1, x2):
        x1 = F.relu(self.bn(self.conv(x1)))
        #x = F.relu(self.dropout(self.bn(self.conv(x))))
        x1 = self.resnet_blocks(x1)
        x1 = F.avg_pool2d(x1, 4).view(x1.size(0), -1)
        x1 = F.relu(self.fc1(x1))
        
        x2 = F.relu(self.bn(self.conv(x2)))
        #x = F.relu(self.dropout(self.bn(self.conv(x))))
        x2 = self.resnet_blocks(x2)
        x2 = F.avg_pool2d(x2, 4).view(x2.size(0), -1)
        x2 = F.relu(self.fc1(x2))
        
        x = torch.cat([x1, x2], dim=1)
        #aux1 = F.softmax(self.fc1(x1)
        #aux2 = F.softmax(self.fc1(x2)
        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))      
        x = torch.sigmoid(self.fc(x))
        
        return x

In [550]:
class AuxResNet(nn.Module):

    def __init__(self, nb_residual_blocks, input_channels, nb_channels,
                 kernel_size = 3, nb_classes = 10, dropout = 0):
        super().__init__()

        self.conv1 = nn.Conv2d(input_channels, nb_channels,
                              kernel_size = kernel_size,
                              padding = (kernel_size - 1) // 2)
        self.conv2 = nn.Conv2d(input_channels, nb_channels,
                              kernel_size = kernel_size,
                              padding = (kernel_size - 1) // 2)
        self.bn1 = nn.BatchNorm2d(nb_channels)
        self.bn2 = nn.BatchNorm2d(nb_channels)

        self.resnet_blocks1 = nn.Sequential(
            *(ResNetBlock(nb_channels, kernel_size, dropout)
              for _ in range(nb_residual_blocks))
        )
        self.resnet_blocks2 = nn.Sequential(
            *(ResNetBlock(nb_channels, kernel_size, dropout)
              for _ in range(nb_residual_blocks))
        )
        
        self.fc = nn.Linear(20, nb_classes)
        self.fc1 = nn.Linear(288, 10)
        
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x1, x2):
        x1 = F.relu(self.bn1(self.conv1(x1)))
        #x = F.relu(self.dropout(self.bn(self.conv(x))))
        x1 = self.resnet_blocks1(x1)
        x1 = F.avg_pool2d(x1, 4).view(x1.size(0), -1)
        x1 = self.fc1(x1)
        
        x2 = F.relu(self.bn2(self.conv2(x2)))
        #x = F.relu(self.dropout(self.bn(self.conv(x))))
        x2 = self.resnet_blocks2(x2)
        x2 = F.avg_pool2d(x2, 4).view(x2.size(0), -1)
        x2 = self.fc1(x2)
        
        x = torch.cat([x1, x2], dim=1)
        aux1 = F.softmax(x1)
        aux2 = F.softmax(x2)
        x1 = F.relu(x1)
        x2 = F.relu(x2)
        x = torch.cat([x1, x2], dim=1)
        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))      
        x = torch.sigmoid(self.fc(x))    
        
        return x, aux1, aux2

In [551]:
class AuxsiameseResNet(nn.Module):

    def __init__(self, nb_residual_blocks, input_channels, nb_channels,
                 kernel_size = 3, nb_classes = 10, dropout = 0):
        super().__init__()

        self.conv = nn.Conv2d(input_channels, nb_channels,
                              kernel_size = kernel_size,
                              padding = (kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2d(nb_channels)

        self.resnet_blocks = nn.Sequential(
            *(ResNetBlock(nb_channels, kernel_size, dropout)
              for _ in range(nb_residual_blocks))
        )

        self.fc = nn.Linear(20, nb_classes)
        self.fc1 = nn.Linear(288, 10)
        
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x1, x2):
        x1 = F.relu(self.bn(self.conv(x1)))
        #x = F.relu(self.dropout(self.bn(self.conv(x))))
        x1 = self.resnet_blocks(x1)
        x1 = F.avg_pool2d(x1, 4).view(x1.size(0), -1)
        x1 = self.fc1(x1)
        
        x2 = F.relu(self.bn(self.conv(x2)))
        #x = F.relu(self.dropout(self.bn(self.conv(x))))
        x2 = self.resnet_blocks(x2)
        x2 = F.avg_pool2d(x2, 4).view(x2.size(0), -1)
        x2 = self.fc1(x2)
      
        aux1 = F.softmax(x1)
        aux2 = F.softmax(x2)
        x1 = F.relu(x1)
        x2 = F.relu(x2)
        x = torch.cat([x1, x2], dim=1)
        #x = torch.abs(x1 - x2)
        #x = F.relu(self.fc1(x.flatten(start_dim=1)))
        #x = F.relu(self.fc2(x))      
        x = torch.sigmoid(self.fc(x))
        
        return x, aux1, aux2

In [534]:
# Number of parameters of each model

In [552]:
model_1 = MLP()
model_2 = SiameseMLP()
model_3 = AuxMLP()
model_4 = AuxsiameseMLP()
model_5 = BaseNet()
model_6 = SiameseBaseNet()
model_7 = AuxBaseNet()
model_8 = AuxsiameseBaseNet()
model_9 = ResNet(nb_residual_blocks = 4, input_channels = 2, nb_channels = 32, kernel_size = 3, nb_classes = 2)
model_10 = SiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
model_11 = AuxResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
model_12 = AuxsiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)

model = [model_1, model_2, model_3, model_4, model_5, model_6, model_7, model_8, model_9, model_10, model_11, model_12]
for i in model:
    print(count_param(i))

73314
33172
66302
33172
72536
72268
134766
72268
75746
77812
152692
77812


In [None]:
# train function

In [536]:
def train(model, train_loader, eta, decay, n_epochs=25, verbose=False, siamese=False, aux=False, alpha = 0):

    #binary_crit = nn.CrossEntropyLoss()
    binary_crit = torch.nn.BCELoss()
    aux_crit = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=eta, weight_decay=decay)
    #optimizer = torch.optim.LBFGS(model.parameters(), lr=eta)
    tr_losses = []
    tr_accuracies = []

    for e in range(n_epochs):
        # Reset training/validation loss
        tr_loss = 0

        # Training model
        model.train()

        for train_input, train_target, train_classes in iter(train_loader):
            #train_target = torch.nn.functional.one_hot(train_target)
            # Forward pass
            
            if siamese == True:
                train_1, train_2 = train_input.unbind(1)
                if aux == True:
                    output, aux1, aux2 = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
                else:
                    output = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
            elif aux == True:
                train_1, train_2 = train_input.unbind(1)
                output, aux1, aux2 = model(train_1.unsqueeze(1), train_2.unsqueeze(1))
            else:
                output = model(train_input)
                
            # Binary classification loss
            binary_loss = binary_crit(output, train_target.float())
            total_loss = binary_loss
            
            # Auxiliary loss
            if aux == True:

                aux_loss1 = aux_crit(aux1, train_classes[:,0])
                aux_loss2 = aux_crit(aux2, train_classes[:,1])
                aux_loss = aux_loss1 + aux_loss2
                total_loss = binary_loss + aux_loss * alpha
        
            # Total loss = Binary loss + aux loss * alpha
            
            tr_loss += total_loss

            # Backward pass
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Collect accuracy data
        # tr_accuracies.append(compute_nb_errors_siamese(model, train_loader)/1000)

        # Collect loss data
        tr_losses.append(tr_loss)

        if verbose:
            print('Epoch %d/%d, Binary loss: %.3f' %
                  (e+1, n_epochs, tr_loss))
    return tr_losses

In [None]:
# Final data

In [537]:
accuracies8 = []
times8 = []
losses8 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()
    #model = SiameseBaseNet()
    model = MLP()
    model.to(device)
    losses8[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False))
    time2 = time.perf_counter()
    times8.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies8.append(te_accuracy)

print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies8).mean(), torch.tensor(accuracies8).std()))

tensor(1.) tensor(0.8220)
tensor(1.) tensor(0.8030)
tensor(1.) tensor(0.8100)
tensor(1.) tensor(0.8220)
tensor(1.) tensor(0.8130)
tensor(1.) tensor(0.8140)
tensor(1.) tensor(0.8180)
tensor(1.) tensor(0.8200)
tensor(1.) tensor(0.8110)
tensor(1.) tensor(0.8110)
Mean: 0.814, Std: 0.006


In [538]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times8).mean(), torch.tensor(times8).std()))

Mean: 9.767, Std: 2.086


In [449]:
accuracies9 = []
times9 = []
losses9 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()
    
    model = SiameseMLP()
    model.to(device)

    losses9[i-10,:] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True))
    time2 = time.perf_counter()
    times9.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies9.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies9).mean(), torch.tensor(accuracies9).std()))

tensor(1., device='cuda:0') tensor(0.8400, device='cuda:0')
tensor(0.9980, device='cuda:0') tensor(0.8370, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8480, device='cuda:0')
tensor(0.9980, device='cuda:0') tensor(0.8360, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8350, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8460, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8570, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8640, device='cuda:0')
tensor(0.9990, device='cuda:0') tensor(0.8420, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8490, device='cuda:0')
Mean: 0.845, Std: 0.009


In [450]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times9).mean(), torch.tensor(times9).std()))

Mean: 54.148, Std: 0.246


In [451]:
accuracies10 = []
times10 = []
losses10 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()

    model = AuxMLP()
    model.to(device)
    losses10[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.9))
    time2 = time.perf_counter()
    times10.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies10.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies10).mean(), torch.tensor(accuracies10).std()))

  app.launch_new_instance()


tensor(0.9940, device='cuda:0') tensor(0.8390, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8440, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8450, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8730, device='cuda:0')
tensor(0.9980, device='cuda:0') tensor(0.8640, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8390, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8570, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8580, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8800, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8720, device='cuda:0')
Mean: 0.857, Std: 0.015


In [452]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times10).mean(), torch.tensor(times10).std()))

Mean: 43.098, Std: 0.224


In [453]:
accuracies11 = []
times11 = []
losses11 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()

    model = AuxsiameseMLP()
    model.to(device)
    losses11[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = 0.7))
    time2 = time.perf_counter()
    times11.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies11.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies11).mean(), torch.tensor(accuracies11).std()))

  


tensor(0.9540, device='cuda:0') tensor(0.8670, device='cuda:0')
tensor(0.9800, device='cuda:0') tensor(0.8880, device='cuda:0')
tensor(0.9840, device='cuda:0') tensor(0.8680, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8720, device='cuda:0')
tensor(0.9730, device='cuda:0') tensor(0.8740, device='cuda:0')
tensor(0.9910, device='cuda:0') tensor(0.8770, device='cuda:0')
tensor(0.9890, device='cuda:0') tensor(0.8740, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8740, device='cuda:0')
tensor(0.9950, device='cuda:0') tensor(0.8950, device='cuda:0')
tensor(0.9610, device='cuda:0') tensor(0.8810, device='cuda:0')
Mean: 0.877, Std: 0.009


In [454]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times11).mean(), torch.tensor(times11).std()))

Mean: 68.603, Std: 0.362


In [455]:
accuracies = []
times = []
losses = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()

    model = BaseNet()
    model.to(device)
    losses[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False))
    time2 = time.perf_counter()
    times.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies.append(te_accuracy)

print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies).mean(), torch.tensor(accuracies).std()))

tensor(1., device='cuda:0') tensor(0.8380, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8300, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8140, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8320, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8300, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8030, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8350, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8150, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8260, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8260, device='cuda:0')
Mean: 0.825, Std: 0.011


In [456]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times).mean(), torch.tensor(times).std()))

Mean: 38.077, Std: 0.187


In [457]:
accuracies1 = []
times1 = []
losses1 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()
    model = SiameseBaseNet()
    model.to(device)

    losses1[i-10, :] = torch.tensor(train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=True))
    time2 = time.perf_counter()
    times1.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies1.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies1).mean(), torch.tensor(accuracies1).std()))

tensor(1., device='cuda:0') tensor(0.8530, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8460, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8440, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8640, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8520, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8520, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8580, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8660, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8520, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8440, device='cuda:0')
Mean: 0.853, Std: 0.008


In [458]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times1).mean(), torch.tensor(times1).std()))

Mean: 50.379, Std: 1.647


In [459]:
accuracies2 = []
times2 = []
losses2 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)
    time1 = time.perf_counter()

    model = AuxBaseNet()
    model.to(device)
    losses2[i-10, :] = torch.tensor(train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False, aux=True, alpha = 1.0))
    time2 = time.perf_counter()
    times2.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies2.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies2).mean(), torch.tensor(accuracies2).std()))



tensor(1., device='cuda:0') tensor(0.8590, device='cuda:0')
tensor(0.9980, device='cuda:0') tensor(0.9040, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8590, device='cuda:0')
tensor(0.9920, device='cuda:0') tensor(0.8200, device='cuda:0')
tensor(0.9980, device='cuda:0') tensor(0.8560, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8490, device='cuda:0')
tensor(0.9990, device='cuda:0') tensor(0.8550, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8840, device='cuda:0')
tensor(0.9930, device='cuda:0') tensor(0.9020, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8470, device='cuda:0')
Mean: 0.863, Std: 0.026


In [460]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times2).mean(), torch.tensor(times2).std()))

Mean: 145.856, Std: 0.234


In [461]:
accuracies3 = []
times3 = []
losses3 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()
    
    model = AuxsiameseBaseNet()
    model.to(device)
    losses3[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = 0.6))
    time2 = time.perf_counter()
    times3.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies3.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies3).mean(), torch.tensor(accuracies3).std()))



tensor(0.9990, device='cuda:0') tensor(0.8840, device='cuda:0')
tensor(0.9970, device='cuda:0') tensor(0.9210, device='cuda:0')
tensor(0.9960, device='cuda:0') tensor(0.8990, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.9140, device='cuda:0')
tensor(0.9920, device='cuda:0') tensor(0.9010, device='cuda:0')
tensor(0.9980, device='cuda:0') tensor(0.9350, device='cuda:0')
tensor(0.9810, device='cuda:0') tensor(0.8880, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.9120, device='cuda:0')
tensor(0.9780, device='cuda:0') tensor(0.8600, device='cuda:0')
tensor(0.9930, device='cuda:0') tensor(0.9000, device='cuda:0')
Mean: 0.901, Std: 0.021


In [462]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times3).mean(), torch.tensor(times3).std()))

Mean: 30.110, Std: 0.261


In [463]:
accuracies4 = []
times4 = []
losses4 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()

    model = ResNet(nb_residual_blocks = 4, input_channels = 2, nb_channels = 32, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses4[i-10, :] = torch.tensor(train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=False))
    time2 = time.perf_counter()
    times4.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies4.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies4).mean(), torch.tensor(accuracies4).std()))

tensor(1., device='cuda:0') tensor(0.8220, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8200, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8280, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8300, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8460, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8520, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8400, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8230, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8510, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8180, device='cuda:0')
Mean: 0.833, Std: 0.013


In [464]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times4).mean(), torch.tensor(times4).std()))

Mean: 65.422, Std: 0.141


In [465]:
accuracies5 = []
times5 = []
losses5 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()
    
    model = SiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses5[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True))
    time2 = time.perf_counter()
    times5.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_siamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies5.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies5).mean(), torch.tensor(accuracies5).std()))

tensor(0.9970, device='cuda:0') tensor(0.8830, device='cuda:0')
tensor(0.9850, device='cuda:0') tensor(0.8550, device='cuda:0')
tensor(0.9550, device='cuda:0') tensor(0.8460, device='cuda:0')
tensor(0.9910, device='cuda:0') tensor(0.8790, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8620, device='cuda:0')
tensor(0.9450, device='cuda:0') tensor(0.8460, device='cuda:0')
tensor(0.9840, device='cuda:0') tensor(0.8610, device='cuda:0')
tensor(0.9980, device='cuda:0') tensor(0.8610, device='cuda:0')
tensor(0.9860, device='cuda:0') tensor(0.8690, device='cuda:0')
tensor(0.9970, device='cuda:0') tensor(0.8630, device='cuda:0')
Mean: 0.863, Std: 0.012


In [466]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times5).mean(), torch.tensor(times5).std()))

Mean: 90.997, Std: 0.326


In [467]:
accuracies6 = []
times6 = []
losses6 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
    time1 = time.perf_counter()
    
    model = AuxResNet(nb_residual_blocks = 10, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses6[i-10, :] = torch.tensor(train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.6))
    time2 = time.perf_counter()
    times6.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies6.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies6).mean(), torch.tensor(accuracies6).std()))



tensor(1., device='cuda:0') tensor(0.8700, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8690, device='cuda:0')
tensor(0.9990, device='cuda:0') tensor(0.8790, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8630, device='cuda:0')
tensor(0.9990, device='cuda:0') tensor(0.8870, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8380, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8570, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8690, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8680, device='cuda:0')
tensor(0.9990, device='cuda:0') tensor(0.8690, device='cuda:0')
Mean: 0.867, Std: 0.013


In [468]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times6).mean(), torch.tensor(times6).std()))

Mean: 559.451, Std: 0.711


In [472]:
accuracies7 = []
times7 = []
losses7 = torch.empty((10,25))

for i in range(10,20):
    train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)
    time1 = time.perf_counter()
    #model = SiameseBaseNet()
    #model = BaseNet()
    model = AuxsiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
    model.to(device)
    losses7[i-10, :] = torch.tensor(train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = 0.6))
    time2 = time.perf_counter()
    times7.append(time2 - time1)

    tr_accuracy = 1 - compute_nb_errors_auxsiamese(model, train_loader)/1000
    te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
    print(tr_accuracy, te_accuracy)
    accuracies7.append(te_accuracy)
    
print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies7).mean(), torch.tensor(accuracies7).std()))



tensor(0.9990, device='cuda:0') tensor(0.8970, device='cuda:0')
tensor(0.9970, device='cuda:0') tensor(0.8760, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8990, device='cuda:0')
tensor(0.9990, device='cuda:0') tensor(0.8930, device='cuda:0')
tensor(0.9920, device='cuda:0') tensor(0.8880, device='cuda:0')
tensor(0.9990, device='cuda:0') tensor(0.8770, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.9000, device='cuda:0')
tensor(0.9920, device='cuda:0') tensor(0.8790, device='cuda:0')
tensor(0.9740, device='cuda:0') tensor(0.8770, device='cuda:0')
tensor(1., device='cuda:0') tensor(0.8940, device='cuda:0')
Mean: 0.888, Std: 0.010


In [473]:
print('Mean: %.3f, Std: %.3f' %(torch.tensor(times7).mean(), torch.tensor(times7).std()))

Mean: 98.518, Std: 0.228


In [None]:
# Optimize learning rate and batch size

In [315]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies1 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = MLP()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies1[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies1)

tensor([[0.8014, 0.8035, 0.8015, 0.8056, 0.8019],
        [0.8044, 0.8070, 0.8068, 0.8042, 0.8011],
        [0.8072, 0.8071, 0.8053, 0.8004, 0.7953],
        [0.7977, 0.7913, 0.7888, 0.7791, 0.7683]])


In [444]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies2 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = SiameseMLP()
            model.to(device)
            #model = BaseNet()
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies2[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies2)

tensor([[0.8475, 0.8444, 0.8468, 0.8429, 0.8374],
        [0.8423, 0.8384, 0.8367, 0.8363, 0.8296],
        [0.8406, 0.8350, 0.8342, 0.8300, 0.8140],
        [0.8195, 0.8094, 0.7905, 0.7710, 0.7423]])


In [317]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies3 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = AuxMLP()
            model.to(device)
            #model = BaseNet()
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies3[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies3)

  app.launch_new_instance()


tensor([[0.8184, 0.8225, 0.8223, 0.8176, 0.8120],
        [0.8139, 0.8119, 0.8114, 0.8063, 0.8106],
        [0.8130, 0.8087, 0.8095, 0.8112, 0.8017],
        [0.8060, 0.7966, 0.7876, 0.7730, 0.7571]])


In [319]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies5 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = BaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies5[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies5)

tensor([[0.7246, 0.7686, 0.7808, 0.8034, 0.8205],
        [0.8241, 0.8299, 0.8253, 0.8273, 0.8256],
        [0.8287, 0.8311, 0.8293, 0.8291, 0.8189],
        [0.8303, 0.8192, 0.7826, 0.7837, 0.7460]])


In [320]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies6 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = SiameseBaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies6[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies6)

tensor([[0.8408, 0.8178, 0.8411, 0.8424, 0.8413],
        [0.8546, 0.8564, 0.8523, 0.8487, 0.8513],
        [0.8520, 0.8536, 0.8530, 0.8507, 0.8529],
        [0.8486, 0.8511, 0.8439, 0.8258, 0.7881]])


In [321]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies7 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = AuxBaseNet()
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies7[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies7)



tensor([[0.7255, 0.7897, 0.8042, 0.7796, 0.8148],
        [0.8354, 0.8351, 0.8372, 0.8389, 0.8329],
        [0.8410, 0.8391, 0.8380, 0.8347, 0.8325],
        [0.8324, 0.8310, 0.8204, 0.8055, 0.7755]])


In [323]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies9 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = ResNet(nb_residual_blocks = 4, input_channels = 2, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False)
            te_accuracy = 1 - compute_nb_errors(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies9[j,k] =  torch.cuda.FloatTensor(accurate).mean()
print(test_accuracies9)

tensor([[0.8262, 0.8285, 0.8360, 0.8349, 0.8226],
        [0.8398, 0.8384, 0.8403, 0.8343, 0.8302],
        [0.8354, 0.8340, 0.8184, 0.8279, 0.8218],
        [0.8152, 0.8161, 0.8157, 0.8150, 0.8147]])


In [563]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies10 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = SiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            loss = train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=True)
            te_accuracy = 1 - compute_nb_errors_siamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies10[j,k] =  torch.FloatTensor(accurate).mean()
print(test_accuracies10)

tensor([[0.8146, 0.8483, 0.8466, 0.8430, 0.8749],
        [0.8823, 0.8833, 0.8772, 0.8711, 0.8649],
        [0.8796, 0.8750, 0.8703, 0.8583, 0.8506],
        [0.8509, 0.8482, 0.8454, 0.8483, 0.8466]])


In [564]:
gammas = [5e-3, 1e-3, 5e-4, 1e-4]
batch_sizes = [8, 16, 32, 64, 128]
test_accuracies11 = torch.empty((len(gammas), len(batch_sizes)))

for j in range(len(gammas)):
    for k in range(len(batch_sizes)):
        accurate = []
        for i in range(10):
            model = AuxResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
            model.to(device)
            train_loader, test_loader = load_data(N=1000, batch_size=batch_sizes[k], seed=i)
            loss = train(model, train_loader, gammas[j], 0, 25, verbose=False, siamese=False, aux=True, alpha = 0.0)
            te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
            accurate.append(te_accuracy)
        test_accuracies11[j,k] =  torch.FloatTensor(accurate).mean()
print(test_accuracies11)



tensor([[0.8023, 0.8195, 0.8178, 0.8232, 0.8373],
        [0.8487, 0.8480, 0.8454, 0.8500, 0.8405],
        [0.8492, 0.8465, 0.8447, 0.8417, 0.8321],
        [0.8296, 0.8233, 0.8251, 0.8231, 0.8329]])


In [None]:
# Optimize alpha for auxiliary loss

In [333]:
for j in range(11):
    accuracies100 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)
        
        model = AuxMLP()
        model.to(device)
        loss = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies100.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies100).mean(), torch.tensor(accuracies100).std()))

  app.launch_new_instance()


Mean: 0.823, Std: 0.021
Mean: 0.836, Std: 0.012
Mean: 0.842, Std: 0.007
Mean: 0.847, Std: 0.012
Mean: 0.849, Std: 0.016
Mean: 0.851, Std: 0.015
Mean: 0.848, Std: 0.015
Mean: 0.856, Std: 0.014
Mean: 0.854, Std: 0.008
Mean: 0.864, Std: 0.013
Mean: 0.857, Std: 0.012


In [561]:
for j in range(11):
    accuracies101 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)

        model = AuxsiameseMLP()
        model.to(device)
        loss = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies101.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies101).mean(), torch.tensor(accuracies101).std()))

  


Mean: 0.849, Std: 0.017
Mean: 0.864, Std: 0.015
Mean: 0.860, Std: 0.015
Mean: 0.873, Std: 0.011
Mean: 0.866, Std: 0.018
Mean: 0.870, Std: 0.015
Mean: 0.872, Std: 0.018
Mean: 0.872, Std: 0.020
Mean: 0.884, Std: 0.017
Mean: 0.881, Std: 0.015
Mean: 0.881, Std: 0.022


In [562]:
for j in range(11):
    accuracies102 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=32, seed=i)

        model = AuxsiameseBaseNet()
        model.to(device)
        loss = train(model, train_loader, 5e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies102.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies102).mean(), torch.tensor(accuracies102).std()))



Mean: 0.812, Std: 0.089
Mean: 0.855, Std: 0.008
Mean: 0.871, Std: 0.020
Mean: 0.887, Std: 0.011
Mean: 0.897, Std: 0.016
Mean: 0.904, Std: 0.013
Mean: 0.902, Std: 0.016
Mean: 0.909, Std: 0.015
Mean: 0.908, Std: 0.009
Mean: 0.913, Std: 0.016
Mean: 0.912, Std: 0.017


In [330]:
for j in range(11):
    accuracies103 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=8, seed=i)

        model = AuxBaseNet()
        model.to(device)
        train(model, train_loader, 5e-4, 0, 25, verbose=False, siamese=False, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies103.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies103).mean(), torch.tensor(accuracies103).std()))



Mean: 0.840, Std: 0.014
Mean: 0.832, Std: 0.016
Mean: 0.838, Std: 0.011
Mean: 0.843, Std: 0.015
Mean: 0.850, Std: 0.013
Mean: 0.851, Std: 0.013
Mean: 0.855, Std: 0.012
Mean: 0.857, Std: 0.013
Mean: 0.858, Std: 0.013
Mean: 0.859, Std: 0.013
Mean: 0.868, Std: 0.012


In [None]:
for j in range(11):
    accuracies104 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=16, seed=i)

        model = AuxsiameseResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
        model.to(device)
        train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=True, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies104.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies104).mean(), torch.tensor(accuracies104).std()))



Mean: 0.881, Std: 0.011
Mean: 0.889, Std: 0.016
Mean: 0.894, Std: 0.015
Mean: 0.893, Std: 0.011
Mean: 0.891, Std: 0.012


In [None]:
for j in range(11):
    accuracies105 = []

    for i in range(10):
        train_loader, test_loader = load_data(N=1000, batch_size=64, seed=i)

        model = AuxResNet(nb_residual_blocks = 4, input_channels = 1, nb_channels = 32, kernel_size = 3, nb_classes = 2)
        model.to(device)
        train(model, train_loader, 1e-3, 0, 25, verbose=False, siamese=False, aux=True, alpha = j/10)

        te_accuracy = 1 - compute_nb_errors_auxsiamese(model, test_loader)/1000
        accuracies105.append(te_accuracy)
    print('Mean: %.3f, Std: %.3f' %(torch.tensor(accuracies105).mean(), torch.tensor(accuracies105).std()))