In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import dlc_practical_prologue as prologue

mini_batch_size = 100
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

#normalize the input
train_input/=255
test_input/=255

In [115]:
print_shapes_Net = False

In [3]:
class Siamese_net_auxiliary(nn.Module):
    def __init__(self):
        super(Siamese_net_auxiliary, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 10)
        self.fc2 = nn.Linear(20, 2)

    def forward(self, data):
        if print_shapes_Net:
            print("initial", data.shape) #100 2 14 14
            
        class_layer = []
        final_layer = []
        for i in range(2):
            x = data[:,i,:,:]
            len0 = x.shape[0]
            x = torch.reshape(x, (len0, 1, 14, 14))
            
            if print_shapes_Net:
                print("X START",x.shape) 
            
            x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
            if print_shapes_Net:
                print("conv1",x.shape) 
                
            x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
            if print_shapes_Net:
                print("conv2",x.shape)
            
            x = F.relu(self.fc1(x.view(-1, 256)))
            if print_shapes_Net:
                print("fc1",x.shape) 
                
            final_layer.append(x)
            class_layer.append(x.reshape(x.shape[0], 1, 10))
            
        final_layer = torch.cat((final_layer[1], final_layer[0]), 1)
        class_layer = torch.cat((class_layer[1], class_layer[0]), 1)
        
        if print_shapes_Net:
                print("class layer",class_layer.shape) #[100, 2, 10]
                
        final_layer = self.fc2(final_layer)
        if print_shapes_Net:
            print("final",final_layer.shape) 
            
        return class_layer, final_layer

In [121]:
class Net_auxiliary_loss(nn.Module):
    def __init__(self, nb_hidden):
        super(Net_auxiliary_loss, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 20)
        self.fc3 = nn.Linear(20, 2)

    def forward(self, x):
        if print_shapes_Net:
            print("initial", x.shape) #100 2 14 14
        batchsize = x.shape[0]
            
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print("conv1",x.shape) #100 32 6 6
            
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print("conv2",x.shape) #100 64 2 2
            
        x = F.relu(self.fc1(x.view(-1, 256)))
        if print_shapes_Net:
            print("fc1",x.shape) #100 64        
        
        x_class = x
            
        x_class = self.fc2(x_class)
        if print_shapes_Net:
            print("final class", x_class.shape) # 100 20 
        x_try = x_class
        
        x_class = torch.reshape(x_class, (batchsize, 2, 10))
        if print_shapes_Net:
            print("x_class",x_class.shape) # 100 2 10
        
            
        x = self.fc3(x_try)
        if print_shapes_Net:
            print("fc1",x.shape) #100 2 
            
        return x_class, x

In [157]:
class Net_auxiliary_loss_2(nn.Module):
    def __init__(self, nb_hidden):
        super(Net_auxiliary_loss_2, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(128, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)
        self.fc3 = nn.Linear(20, 2)

    def forward(self, x):
        if print_shapes_Net:
            print("initial", x.shape) #100 2 14 14
        batchsize = x.shape[0]
            
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print("conv1",x.shape) #100 32 6 6
            
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print("conv2",x.shape) #100 64 2 2
            
        x = F.relu(self.fc1(x.view(-1, 128)))
        if print_shapes_Net:
            print("fc1",x.shape) #200 64        
        
        x_class = x
            
        x_class = self.fc2(x_class)
        if print_shapes_Net:
            print("final class", x_class.shape) # 200 10 
        x_try = torch.reshape(x_class, (batchsize, 20))
        
        x_class = torch.reshape(x_class, (batchsize, 2, 10))
        if print_shapes_Net:
            print("x_class",x_class.shape) # 100 2 10
        
            
        x = self.fc3(x_try)
        if print_shapes_Net:
            print("fc1",x.shape) #100 2 
            
        return x_class, x

In [138]:
def train_model(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch):
    criterion = nn.MSELoss()
    
    optimizer = optim.Adam(model.parameters(), lr)
    
    for e in range(nb_epoch):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
          
            output_class, output_target = model(train_input.narrow(0, b, mini_batch_size))
            #print("OUTPUT CLASS", output_class[0])
            #print("DESIRED CLASS", train_classes.narrow(0, b, mini_batch_size)[0])
            '''print("OUTPUT TARGET", output_target.shape)
            print("TARGET TARGET", train_target.narrow(0, b, mini_batch_size).shape)
            print("OUTPUT CLASS", train_classes.narrow(0, b, mini_batch_size).shape)
            print("TARGET CLASS", train_classes_reshaped.shape)'''
            
            loss_class = criterion(output_class, train_classes.narrow(0, b, mini_batch_size))
            loss_target = criterion(output_target, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss = loss_class + loss_target #*0.5
            loss.backward()
            optimizer.step()
            
            sum_loss = sum_loss + loss.item()
        #print(e, sum_loss)

In [152]:
def train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor):
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr)
    
    for e in range(nb_epoch):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
          
            output_class, output_target = model(train_input.narrow(0, b, mini_batch_size))
            output_class = torch.reshape(output_class, (-1, 10))
            '''print("OUTPUT TARGET", output_target.shape)
            print("TARGET TARGET", train_target.narrow(0, b, mini_batch_size).shape)
            print("OUTPUT CLASS", output_class.shape)'''
            
            train_classes_reshaped = train_classes.narrow(0, b, mini_batch_size).view(-1) #with CrossEntropyLoss
            #print("TARGET CLASS", train_classes_reshaped.shape)
            
            loss_class = criterion(output_class, train_classes_reshaped)
            loss_target = criterion(output_target, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss = loss_class + loss_target * loss_factor
            loss.backward()
            optimizer.step()
            
            sum_loss = sum_loss + loss.item()
        #print(e, sum_loss)

In [148]:
def train_model_BCE(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch):

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr)
    
    for e in range(nb_epoch):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
          
            output_class, output_target = model(train_input.narrow(0, b, mini_batch_size))
            output_class = torch.reshape(output_class, (-1, 10))
            '''print("OUTPUT TARGET", output_target.shape)
            print("TARGET TARGET", train_target.narrow(0, b, mini_batch_size).shape)
            print("OUTPUT CLASS", output_class.shape)'''
            
            
            train_classes_reshaped = torch.reshape(train_classes.narrow(0, b, mini_batch_size), (-1, 10)) #with BCELoss
            #print("TARGET CLASS", train_classes_reshaped.shape)
            
            loss_class = criterion(output_class, train_classes_reshaped)
            loss_target = criterion(output_target, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss = loss_class + loss_target*0.5
            loss.backward()
            optimizer.step()
            
            sum_loss = sum_loss + loss.item()
        #print(e, sum_loss)

In [78]:
def compute_nb_errors_targets(model, input, target):
    nb_errors = 0
    _, output = model(input)
    _, predicted_target = output.max(1) #max probabilities of target
    
    for b in range(1000):
        if target[b,int(predicted_target[b])] <= 0:
            nb_errors = nb_errors + 1
            
    return nb_errors

In [175]:
def compute_nb_errors_classes(model, input, target):
    nb_errors = 0

    output,_ = model(input)
    _, predicted_classes = output.max(2)

    for b in range(input.shape[0]):
        if target[b][0][predicted_classes[b][0]] <= 0:
            nb_errors = nb_errors + 1
        if target[b][1][predicted_classes[b][1]] <= 0:
            nb_errors = nb_errors + 1

    return nb_errors

In [7]:
#train_target[1000,1]
new_train_target = torch.empty(1000,2)
new_test_target = torch.empty(1000,2)
for i in range(1000):
    if train_target[i] == 1 :
        new_train_target[i,0] = 0
        new_train_target[i,1] = 1
        
    else:
        new_train_target[i,0] = 1
        new_train_target[i,1] = 0
        
    if test_target[i] == 1:
        new_test_target[i,0] = 0
        new_test_target[i,1] = 1
        
    else:
        new_test_target[i,0] = 1
        new_test_target[i,1] = 0

In [8]:
#train_classes[1000, 2]
new_train_classes = torch.zeros(1000, 2, 10)
new_test_classes = torch.zeros(1000, 2, 10)

for i in range(train_classes.shape[0]): #
    new_train_classes[i][0][train_classes[i][0]] = 1
    new_train_classes[i][1][train_classes[i][1]] = 1

for i in range(test_classes.shape[0]):
    new_test_classes[i][0][test_classes[i][0]] = 1
    new_test_classes[i][1][test_classes[i][1]] = 1



# NET AUXILIARY LOSS 2

In [167]:
#WITH CROSS ENTROPY
####predict class of each digit
for k in range(1):
    model = Net_auxiliary_loss_2(64)
    lr = 0.005
    nb_epoch = 25
    loss_factor = 1
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

train error Net_auxiliary_loss 7.00% 70/1000
test error Net_auxiliary_loss 18.90% 189/1000


In [176]:
nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                  nb_train_errors_class, train_input.size(0)))
nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                nb_test_errors_class, test_input.size(0)))

train error Net_auxiliary_loss 7.30% 73/1000
test error Net_auxiliary_loss 26.00% 260/1000


In [160]:
#LR CHOICE: 0.005

for lr in [0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Net_auxiliary_loss_2(64)
    nb_epoch = 25
    loss_factor = 1
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    print("LR:", lr)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

LR: 0.0005
train error Net_auxiliary_loss 17.00% 170/1000
test error Net_auxiliary_loss 18.70% 187/1000
train error Net_auxiliary_loss 33.80% 338/1000
test error Net_auxiliary_loss 39.30% 393/1000
LR: 0.001
train error Net_auxiliary_loss 12.70% 127/1000
test error Net_auxiliary_loss 19.50% 195/1000
train error Net_auxiliary_loss 21.50% 215/1000
test error Net_auxiliary_loss 31.90% 319/1000
LR: 0.005
train error Net_auxiliary_loss 0.60% 6/1000
test error Net_auxiliary_loss 15.50% 155/1000
train error Net_auxiliary_loss 5.20% 52/1000
test error Net_auxiliary_loss 24.20% 242/1000
LR: 0.01
train error Net_auxiliary_loss 1.40% 14/1000
test error Net_auxiliary_loss 15.80% 158/1000
train error Net_auxiliary_loss 15.00% 150/1000
test error Net_auxiliary_loss 33.80% 338/1000
LR: 0.05
train error Net_auxiliary_loss 44.90% 449/1000
test error Net_auxiliary_loss 47.40% 474/1000
train error Net_auxiliary_loss 174.80% 1748/1000
test error Net_auxiliary_loss 176.80% 1768/1000
LR: 0.1
train error Net_

In [161]:
#NB HIDDEN CHOICE: 64

for nb_hidden in [64, 128, 256]:
    model = Net_auxiliary_loss(nb_hidden)
    nb_epoch = 25
    lr = 0.005
    loss_factor = 1
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    print("NB HIDDEN:", nb_hidden)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

NB HIDDEN: 64
train error Net_auxiliary_loss 0.20% 2/1000
test error Net_auxiliary_loss 16.70% 167/1000
train error Net_auxiliary_loss 7.10% 71/1000
test error Net_auxiliary_loss 33.60% 336/1000
NB HIDDEN: 128
train error Net_auxiliary_loss 2.70% 27/1000
test error Net_auxiliary_loss 17.40% 174/1000
train error Net_auxiliary_loss 5.90% 59/1000
test error Net_auxiliary_loss 34.20% 342/1000
NB HIDDEN: 256
train error Net_auxiliary_loss 8.60% 86/1000
test error Net_auxiliary_loss 22.60% 226/1000
train error Net_auxiliary_loss 6.20% 62/1000
test error Net_auxiliary_loss 33.50% 335/1000


In [165]:
#LOSS FACTOR CHOICE: 1

for loss_factor in [0, 0.2, 0.4, 0.5, 0.6, 1]:
    model = Net_auxiliary_loss(64)
    nb_epoch = 25
    lr = 0.005
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    print("LOSS FACTOR:", loss_factor)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

LOSS FACTOR: 0
train error Net_auxiliary_loss 43.60% 436/1000
test error Net_auxiliary_loss 44.30% 443/1000
train error Net_auxiliary_loss 8.10% 81/1000
test error Net_auxiliary_loss 33.40% 334/1000
LOSS FACTOR: 0.2
train error Net_auxiliary_loss 7.60% 76/1000
test error Net_auxiliary_loss 18.10% 181/1000
train error Net_auxiliary_loss 3.70% 37/1000
test error Net_auxiliary_loss 30.80% 308/1000
LOSS FACTOR: 0.4
train error Net_auxiliary_loss 3.40% 34/1000
test error Net_auxiliary_loss 17.90% 179/1000
train error Net_auxiliary_loss 4.90% 49/1000
test error Net_auxiliary_loss 29.80% 298/1000
LOSS FACTOR: 0.5
train error Net_auxiliary_loss 9.80% 98/1000
test error Net_auxiliary_loss 19.60% 196/1000
train error Net_auxiliary_loss 9.30% 93/1000
test error Net_auxiliary_loss 35.10% 351/1000
LOSS FACTOR: 0.6
train error Net_auxiliary_loss 7.60% 76/1000
test error Net_auxiliary_loss 20.10% 201/1000
train error Net_auxiliary_loss 3.60% 36/1000
test error Net_auxiliary_loss 29.20% 292/1000
LOSS 

In [163]:
#FINAL TEST

for i in range(5):
    model = Net_auxiliary_loss(64)
    nb_epoch = 25
    lr = 0.005
    loss_factor = 1
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    print("I:", i + 1)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

I: 1
train error Net_auxiliary_loss 0.60% 6/1000
test error Net_auxiliary_loss 16.20% 162/1000
train error Net_auxiliary_loss 7.00% 70/1000
test error Net_auxiliary_loss 33.10% 331/1000
I: 2
train error Net_auxiliary_loss 1.40% 14/1000
test error Net_auxiliary_loss 18.00% 180/1000
train error Net_auxiliary_loss 14.00% 140/1000
test error Net_auxiliary_loss 35.80% 358/1000
I: 3
train error Net_auxiliary_loss 0.60% 6/1000
test error Net_auxiliary_loss 17.30% 173/1000
train error Net_auxiliary_loss 11.10% 111/1000
test error Net_auxiliary_loss 35.00% 350/1000
I: 4
train error Net_auxiliary_loss 1.40% 14/1000
test error Net_auxiliary_loss 16.50% 165/1000
train error Net_auxiliary_loss 8.40% 84/1000
test error Net_auxiliary_loss 35.20% 352/1000
I: 5
train error Net_auxiliary_loss 0.70% 7/1000
test error Net_auxiliary_loss 16.30% 163/1000
train error Net_auxiliary_loss 9.30% 93/1000
test error Net_auxiliary_loss 32.00% 320/1000


# NET AUXILIARY LOSS

#### LOSS NORMALE

In [128]:
####predict class of each digit
for k in range(1):
    model = Net_auxiliary_loss(64)
    lr = 0.005
    nb_epoch = 25
    train_model(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

0 2.5065331757068634
1 2.2292163521051407
2 2.0670663565397263
3 1.8218588531017303
4 1.5977305173873901
5 1.428273156285286
6 1.253657266497612
7 1.1575968489050865
8 1.0382725298404694
9 0.9629436209797859
10 0.8976012691855431
11 0.967954084277153
12 1.0874886959791183
13 0.8518436625599861
14 0.8097478002309799
15 0.7159438878297806
16 0.677453063428402
17 0.646220538765192
18 0.6273945681750774
19 0.6054331138730049
20 0.5870510935783386
21 0.5822736211121082
22 0.6565598733723164
23 0.8934471942484379
24 0.8616812080144882
train error Net_auxiliary_loss 0.90% 9/1000
test error Net_auxiliary_loss 16.40% 164/1000


In [129]:
nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                  nb_train_errors_class, train_input.size(0)))
nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                nb_test_errors_class, test_input.size(0)))

train error Net_auxiliary_loss 71.70% 717/1000
test error Net_auxiliary_loss 81.60% 816/1000


#### CROSS ENTROPY

In [145]:
#WITH CROSS ENTROPY
####predict class of each digit
for k in range(1):
    model = Net_auxiliary_loss(256)
    lr = 0.001
    nb_epoch = 25
    loss_factor = 1
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

train error Net_auxiliary_loss 6.60% 66/1000
test error Net_auxiliary_loss 17.30% 173/1000


In [135]:
nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                  nb_train_errors_class, train_input.size(0)))
nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                nb_test_errors_class, test_input.size(0)))

train error Net_auxiliary_loss 13.30% 133/1000
test error Net_auxiliary_loss 37.60% 376/1000


In [142]:
#LR CHOICE: 0.001

for lr in [0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Net_auxiliary_loss(64)
    nb_epoch = 25
    loss_factor = 1
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    print("LR:", lr)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

LR: 0.0005
train error Net_auxiliary_loss 21.40% 214/1000
test error Net_auxiliary_loss 23.30% 233/1000
train error Net_auxiliary_loss 52.90% 529/1000
test error Net_auxiliary_loss 60.60% 606/1000
LR: 0.001
train error Net_auxiliary_loss 13.30% 133/1000
test error Net_auxiliary_loss 19.60% 196/1000
train error Net_auxiliary_loss 26.50% 265/1000
test error Net_auxiliary_loss 41.00% 410/1000
LR: 0.005
train error Net_auxiliary_loss 6.20% 62/1000
test error Net_auxiliary_loss 21.70% 217/1000
train error Net_auxiliary_loss 13.70% 137/1000
test error Net_auxiliary_loss 36.40% 364/1000
LR: 0.01
train error Net_auxiliary_loss 3.90% 39/1000
test error Net_auxiliary_loss 20.80% 208/1000
train error Net_auxiliary_loss 28.70% 287/1000
test error Net_auxiliary_loss 60.60% 606/1000
LR: 0.05
train error Net_auxiliary_loss 44.90% 449/1000
test error Net_auxiliary_loss 47.40% 474/1000
train error Net_auxiliary_loss 174.80% 1748/1000
test error Net_auxiliary_loss 176.80% 1768/1000
LR: 0.1
train error N

In [144]:
#NB HIDDEN CHOICE: 256

for nb_hidden in [64, 128, 256]:
    model = Net_auxiliary_loss(nb_hidden)
    nb_epoch = 25
    lr = 0.001
    loss_factor = 1
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    print("NB HIDDEN:", nb_hidden)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

NB HIDDEN: 64
train error Net_auxiliary_loss 7.30% 73/1000
test error Net_auxiliary_loss 18.60% 186/1000
train error Net_auxiliary_loss 34.40% 344/1000
test error Net_auxiliary_loss 46.30% 463/1000
NB HIDDEN: 128
train error Net_auxiliary_loss 7.40% 74/1000
test error Net_auxiliary_loss 19.00% 190/1000
train error Net_auxiliary_loss 28.50% 285/1000
test error Net_auxiliary_loss 41.60% 416/1000
NB HIDDEN: 256
train error Net_auxiliary_loss 6.10% 61/1000
test error Net_auxiliary_loss 17.10% 171/1000
train error Net_auxiliary_loss 19.60% 196/1000
test error Net_auxiliary_loss 34.70% 347/1000


In [154]:
#LOSS FACTOR CHOICE

for loss_factor in [0, 0.2, 0.4, 0.5, 0.6, 1]:
    model = Net_auxiliary_loss(64)
    nb_epoch = 25
    lr = 0.001
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch, loss_factor)
    
    print("NB HIDDEN:", nb_hidden)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

NB HIDDEN: 256
train error Net_auxiliary_loss 54.30% 543/1000
test error Net_auxiliary_loss 55.80% 558/1000
train error Net_auxiliary_loss 20.50% 205/1000
test error Net_auxiliary_loss 32.50% 325/1000
NB HIDDEN: 256
train error Net_auxiliary_loss 24.90% 249/1000
test error Net_auxiliary_loss 27.40% 274/1000
train error Net_auxiliary_loss 30.20% 302/1000
test error Net_auxiliary_loss 42.60% 426/1000
NB HIDDEN: 256
train error Net_auxiliary_loss 17.50% 175/1000
test error Net_auxiliary_loss 22.20% 222/1000
train error Net_auxiliary_loss 24.30% 243/1000
test error Net_auxiliary_loss 39.20% 392/1000
NB HIDDEN: 256
train error Net_auxiliary_loss 21.60% 216/1000
test error Net_auxiliary_loss 23.90% 239/1000
train error Net_auxiliary_loss 30.30% 303/1000
test error Net_auxiliary_loss 43.30% 433/1000
NB HIDDEN: 256
train error Net_auxiliary_loss 10.70% 107/1000
test error Net_auxiliary_loss 18.50% 185/1000
train error Net_auxiliary_loss 27.10% 271/1000
test error Net_auxiliary_loss 41.80% 418/

#### BCE loss

In [149]:
#WITH BCE
####predict class of each digit
for k in range(1):
    model = Net_auxiliary_loss(64)
    lr = 0.005
    nb_epoch = 25
    train_model_BCE(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))

train error Net_auxiliary_loss 5.20% 52/1000
test error Net_auxiliary_loss 19.90% 199/1000


In [150]:
nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                  nb_train_errors_class, train_input.size(0)))
nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                nb_test_errors_class, test_input.size(0)))
    

train error Net_auxiliary_loss 58.30% 583/1000
test error Net_auxiliary_loss 74.20% 742/1000


# SIAMESE NET

In [102]:
####predict class of each digit
for k in range(1):
    model = Siamese_net_auxiliary()
    lr = 0.005
    nb_epoch = 25
    train_model(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

initial torch.Size([100, 2, 14, 14])
X START torch.Size([100, 1, 14, 14])
conv1 torch.Size([100, 32, 6, 6])
conv2 torch.Size([100, 64, 2, 2])
fc1 torch.Size([100, 10])
X START torch.Size([100, 1, 14, 14])
conv1 torch.Size([100, 32, 6, 6])
conv2 torch.Size([100, 64, 2, 2])
fc1 torch.Size([100, 10])
class layer torch.Size([100, 2, 10])
final torch.Size([100, 2])
initial torch.Size([100, 2, 14, 14])
X START torch.Size([100, 1, 14, 14])
conv1 torch.Size([100, 32, 6, 6])
conv2 torch.Size([100, 64, 2, 2])
fc1 torch.Size([100, 10])
X START torch.Size([100, 1, 14, 14])
conv1 torch.Size([100, 32, 6, 6])
conv2 torch.Size([100, 64, 2, 2])
fc1 torch.Size([100, 10])
class layer torch.Size([100, 2, 10])
final torch.Size([100, 2])
initial torch.Size([100, 2, 14, 14])
X START torch.Size([100, 1, 14, 14])
conv1 torch.Size([100, 32, 6, 6])
conv2 torch.Size([100, 64, 2, 2])
fc1 torch.Size([100, 10])
X START torch.Size([100, 1, 14, 14])
conv1 torch.Size([100, 32, 6, 6])
conv2 torch.Size([100, 64, 2, 2])
f

KeyboardInterrupt: 

In [100]:
nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                  nb_train_errors_class, train_input.size(0)))
nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                nb_test_errors_class, test_input.size(0)))

train error Net_auxiliary_loss 155.50% 1555/1000
test error Net_auxiliary_loss 179.20% 1792/1000


## WITH CROSS ENTROPY LOSS

In [90]:
#WITH BCE
####predict class of each digit
for k in range(1):
    model = Siamese_net_auxiliary()
    lr = 0.005
    nb_epoch = 25
    train_model_CEL(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

0 29.857958555221558
1 29.60709547996521
2 29.162097215652466
3 28.624603271484375
4 28.073718309402466
5 27.53482723236084
6 27.035579204559326
7 26.584228038787842
8 26.154109001159668
9 25.831193447113037
10 25.491074323654175
11 25.116344928741455
12 24.854015350341797
13 24.579912424087524
14 24.3172447681427
15 24.091768980026245
16 23.86699342727661
17 23.715679168701172
18 23.609098196029663
19 23.648702144622803
20 23.58529758453369
21 23.299729585647583
22 23.162097930908203
23 23.126863956451416
24 22.977095127105713
train error Net_auxiliary_loss 1.00% 10/1000
test error Net_auxiliary_loss 22.40% 224/1000


In [91]:
nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                  nb_train_errors_class, train_input.size(0)))
nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                nb_test_errors_class, test_input.size(0)))

train error Net_auxiliary_loss 159.10% 1591/1000
test error Net_auxiliary_loss 178.20% 1782/1000


## WITH BCE

In [73]:
#WITH BCE
####predict class of each digit
for k in range(1):
    model = Siamese_net_auxiliary()
    lr = 0.005
    nb_epoch = 25
    train_model_BCE(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

0 13.854893326759338
1 13.82936429977417
2 13.821517586708069
3 13.816715240478516
4 13.813976883888245
5 13.812509298324585
6 13.811757564544678
7 13.811380863189697
8 13.811193943023682
9 13.811100363731384
10 13.811052918434143
11 13.811028361320496
12 13.811017036437988
13 13.811012268066406
14 13.811010479927063
15 13.811010003089905
16 13.81101131439209
17 13.811011910438538
18 13.811012983322144
19 13.81101369857788
20 13.811014533042908
21 13.811015725135803
22 13.811015963554382
23 13.81101667881012
24 13.811017036437988
train error Net_auxiliary_loss 44.90% 449/1000
test error Net_auxiliary_loss 47.40% 474/1000


In [36]:
#### lr = 0.0005 seems to not overfit
for lr in [0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Siamese_net_auxiliary()
    nb_epoch = 25
    train_model(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    print("LR", lr)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))


LR 0.0005
train error Net_auxiliary_loss 9.20% 92/1000
test error Net_auxiliary_loss 16.90% 169/1000
train error Net_auxiliary_loss 171.80% 1718/1000
test error Net_auxiliary_loss 178.70% 1787/1000
LR 0.001
train error Net_auxiliary_loss 7.00% 70/1000
test error Net_auxiliary_loss 19.10% 191/1000
train error Net_auxiliary_loss 175.70% 1757/1000
test error Net_auxiliary_loss 179.10% 1791/1000
LR 0.005
train error Net_auxiliary_loss 0.90% 9/1000
test error Net_auxiliary_loss 18.70% 187/1000
train error Net_auxiliary_loss 180.50% 1805/1000
test error Net_auxiliary_loss 181.40% 1814/1000
LR 0.01
train error Net_auxiliary_loss 0.40% 4/1000
test error Net_auxiliary_loss 20.10% 201/1000
train error Net_auxiliary_loss 180.00% 1800/1000
test error Net_auxiliary_loss 182.30% 1823/1000
LR 0.05
train error Net_auxiliary_loss 44.90% 449/1000
test error Net_auxiliary_loss 47.40% 474/1000
train error Net_auxiliary_loss 181.80% 1818/1000
test error Net_auxiliary_loss 179.00% 1790/1000
LR 0.1
train err

In [37]:
####predict class of each digit
for k in range(10):
    model = Siamese_net_auxiliary()
    lr = 0.0005
    nb_epoch = 25
    train_model(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

train error Net_auxiliary_loss 16.80% 168/1000
test error Net_auxiliary_loss 22.00% 220/1000
train error Net_auxiliary_loss 176.80% 1768/1000
test error Net_auxiliary_loss 180.00% 1800/1000
train error Net_auxiliary_loss 11.70% 117/1000
test error Net_auxiliary_loss 19.70% 197/1000
train error Net_auxiliary_loss 176.70% 1767/1000
test error Net_auxiliary_loss 179.40% 1794/1000
train error Net_auxiliary_loss 11.20% 112/1000
test error Net_auxiliary_loss 16.40% 164/1000
train error Net_auxiliary_loss 177.10% 1771/1000
test error Net_auxiliary_loss 180.00% 1800/1000
train error Net_auxiliary_loss 14.60% 146/1000
test error Net_auxiliary_loss 20.40% 204/1000
train error Net_auxiliary_loss 176.50% 1765/1000
test error Net_auxiliary_loss 180.50% 1805/1000
train error Net_auxiliary_loss 44.90% 449/1000
test error Net_auxiliary_loss 47.40% 474/1000
train error Net_auxiliary_loss 180.90% 1809/1000
test error Net_auxiliary_loss 181.10% 1811/1000
train error Net_auxiliary_loss 14.30% 143/1000
tes