In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import dlc_practical_prologue as prologue

mini_batch_size = 100
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

#normalize the input
train_input/=255
test_input/=255

In [10]:
print_shapes_Net = False

In [3]:
class Siamese_net_auxiliary(nn.Module):
    def __init__(self):
        super(Siamese_net_auxiliary, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, 10)
        self.fc2 = nn.Linear(20, 2)

    def forward(self, data):
        if print_shapes_Net:
            print("initial", data.shape) #100 2 14 14
            
        class_layer = []
        final_layer = []
        for i in range(2):
            x = data[:,i,:,:]
            len0 = x.shape[0]
            x = torch.reshape(x, (len0, 1, 14, 14))
            
            if print_shapes_Net:
                print("X START",x.shape) 
            
            x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
            if print_shapes_Net:
                print("conv1",x.shape) 
                
            x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
            if print_shapes_Net:
                print("conv2",x.shape)
            
            x = F.relu(self.fc1(x.view(-1, 256)))
            if print_shapes_Net:
                print("fc1",x.shape) 
                
            final_layer.append(x)
            class_layer.append(x.reshape(x.shape[0], 1, 10))
            
        final_layer = torch.cat((final_layer[1], final_layer[0]), 1)
        class_layer = torch.cat((class_layer[1], class_layer[0]), 1)
        
        if print_shapes_Net:
                print("class layer",class_layer.shape)
                
        final_layer = self.fc2(final_layer)
        if print_shapes_Net:
            print("final",final_layer.shape) 
            
        return class_layer, final_layer

In [35]:
def train_model(model, train_input, train_classes, train_target, mini_batch_size, lr, nb_epoch):
    criterion = nn.MSELoss() #CHANGE LOSS TO CROSS ENTROPY?
    optimizer = optim.Adam(model.parameters(), lr)
    
    for e in range(nb_epoch):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
          
            output_class, output_target = model(train_input.narrow(0, b, mini_batch_size))
            
            loss_class = criterion(output_class, train_classes.narrow(0, b, mini_batch_size))
            loss_target = criterion(output_target, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss = loss_class + loss_target
            loss.backward()
            optimizer.step()
            
            sum_loss = sum_loss + loss.item()
        #print(e, sum_loss)

In [5]:
def compute_nb_errors_targets(model, input, target):
    nb_errors = 0
    _, output = model(input)
    _, predicted_target = output.max(1) #max probabilities of target
    
    for b in range(1000):
        if target[b,int(predicted_target[b])] <= 0:
            nb_errors = nb_errors + 1
            
    return nb_errors

In [30]:
def compute_nb_errors_classes(model, input, target):
    nb_errors = 0

    output,_ = model(input)
    _, predicted_classes = output.max(2)

    for b in range(input.shape[0]):
        if target[b][0][predicted_classes[b][0]] <= 0:
            nb_errors = nb_errors + 1
        if target[b][1][predicted_classes[b][1]] <= 0:
            nb_errors = nb_errors + 1

    return nb_errors

In [7]:
#train_target[1000,1]
new_train_target = torch.empty(1000,2)
new_test_target = torch.empty(1000,2)
for i in range(1000):
    if train_target[i] == 1 :
        new_train_target[i,0] = 0
        new_train_target[i,1] = 1
        
    else:
        new_train_target[i,0] = 1
        new_train_target[i,1] = 0
        
    if test_target[i] == 1:
        new_test_target[i,0] = 0
        new_test_target[i,1] = 1
        
    else:
        new_test_target[i,0] = 1
        new_test_target[i,1] = 0

In [8]:
#train_classes[1000, 2]
new_train_classes = torch.zeros(1000, 2, 10)
new_test_classes = torch.zeros(1000, 2, 10)

for i in range(train_classes.shape[0]): #
    new_train_classes[i][0][train_classes[i][0]] = 1
    new_train_classes[i][1][train_classes[i][1]] = 1

for i in range(test_classes.shape[0]):
    new_test_classes[i][0][test_classes[i][0]] = 1
    new_test_classes[i][1][test_classes[i][1]] = 1



In [33]:
####predict class of each digit
for k in range(1):
    model = Siamese_net_auxiliary()
    lr = 0.005
    nb_epoch = 25
    train_model(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

0 4.405638158321381
1 3.6883074045181274
2 3.4818137884140015
3 3.273284137248993
4 3.0642834901809692
5 2.874796450138092
6 2.6600130647420883
7 2.4488677084445953
8 2.2773241251707077
9 2.128381237387657
10 2.0064972639083862
11 1.900394544005394
12 1.8019713461399078
13 1.7061333060264587
14 1.630494847893715
15 1.562531054019928
16 1.4981640130281448
17 1.4396350532770157
18 1.4116232693195343
19 1.4159819334745407
20 1.5226670801639557
21 1.4392471015453339
22 1.3633665442466736
23 1.3004753962159157
24 1.267670951783657
train error Net_auxiliary_loss 0.10% 1/1000
test error Net_auxiliary_loss 20.60% 206/1000


In [34]:
nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                  nb_train_errors_class, train_input.size(0)))
nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                nb_test_errors_class, test_input.size(0)))

train error Net_auxiliary_loss 179.10% 1791/1000
test error Net_auxiliary_loss 179.50% 1795/1000


In [36]:
#### lr = 0.0005 seems to not overfit
for lr in [0.0005, 0.001,0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Siamese_net_auxiliary()
    nb_epoch = 25
    train_model(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    print("LR", lr)
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))


LR 0.0005
train error Net_auxiliary_loss 9.20% 92/1000
test error Net_auxiliary_loss 16.90% 169/1000
train error Net_auxiliary_loss 171.80% 1718/1000
test error Net_auxiliary_loss 178.70% 1787/1000
LR 0.001
train error Net_auxiliary_loss 7.00% 70/1000
test error Net_auxiliary_loss 19.10% 191/1000
train error Net_auxiliary_loss 175.70% 1757/1000
test error Net_auxiliary_loss 179.10% 1791/1000
LR 0.005
train error Net_auxiliary_loss 0.90% 9/1000
test error Net_auxiliary_loss 18.70% 187/1000
train error Net_auxiliary_loss 180.50% 1805/1000
test error Net_auxiliary_loss 181.40% 1814/1000
LR 0.01
train error Net_auxiliary_loss 0.40% 4/1000
test error Net_auxiliary_loss 20.10% 201/1000
train error Net_auxiliary_loss 180.00% 1800/1000
test error Net_auxiliary_loss 182.30% 1823/1000
LR 0.05
train error Net_auxiliary_loss 44.90% 449/1000
test error Net_auxiliary_loss 47.40% 474/1000
train error Net_auxiliary_loss 181.80% 1818/1000
test error Net_auxiliary_loss 179.00% 1790/1000
LR 0.1
train err

In [37]:
####predict class of each digit
for k in range(10):
    model = Siamese_net_auxiliary()
    lr = 0.0005
    nb_epoch = 25
    train_model(model, train_input, new_train_classes, new_train_target, mini_batch_size, lr, nb_epoch)
    
    nb_train_errors = compute_nb_errors_targets(model, train_input, new_train_target)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_targets(model, test_input, new_test_target)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    
    nb_train_errors_class = compute_nb_errors_classes(model, train_input, new_train_classes)
    print('train error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors_class) / train_input.size(0),
                                                      nb_train_errors_class, train_input.size(0)))
    nb_test_errors_class = compute_nb_errors_classes(model, test_input, new_test_classes)
    print('test error Net_auxiliary_loss {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors_class) / test_input.size(0),
                                                    nb_test_errors_class, test_input.size(0)))
    

train error Net_auxiliary_loss 16.80% 168/1000
test error Net_auxiliary_loss 22.00% 220/1000
train error Net_auxiliary_loss 176.80% 1768/1000
test error Net_auxiliary_loss 180.00% 1800/1000
train error Net_auxiliary_loss 11.70% 117/1000
test error Net_auxiliary_loss 19.70% 197/1000
train error Net_auxiliary_loss 176.70% 1767/1000
test error Net_auxiliary_loss 179.40% 1794/1000
train error Net_auxiliary_loss 11.20% 112/1000
test error Net_auxiliary_loss 16.40% 164/1000
train error Net_auxiliary_loss 177.10% 1771/1000
test error Net_auxiliary_loss 180.00% 1800/1000
train error Net_auxiliary_loss 14.60% 146/1000
test error Net_auxiliary_loss 20.40% 204/1000
train error Net_auxiliary_loss 176.50% 1765/1000
test error Net_auxiliary_loss 180.50% 1805/1000
train error Net_auxiliary_loss 44.90% 449/1000
test error Net_auxiliary_loss 47.40% 474/1000
train error Net_auxiliary_loss 180.90% 1809/1000
test error Net_auxiliary_loss 181.10% 1811/1000
train error Net_auxiliary_loss 14.30% 143/1000
tes