In [55]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim

import dlc_practical_prologue as prologue

Generate pairs

In [56]:
N = 1000 # number of pairs requested from the generator; also used below as the row count of the one-hot target tensors

# Generate the train/test pair sets with the course helper module.
# NOTE(review): the unpacking assumes generate_pair_sets returns exactly six values in this
# order (inputs, targets, classes for train, then the same for test) -- confirm against prologue.
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [57]:
# Rescale both input tensors by 1/255, in place (presumably mapping 8-bit pixel
# intensities to [0, 1] -- confirm against the data generator).
# Caution: this cell is not idempotent; re-running it without regenerating the
# data divides the inputs by 255 a second time.
for split_input in (train_input, test_input):
    split_input /= 255

In [58]:
# One-hot encode the binary targets so they have one column per output score of
# the model (the training loop feeds them to BCEWithLogitsLoss):
#   target == 1   -> [0, 1]
#   anything else -> [1, 0]
# Built in one vectorised step instead of writing N rows one element at a time
# into uninitialised memory. Only the first N targets are used, as before.
train_is_one = train_target[:N] == 1
test_is_one = test_target[:N] == 1

# Column 0 flags "target is not 1", column 1 flags "target is 1".
new_train_target = torch.stack((~train_is_one, train_is_one), dim=1).float()
new_test_target = torch.stack((~test_is_one, test_is_one), dim=1).float()

In [59]:
def create_shallow_model():
    """Build the fully connected baseline network.

    The stack is Linear(392, 400) -> ReLU -> Linear(400, 500) -> ReLU ->
    Linear(500, 600) -> ReLU -> Linear(600, 2); the last layer emits two raw
    scores with no activation.

    Returns:
        A freshly initialised ``nn.Sequential``.
    """
    widths = (392, 400, 500, 600)
    nb_outputs = 2

    layers = []
    # Every hidden transition is a Linear layer followed by a ReLU.
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU())
    # Output layer: no activation after it.
    layers.append(nn.Linear(widths[-1], nb_outputs))

    return nn.Sequential(*layers)

In [60]:
class Net(nn.Module):
    """Small convolutional classifier for samples made of two 14x14 channels.

    The previous definition mixed ``Conv1d`` layers (with 392 input channels)
    and ``max_pool2d``; the pooled tensor no longer had the channel count the
    second convolution required, so ``forward`` could not run. This version
    uses 2-D convolutions sized so that the flattened feature map has exactly
    the 256 values ``fc1`` expects.

    Args:
        nb_hidden: number of units in the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super(Net, self).__init__()
        # Spatial sizes for a (B, 2, 14, 14) input:
        #   conv1 (k=3): 14 -> 12, max-pool 2: 12 -> 6
        #   conv2 (k=3):  6 ->  4, max-pool 2:  4 -> 2   => 64 * 2 * 2 = 256 features
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)

    def forward(self, x):
        """Return two raw scores per sample, shape (B, 2).

        ``x`` may be given as (B, 2, 14, 14) or in any flattened layout with
        392 values per sample (e.g. (B, 1, 392)); it is reshaped to images first.
        """
        x = x.reshape(-1, 2, 14, 14)
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.fc1(x.reshape(-1, 256)))
        x = self.fc2(x)
        return x

In [112]:
def train_model(model, train_input, train_target, lr, nb_epochs=25, mini_batch_size=100):
    """Train ``model`` in place with Adam and a binary cross-entropy-with-logits loss.

    Each mini-batch is reshaped to (batch, 1, -1) before being fed to the model,
    and the singleton dimension is squeezed out of the output before the loss is
    computed against the matching rows of ``train_target``.

    Args:
        model: module to optimise; its parameters are updated in place.
        train_input: tensor of samples, indexed along dimension 0.
        train_target: float tensor of targets with the same shape as the
            squeezed model output (one row per sample).
        lr: learning rate passed to ``optim.Adam``.
        nb_epochs: number of passes over the data (default 25).
        mini_batch_size: maximum number of samples per optimisation step
            (default 100). The last batch of an epoch may be smaller, so the
            number of samples does not have to be a multiple of this value.

    Prints the epoch index and the summed batch losses after every epoch.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        sum_loss = 0

        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the batch length so narrow()/reshape() stay valid on the final batch.
            batch_size = min(mini_batch_size, nb_samples - b)
            batch_input = train_input.narrow(0, b, batch_size).reshape(batch_size, 1, -1)
            batch_target = train_target.narrow(0, b, batch_size)

            output = model(batch_input)
            loss = criterion(output.squeeze(1), batch_target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            sum_loss = sum_loss + loss.item()

        print(e, sum_loss)


In [113]:
def compute_nb_errors(model, test_input, target, mini_batch_size=100):
    """Count the samples for which the model's top-scoring class is wrong.

    A prediction counts as an error when the entry of ``target`` at
    (sample, predicted class) is <= 0, i.e. the predicted column is not the one
    marked in the one-hot target row.

    Args:
        model: module to evaluate; each batch is fed as (batch, 1, -1).
        test_input: tensor of samples, indexed along dimension 0.
        target: 2-D tensor with one row per sample and one column per class.
        mini_batch_size: maximum number of samples per forward pass
            (default 100). The last batch may be smaller.

    Returns:
        The number of misclassified samples as a Python ``int``.
    """
    nb_errors = 0
    nb_samples = test_input.size(0)

    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the batch length so narrow()/reshape() stay valid on the final batch.
            batch_size = min(mini_batch_size, nb_samples - b)
            output = model(test_input.narrow(0, b, batch_size).reshape(batch_size, 1, -1))

            # Collapse the singleton dimension and take the winning class per sample.
            predicted_class = output.reshape(batch_size, -1).argmax(dim=1)

            # Look up the target value at each sample's predicted class.
            batch_target = target.narrow(0, b, batch_size)
            picked = batch_target.gather(1, predicted_class.unsqueeze(1))
            nb_errors = nb_errors + int((picked <= 0).sum().item())

    return nb_errors

In [114]:
# good result : lr = 0.1 : no overfitting, good test error ~20%

# Learning-rate sweep: train a fresh shallow model for each rate and report the
# train/test error counts against the one-hot targets.
for lr in [0.001, 0.005, 0.01, 0.05, 0.1]:
    model = create_shallow_model()
    train_model(model, train_input, new_train_target, lr)
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target)
    nb_test_errors = compute_nb_errors(model, test_input, new_test_target)
    print("Learning rate :", lr)
    # str.format() does not treat '%' specially, so a single '%' is the literal
    # percent sign ('%%' is only an escape for the old %-operator and printed "%%").
    print('train error {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                  nb_train_errors, train_input.size(0)))

    print('test error {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                 nb_test_errors, test_input.size(0)))

0 6.451932311058044
1 4.967446804046631
2 4.133762747049332
3 3.3266296982765198
4 2.4942222237586975
5 1.8490521013736725
6 1.6737718358635902
7 0.9952446706593037
8 0.6227876208722591
9 0.6888872403651476
10 1.0454186145216227
11 1.2577249705791473
12 3.8523196950554848
13 3.1034164652228355
14 1.6116936951875687
15 0.7796970121562481
16 0.17608672473579645
17 0.053130764747038484
18 0.01135848433477804
19 0.006402816972695291
20 0.0036853097553830594
21 0.0023685156629653648
22 0.0019043087158934213
23 0.0014951023113098927
24 0.001208610054163728
Learning rate : 0.001
train error 0.00%% 0/1000
test error 15.60%% 156/1000
0 6.234408348798752
1 5.805599391460419
2 4.47316586971283
3 3.4486809372901917
4 2.969285398721695
5 2.0315786823630333
6 1.9706301167607307
7 1.361767716705799
8 0.6641023680567741
9 0.3192249396815896
10 0.40221479209139943
11 0.6118102530017495
12 1.483733182772994
13 0.7836460210382938
14 0.8937353361397982
15 1.125158446840942
16 1.9786163493990898
17 2.97226

In [93]:
# Debug aid: print every parameter tensor of whatever `model` currently refers to.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-4.9820e-03, -2.4232e-02, -4.3068e-02,  ...,  2.7130e-02,
          2.2751e-02,  1.6514e-02],
        [ 3.5037e-02, -9.9278e-03,  1.0671e-02,  ..., -4.6963e-02,
         -1.7590e-02,  4.3742e-02],
        [-1.6918e-02, -1.0712e-02,  3.1103e-02,  ...,  4.7036e-02,
          3.4188e-02,  5.2456e-03],
        ...,
        [-4.5022e-02,  7.9573e-03,  5.3882e-04,  ..., -4.9431e-05,
         -2.9987e-02, -3.7793e-02],
        [-1.1372e-02,  3.5170e-02, -1.5145e-02,  ...,  7.8792e-04,
          1.7646e-02,  4.6101e-02],
        [-4.5752e-02,  2.6018e-02,  3.9071e-02,  ..., -4.0710e-02,
          2.7416e-02,  2.6722e-05]], requires_grad=True)
Parameter containing:
tensor([-0.0284,  0.0502, -0.0079, -0.0008,  0.0088, -0.0015,  0.0201,  0.0247,
        -0.0094, -0.0036, -0.0157, -0.0010,  0.0500, -0.0166, -0.0146,  0.0115,
        -0.0361, -0.0108, -0.0382,  0.0082,  0.0129, -0.0005,  0.0133,  0.0401,
         0.0114, -0.0208,  0.0195,  0.0072,  0.0135,  0.0179,  0

       requires_grad=True)
Parameter containing:
tensor([[ 0.0112, -0.0257,  0.0346,  ..., -0.0103,  0.0142, -0.0071],
        [-0.0185, -0.0333,  0.0019,  ..., -0.0010,  0.0184, -0.0082]],
       requires_grad=True)
Parameter containing:
tensor([-0.0116, -0.0258], requires_grad=True)


In [None]:
# Dead code: a per-sample training loop with a manual gradient step, kept inside a
# string literal so it never runs (it is indented as a fragment of a function body).
# NOTE(review): `p = p - eta*p.grad` only rebinds the loop variable to a new tensor;
# it does not modify the model's parameters. Consider deleting this cell.
'''for i in range(len(train_input)):
            print(train_input[i].reshape(1, -1).shape)
            output = model(train_input[i].reshape(1, -1))
            loss = criterion(output, train_target[i].unsqueeze(0))
            sum_loss = sum_loss + loss.item()
            model.zero_grad()
            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p = p - eta*p.grad'''
    

In [33]:
def train_target_changed(train):
    """One-hot encode a tensor of two-class labels.

    Args:
        train: tensor holding one label per sample along dimension 0; each
            label is truncated to an integer and used as a column index (0 or 1).

    Returns:
        A float tensor of shape (train.size(0), 2) that is 1 in the column
        selected by each label and 0 elsewhere.
    """
    nb_samples = train.size(0)
    one_hot = torch.zeros(nb_samples, 2)

    # Set one_hot[n, label_n] = 1 for all samples at once instead of looping in Python.
    rows = torch.arange(nb_samples)
    cols = train.reshape(-1).long().to(rows.device)
    one_hot[rows, cols] = 1
    return one_hot

In [32]:
# Scratch cell: inspect the shape of one sample under different reshapes.
sample = train_input[0]
print(sample.shape)

# Flatten the whole sample into a single row.
test = sample.reshape(1, -1)
print(test.shape)

# Target of the same sample, viewed as a 1x1 tensor.
print(train_target[0].reshape(1, -1))

# Keep only index 2 along dimension 1 of the sample.
test2 = sample.narrow(1, 2, 1)
print(test2.shape)

torch.Size([2, 14, 14])
torch.Size([1, 392])
tensor([[0]])
torch.Size([2, 1, 14])
