In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim

import dlc_practical_prologue as prologue
N = 1000  # number of pairs; the next cell assigns the same value again before using it

Generate pairs

In [2]:
# Number of pairs requested from the prologue helper.
N = 1000

# Generate the train/test inputs, targets and classes in a single call.
(
    train_input,
    train_target,
    train_classes,
    test_input,
    test_target,
    test_classes,
) = prologue.generate_pair_sets(N)

In [3]:
# Normalize the inputs: both tensors are divided by 255 in place.
# The in-place `div_` returns the same tensor, so the names keep pointing at
# the original objects; written as an assignment so the cell displays nothing.
# NOTE: because the scaling is in place, re-running this cell divides again.
train_input, test_input = train_input.div_(255), test_input.div_(255)

In [4]:
def _one_hot_binary(target):
    """Return a float tensor of shape (len(target), 2) encoding a binary target.

    An entry equal to 1 is encoded as [0, 1]; any other value is encoded as
    [1, 0], which matches the if/else of the element-by-element loop this
    helper replaces.
    """
    is_one = target.reshape(-1) == 1
    # Column 0 flags "not 1", column 1 flags "is 1".
    return torch.stack((~is_one, is_one), dim=1).float()


# Two-column targets in the format expected by BCEWithLogitsLoss in train_model.
# Vectorized: no dependence on N, and the two sets may have different sizes.
new_train_target = _one_hot_binary(train_target)
new_test_target = _one_hot_binary(test_target)

In [5]:
def create_shallow_model():
    """Build the fully connected baseline network.

    The network maps 392 input features to 2 output logits through hidden
    layers of width 400, 500 and 600, with a ReLU after each hidden layer.

    Returns:
        nn.Sequential: a freshly initialised model.
    """
    widths = (392, 400, 500, 600)

    layers = []
    # Hidden stack: Linear followed by ReLU for each consecutive width pair.
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU())

    # Output layer: two logits, no activation.
    layers.append(nn.Linear(widths[-1], 2))
    return nn.Sequential(*layers)

In [6]:
class Net(nn.Module):
    """Small convolutional classifier for pairs of 14x14 images.

    The previous version used ``Conv1d(392, ...)`` (the flattened input size
    as a channel count) followed by ``max_pool2d``, so its forward pass could
    not run on any input. This version treats the two images of a pair as the
    two input channels of a 2-D convolution.

    Shapes through the network for a batch of B pairs:
        input        (B, 2, 14, 14)
        conv1 (k=3)  (B, 32, 12, 12) -> max-pool 2 -> (B, 32, 6, 6)
        conv2 (k=3)  (B, 64, 4, 4)   -> max-pool 2 -> (B, 64, 2, 2)
        flatten      (B, 256)
        fc1          (B, nb_hidden)
        fc2          (B, 2)

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)

    def forward(self, x):
        """Return the two output logits for each pair in ``x``.

        ``x`` may be given as (B, 2, 14, 14) or in any flattened layout with
        392 values per sample (e.g. the (B, 1, 392) batches built by
        ``train_model``); it is reshaped to (B, 2, 14, 14) first.
        """
        x = x.reshape(-1, 2, 14, 14)
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        x = F.relu(self.fc1(x.flatten(1)))
        return self.fc2(x)

In [14]:
def train_model(model, train_input, train_target, lr, nb_epochs=25, mini_batch_size=100):
    """Train ``model`` in place with mini-batch SGD and BCE-with-logits loss.

    Each mini-batch is reshaped to (batch, 1, features) before being passed to
    the model, and dimension 1 of the output is squeezed before the loss is
    computed. The summed loss of every epoch is printed as ``epoch sum_loss``.

    Args:
        model: module to optimise; its parameters are updated in place.
        train_input: tensor whose first dimension indexes the samples.
        train_target: float tensor of per-sample targets with the same first
            dimension as ``train_input`` and the same shape as the squeezed
            model output.
        lr: learning rate passed to ``optim.SGD``.
        nb_epochs: number of passes over the data (default 25).
        mini_batch_size: samples per optimisation step (default 100). The last
            batch may be smaller when the sample count is not a multiple of it.

    Returns:
        None.
    """
    optimizer = optim.SGD(model.parameters(), lr)
    criterion = nn.BCEWithLogitsLoss()
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        sum_loss = 0

        for b in range(0, nb_samples, mini_batch_size):
            # Slicing (unlike narrow with a fixed length) tolerates a short
            # final batch instead of raising.
            batch_input = train_input[b:b + mini_batch_size]
            batch_target = train_target[b:b + mini_batch_size]

            output = model(batch_input.reshape(batch_input.size(0), 1, -1))
            loss = criterion(output.squeeze(1), batch_target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            sum_loss = sum_loss + loss.item()

        print(e, sum_loss)


In [15]:
def compute_nb_errors(model, test_input, target, mini_batch_size=100):
    """Count the samples for which ``model`` predicts the wrong class.

    Each mini-batch is reshaped to (batch, 1, features) before being passed to
    the model. The predicted class is the index of the largest output value,
    and a sample counts as an error when ``target`` is <= 0 at that index.

    Args:
        model: module to evaluate.
        test_input: tensor whose first dimension indexes the samples.
        target: 2-D tensor with one row per sample and one column per class
            (e.g. a one-hot encoding).
        mini_batch_size: samples per forward pass (default 100). The last
            batch may be smaller when the sample count is not a multiple of it.

    Returns:
        int: number of misclassified samples.
    """
    nb_errors = 0

    # Evaluation only: no need to record the graph for backpropagation.
    with torch.no_grad():
        for b in range(0, test_input.size(0), mini_batch_size):
            batch_input = test_input[b:b + mini_batch_size]
            batch_size = batch_input.size(0)

            output = model(batch_input.reshape(batch_size, 1, -1))
            # Collapse to (batch, classes) so both (batch, 1, classes) and
            # (batch, classes) model outputs are handled.
            _, predicted_class = output.reshape(batch_size, -1).max(1)

            # Target value at the predicted class, for every sample at once.
            batch_target = target[b:b + batch_size]
            picked = batch_target.gather(1, predicted_class.unsqueeze(1))
            nb_errors = nb_errors + int((picked <= 0).sum().item())

    return nb_errors

In [18]:
# Learning-rate sweep: train a fresh shallow model for each rate and report
# its train and test error.
# good result : lr = 0.1 : no overfitting, good test error ~20%

for lr in [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]:
    model = create_shallow_model()
    train_model(model, train_input, new_train_target, lr)
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target)
    nb_test_errors = compute_nb_errors(model, test_input, new_test_target)

    print("Learning rate :", lr)
    for split_name, nb_errors, nb_samples in (
        ("train", nb_train_errors, train_input.size(0)),
        ("test", nb_test_errors, test_input.size(0)),
    ):
        # str.format has no '%%' escape (that belongs to %-formatting), so a
        # single '%' is what prints one percent sign.
        print('{} error {:0.2f}% {:d}/{:d}'.format(
            split_name, (100 * nb_errors) / nb_samples, nb_errors, nb_samples))

0 6.938589692115784
1 6.938288986682892
2 6.937989771366119
3 6.937691807746887
4 6.9373950362205505
5 6.937099277973175
6 6.936804354190826
7 6.9365105628967285
8 6.9362183809280396
9 6.935926675796509
10 6.93563586473465
11 6.935346782207489
12 6.935058355331421
13 6.93477064371109
14 6.934484958648682
15 6.93419885635376
16 6.93391489982605
17 6.9336318373680115
18 6.933349192142487
19 6.933068037033081
20 6.9327884912490845
21 6.932509243488312
22 6.932231247425079
23 6.931954860687256
24 6.931678593158722
Learning rate : 0.001
train error 51.30%% 513/1000
test error 49.10%% 491/1000
0 6.940481185913086
1 6.938790798187256
2 6.9371368288993835
3 6.935517132282257
4 6.933930814266205
5 6.932374954223633
6 6.930849552154541
7 6.9293535351753235
8 6.927886128425598
9 6.92644590139389
10 6.925033628940582
11 6.923648655414581
12 6.922289848327637
13 6.920957684516907
14 6.919648885726929
15 6.918364882469177
16 6.917105555534363
17 6.9158695340156555
18 6.914654493331909
19 6.913461208

In [19]:
optimizer : 
    Adam : lr = 0.001 : 0% train error, 20% test error
           lr = 0.1 : 45% train and test error
    SGD : lr = 0.1 : 22.3% train error, 22.4% test error
          lr = 0.5 : 8.3% train error, 20.5% test error

SyntaxError: invalid syntax (<ipython-input-19-894a9b314d2f>, line 1)

In [None]:
# Disabled draft of a per-sample training loop with a manual gradient step,
# kept inside a string literal so that none of it executes.
# Note that `p = p - eta*p.grad` only rebinds the local name `p`; it would not
# modify the model parameter in place if this code were re-enabled.
'''for i in range(len(train_input)):
            print(train_input[i].reshape(1, -1).shape)
            output = model(train_input[i].reshape(1, -1))
            loss = criterion(output, train_target[i].unsqueeze(0))
            sum_loss = sum_loss + loss.item()
            model.zero_grad()
            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p = p - eta*p.grad'''
    

In [33]:
def train_target_changed(train):
    """One-hot encode a vector of class indices over two classes.

    Args:
        train: tensor with one class index per sample; values are truncated
            to integers and used as column indices.

    Returns:
        Float tensor of shape (number of samples, 2) that is 1 in the column
        given by each sample's index and 0 elsewhere.

    Raises:
        IndexError: if an index does not address one of the two columns.
    """
    labels = train.reshape(-1).long()
    nb_samples = labels.size(0)

    t = torch.zeros(nb_samples, 2)
    # Set t[n, labels[n]] = 1 for every row in a single indexed assignment.
    t[torch.arange(nb_samples), labels] = 1
    return t

In [32]:
# Shape exploration on the first training pair.
# `test` flattens the pair into a single row; `test2` keeps one slice
# (start 2, length 1) along dimension 1 of the pair.
test = train_input[0].reshape(1, -1)
test2 = train_input[0].narrow(1, 2, 1)

# One value per line: pair shape, flattened shape, the first target as a
# 1x1 tensor, and the shape of the narrowed slice.
print(
    train_input[0].shape,
    test.shape,
    train_target[0].reshape(1, -1),
    test2.shape,
    sep="\n",
)

torch.Size([2, 14, 14])
torch.Size([1, 392])
tensor([[0]])
torch.Size([2, 1, 14])
