In [51]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import dlc_practical_prologue as prologue

# Experiment configuration: N is passed to the pair generator, mini_batch_size is
# the number of rows fed to the model per optimisation / evaluation step.
N = 1000
mini_batch_size = 100

# generate_pair_sets returns six tensors: input, target and classes for the
# training split, followed by the same three for the test split.
(
    train_input,
    train_target,
    train_classes,
    test_input,
    test_target,
    test_classes,
) = prologue.generate_pair_sets(N)

In [52]:
#normalize the input
# Scale both input tensors in place by 1/255.
# NOTE(review): presumably the raw values are pixel intensities in [0, 255] -- confirm
# against the prologue. This cell is not idempotent: re-running it divides again.
for _images in (train_input, test_input):
    _images /= 255

In [38]:
# Debug switch read by Net.forward and Net2.forward: when True, they print the
# tensor shape after each layer of the forward pass.
print_shapes_Net = False

In [39]:
class Net(nn.Module):
    """Two convolution layers followed by two fully connected layers.

    ``nb_hidden`` is the width of the hidden fully connected layer. The network
    returns 10 values per sample. The first linear layer expects 256 features,
    which is what the convolution stack produces for 1x14x14 inputs
    (64 channels of 2x2).
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def _trace(self, x):
        """Print the shape of ``x`` when the module-level debug flag is set; return ``x``."""
        if print_shapes_Net:
            print(x.shape)
        return x

    def forward(self, x):
        """Run the network on ``x`` and return the raw (un-normalised) outputs."""
        x = self._trace(x)

        # Each convolution is followed by 2x2 max pooling, then ReLU.
        pooled = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)
        x = self._trace(F.relu(pooled))

        pooled = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)
        x = self._trace(F.relu(pooled))

        # Flatten to (rows, 256) before the fully connected layers.
        flat = x.view(-1, 256)
        x = self._trace(F.relu(self.fc1(flat)))

        return self._trace(self.fc2(x))

In [40]:
class Net2(nn.Module):
    """Three convolution layers followed by two fully connected layers.

    Parameters
    ----------
    nb_hidden : int, optional
        Width of the hidden fully connected layer. Defaults to 200, the value
        that used to be hard-coded, so ``Net2()`` builds the same network as
        before.

    The first linear layer expects 64 features, which is what the convolution
    stack produces for 1x14x14 inputs (64 channels of 1x1). The network
    returns 10 values per sample.
    """

    def __init__(self, nb_hidden=200):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(64, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def _trace(self, label, x):
        """Print ``label`` and the shape of ``x`` when the debug flag is set; return ``x``."""
        if print_shapes_Net:
            print(label, x.shape)
        return x

    def forward(self, x):
        """Run the network on ``x`` and return the raw (un-normalised) outputs."""
        x = self._trace("initial shape", x)
        # The first two convolutions are followed by 2x2 max pooling, then ReLU.
        x = self._trace("1 conv", F.relu(F.max_pool2d(self.conv1(x), kernel_size=2)))
        x = self._trace("2 conv", F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)))
        # The third convolution has no pooling.
        x = self._trace("3 conv", F.relu(self.conv3(x)))
        # Flatten to (rows, 64) before the fully connected layers.
        x = self._trace("fc1", F.relu(self.fc1(x.view(-1, 64))))
        return self._trace("final", self.fc2(x))

######################################################################

In [41]:
def train_model(model, train_input, train_target, mini_batch_size, nb_epochs=25, lr=0.005):
    """Train ``model`` in place with Adam on a mean-squared-error loss.

    Parameters
    ----------
    model : nn.Module
        Network to optimise; its parameters are updated in place.
    train_input : torch.Tensor
        Inputs, one sample per row along dimension 0.
    train_target : torch.Tensor
        Targets with the same number of rows as ``train_input`` and the same
        shape as the model output (MSELoss compares them element-wise).
    mini_batch_size : int
        Number of rows per optimisation step. The final batch is shorter when
        the number of rows is not a multiple of this value.
    nb_epochs : int, optional
        Number of passes over the data (default 25).
    lr : float, optional
        Adam learning rate (default 0.005).

    Raises
    ------
    ValueError
        If ``train_input`` and ``train_target`` do not have the same number of rows.

    Prints the epoch index and the summed mini-batch loss after every epoch.
    Returns ``None``.
    """
    nb_samples = train_input.size(0)
    if train_target.size(0) != nb_samples:
        raise ValueError(
            "train_input has {:d} rows but train_target has {:d}".format(
                nb_samples, train_target.size(0)))

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for e in range(nb_epochs):
        sum_loss = 0
        for b in range(0, nb_samples, mini_batch_size):
            # narrow() raises if the slice runs past the end, so clamp the
            # length of the last batch instead of assuming an exact multiple.
            batch_size = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, batch_size))
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            sum_loss = sum_loss + loss.item()
        print(e, sum_loss)

In [42]:
def compute_nb_errors_classes(model, input, target, mini_batch_size):
    """Count the samples whose predicted class is wrong.

    The predicted class of a sample is the index of the largest model output.
    A prediction counts as an error when ``target`` holds a value <= 0 in that
    column, i.e. ``target`` is expected to be a 2-D matrix (such as a one-hot
    encoding) whose entry for the correct class is positive.

    Parameters
    ----------
    model : callable
        Maps a batch of inputs to a 2-D tensor of per-class scores.
    input : torch.Tensor
        Inputs, one sample per row along dimension 0.
    target : torch.Tensor
        2-D tensor with one row per sample and one column per class.
    mini_batch_size : int
        Number of rows evaluated per forward pass. The final batch is shorter
        when the number of rows is not a multiple of this value.

    Returns
    -------
    int
        Number of misclassified samples.
    """
    nb_errors = 0
    nb_samples = input.size(0)

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the last batch so narrow() never runs past the end.
            batch_size = min(mini_batch_size, nb_samples - b)
            output = model(input.narrow(0, b, batch_size))
            _, predicted_classes = output.max(1)

            # For every row, pick the target entry at the predicted class.
            batch_target = target.narrow(0, b, batch_size)
            picked = batch_target.gather(1, predicted_classes.unsqueeze(1))
            nb_errors += int((picked <= 0).sum().item())

    return nb_errors

In [43]:
def compute_nb_errors_targets(model, input, target):
    """Count the pairs whose predicted ordering disagrees with ``target``.

    ``input`` holds the two images of each pair in consecutive rows: rows
    ``2*b`` and ``2*b + 1`` belong to pair ``b``. The model predicts a class
    for every row; the predicted target of a pair is 0 when the first
    predicted class is strictly greater than the second, and 1 otherwise.

    Parameters
    ----------
    model : callable
        Maps the inputs to a 2-D tensor of per-class scores.
    input : torch.Tensor
        Inputs with exactly two rows per entry of ``target``.
    target : torch.Tensor
        One 0/1 label per pair.

    Returns
    -------
    int
        Number of pairs whose predicted target differs from ``target``.

    Raises
    ------
    ValueError
        If the model does not return two predictions per target entry.
    """
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        output = model(input)
    _, predicted_classes = output.max(1)

    flat_target = target.reshape(-1)
    if predicted_classes.size(0) != 2 * flat_target.size(0):
        raise ValueError(
            "expected two predictions per target, got {:d} predictions for {:d} targets".format(
                predicted_classes.size(0), flat_target.size(0)))

    # Even rows are the first image of each pair, odd rows the second.
    first_classes = predicted_classes[0::2]
    second_classes = predicted_classes[1::2]
    predicted_target = (first_classes <= second_classes).long()

    return int((predicted_target != flat_target).sum().item())

In [44]:
# Turn every sample into single-channel 14x14 images and flatten the class labels
# the same way, so that each image has exactly one label.
# NOTE(review): assumes train_input / test_input are (N, 2, 14, 14) and the class
# tensors are (N, 2) -- confirm against the prologue.
# The image count is derived from the tensors instead of being hard-coded to 2000,
# so changing N keeps this cell valid; view() still raises if the class tensors do
# not contain exactly one label per image.
nb_train_images = train_input.size(0) * train_input.size(1)
nb_test_images = test_input.size(0) * test_input.size(1)

new_train_input = train_input.view(nb_train_images, 1, 14, 14)
new_test_input = test_input.view(nb_test_images, 1, 14, 14)
new_train_classes = train_classes.view(nb_train_images)
new_test_classes = test_classes.view(nb_test_images)

In [45]:
# Flatten the class labels of both splits to 2000 entries each.
train_classes, test_classes = train_classes.view(2000), test_classes.view(2000)

In [46]:
# Transform the class labels into one-hot rows: one row per label, one column per class 0-9.
# F.one_hot replaces the element-by-element Python double loop and no longer
# hard-codes the number of rows. .float() keeps the float dtype the original
# torch.empty() tensors had (needed by the MSE loss used for training).
# Unlike the loop, F.one_hot raises on labels outside 0-9 instead of silently
# producing an all-zero row.
train_classes2 = F.one_hot(train_classes.long(), num_classes=10).float()
test_classes2 = F.one_hot(test_classes.long(), num_classes=10).float()

In [47]:
# From here on the short names refer to the reshaped inputs and to the one-hot
# class targets.
# NOTE(review): this rebinding hides the original tensors; cells above that read
# train_input / train_classes cannot be re-run after this one without reloading.
train_input, test_input = new_train_input, new_test_input
train_classes, test_classes = train_classes2, test_classes2

In [48]:
# Train a fresh Net2 on the one-hot class targets, then report the per-image
# classification error on the training and on the test split.
for k in range(1):
    model = Net2()
    train_model(model, train_input, train_classes, mini_batch_size)

    nb_train_samples = train_input.size(0)
    nb_train_errors = compute_nb_errors_classes(model, train_input, train_classes, mini_batch_size)
    train_error_rate = (100 * nb_train_errors) / nb_train_samples
    print('train error Net {:0.2f}% {:d}/{:d}'.format(train_error_rate, nb_train_errors, nb_train_samples))

    nb_test_samples = test_input.size(0)
    nb_test_errors = compute_nb_errors_classes(model, test_input, test_classes, mini_batch_size)
    test_error_rate = (100 * nb_test_errors) / nb_test_samples
    print('test error Net {:0.2f}% {:d}/{:d}'.format(test_error_rate, nb_test_errors, nb_test_samples))
    

0 1.7484488934278488
1 1.222627941519022
2 0.7609372958540916
3 0.47112078219652176
4 0.32556370459496975
5 0.26141330134123564
6 0.2062485320493579
7 0.1712086764164269
8 0.14756606379523873
9 0.12478251336142421
10 0.10892588156275451
11 0.09303884720429778
12 0.08213898516260087
13 0.07249734224751592
14 0.06799758807756007
15 0.0626523153623566
16 0.06680168863385916
17 0.069641153793782
18 0.08480492467060685
19 0.0710900065023452
20 0.053367209853604436
21 0.046089489944279194
22 0.03937864722684026
23 0.030149550293572247
24 0.022792230360209942
train error Net 0.15% 3/2000
test error Net 4.25% 85/2000


lr = 0.01 : train error 2.35%
            test error 8.95%
lr = 0.005 : train error 0%
             test error 4.20%

In [29]:
# Sanity check: show the first pair target next to the first one-hot class row.
print(train_target[0], train_classes[0])

tensor(1) tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])


In [32]:
# Sanity check: the reshaped inputs have two rows per entry of the pair targets.
print(train_input.shape, train_target.shape)

torch.Size([2000, 1, 14, 14]) torch.Size([1000])


In [50]:
# Pair-level evaluation: compare the predicted digits of each pair and count how
# often the resulting 0/1 target is wrong.
nb_train_errors = compute_nb_errors_targets(model, train_input, train_target)
print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_target.size(0),
                                                  nb_train_errors, train_target.size(0)))
nb_test_errors = compute_nb_errors_targets(model, test_input, test_target)
# The test error is normalised by the size of the test set (it used to reuse
# train_target.size(0), which is only correct while both sets have the same size).
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_target.size(0),
                                                 nb_test_errors, test_target.size(0)))

torch.Size([2000])
train error Net 0.00% 0/1000
torch.Size([2000])
test error Net 3.00% 30/1000


In [None]:
######################################################################
# Question 3

# Sweep the width of the hidden layer of Net and report the per-image test error.
# The network has 10 outputs, so it is trained and scored against the one-hot
# class tensors (train_classes / test_classes), not the pair targets, using the
# compute_nb_errors_classes helper defined in this notebook.
for nh in [ 10, 50, 200, 500, 2500 ]:
    model = Net(nh)
    train_model(model, train_input, train_classes, mini_batch_size)
    nb_test_errors = compute_nb_errors_classes(model, test_input, test_classes, mini_batch_size)
    # str.format needs a single '%'; '%%' is only an escape for %-formatting.
    print('test error Net nh={:d} {:0.2f}% {:d}/{:d}'.format(nh,
                                                             (100 * nb_test_errors) / test_input.size(0),
                                                             nb_test_errors, test_input.size(0)))

In [None]:
######################################################################
# Question 4

# Train Net2 on the one-hot class tensors and report the per-image test error.
# The network has 10 outputs, so it is trained and scored against
# train_classes / test_classes, not the pair targets, using the
# compute_nb_errors_classes helper defined in this notebook.
model = Net2()
train_model(model, train_input, train_classes, mini_batch_size)
nb_test_errors = compute_nb_errors_classes(model, test_input, test_classes, mini_batch_size)
# str.format needs a single '%'; '%%' is only an escape for %-formatting.
print('test error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                  nb_test_errors, test_input.size(0)))