In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import dlc_practical_prologue as prologue

# Number of samples per mini-batch, passed to the training/evaluation helpers below.
mini_batch_size = 100
# Number of pairs requested from generate_pair_sets.
N = 1000
# NOTE(review): generate_pair_sets lives in the local dlc_practical_prologue module; the
# meaning and order of the six returned tensors is taken from the names below -- confirm
# against that module.
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [2]:
# Normalize the input: divide both image tensors in place by 255.
# NOTE: the division mutates the tensors, so re-running this cell without re-running the
# data-loading cell above scales them a second time.
train_input/=255
test_input/=255

In [3]:
# Debug switch read by the forward() methods of the networks defined below: when True,
# they print the tensor shape after each stage.
print_shapes_Net = False

In [4]:
class Net(nn.Module):
    """Convolutional classifier: two conv / max-pool / ReLU stages, then two linear layers.

    ``fc1`` expects 256 features per sample, i.e. the 64 maps produced by ``conv2`` must be
    2x2 after pooling. With the unpadded 3x3 convolutions and 2x2 pooling used here, that
    is the case for 1x14x14 inputs. The network returns 10 raw scores per sample.

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def _show_shape(self, tensor):
        """Print ``tensor.shape`` when the module-level debug flag is enabled."""
        if print_shapes_Net:
            print(tensor.shape)

    def forward(self, x):
        """Return the 10 class scores for each sample of ``x``."""
        self._show_shape(x)
        # Both convolutional stages share the same pool-then-ReLU pattern.
        for conv in (self.conv1, self.conv2):
            x = F.relu(F.max_pool2d(conv(x), kernel_size=2, stride=2))
            self._show_shape(x)
        # Flatten the feature maps to rows of 256 values for the linear layers.
        hidden = F.relu(self.fc1(x.view(-1, 256)))
        self._show_shape(hidden)
        scores = self.fc2(hidden)
        self._show_shape(scores)
        return scores

In [5]:
class Net1(nn.Module):
    """Small 1-D convolutional network mapping (batch, 2, 10) inputs to 2 scores per row.

    NOTE(review): ``F.max_pool2d`` is applied to the 3-D output of the ``Conv1d`` layers,
    so the 2x2 window covers the channel axis as well as the length axis and both are
    halved (32x8 -> 16x4 after the first stage, per the shape notes below). This is why
    ``conv2`` is declared with 16 input channels and ``fc1`` with 32 input features.
    ``F.max_pool1d`` may have been intended -- confirm before changing, since it alters
    the layer sizes.

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv1d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv1d(16, 64, kernel_size=3)
        self.fc1 = nn.Linear(32, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)

    def _show_shape(self, label, tensor):
        """Print ``label`` and ``tensor.shape`` when the module-level debug flag is on."""
        if print_shapes_Net:
            print(label, tensor.shape)

    def forward(self, x):
        """Return 2 scores for each row of ``x``."""
        self._show_shape("initial", x)  # e.g. 100 2 10
        # After the first stage the shape is e.g. 100 16 4 (channels halved by the pooling).
        for label, conv in (("conv1", self.conv1), ("conv2", self.conv2)):
            x = F.relu(F.max_pool2d(conv(x), kernel_size=2, stride=2))
            self._show_shape(label, x)
        hidden = F.relu(self.fc1(x.view(-1, 32)))
        self._show_shape("fc1", hidden)
        scores = self.fc2(hidden)
        self._show_shape("final", scores)  # two columns per row
        return scores

In [6]:
class Net2(nn.Module):
    """Three-convolution classifier with a fixed 200-unit hidden layer and 10 outputs.

    ``fc1`` expects 64 features per sample, i.e. ``conv3`` must produce 64 maps of size
    1x1. With the unpadded 3x3, 3x3 and 2x2 convolutions and the two 2x2 poolings used
    here, that is the case for 1x14x14 inputs.
    """

    def __init__(self):
        super().__init__()
        hidden_units = 200
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(64, hidden_units)
        self.fc2 = nn.Linear(hidden_units, 10)

    def _show_shape(self, label, tensor):
        """Print ``label`` and ``tensor.shape`` when the module-level debug flag is on."""
        if print_shapes_Net:
            print(label, tensor.shape)

    def forward(self, x):
        """Return the 10 class scores for each sample of ``x``."""
        self._show_shape("initial shape", x)
        # The first two convolutions are each followed by 2x2 max-pooling and ReLU.
        for label, conv in (("1 conv", self.conv1), ("2 conv", self.conv2)):
            x = F.relu(F.max_pool2d(conv(x), kernel_size=2))
            self._show_shape(label, x)
        # The third convolution is followed by ReLU only (no pooling).
        x = F.relu(self.conv3(x))
        self._show_shape("3 conv", x)
        hidden = F.relu(self.fc1(x.view(-1, 64)))
        self._show_shape("fc1", hidden)
        scores = self.fc2(hidden)
        self._show_shape("final", scores)
        return scores

######################################################################

In [7]:
def train_model(model, train_input, train_target, mini_batch_size, lr, nb_epochs=25):
    """Train ``model`` in place with Adam on an MSE loss, printing the loss of each epoch.

    Args:
        model: the ``nn.Module`` to optimize.
        train_input: tensor of training samples, indexed along dimension 0.
        train_target: tensor of targets with the same shape as the model output
            (MSE is computed element-wise against it).
        mini_batch_size: number of samples per optimization step.
        lr: learning rate given to Adam.
        nb_epochs: number of passes over the training set (default 25).

    Returns:
        None. After each epoch the epoch index and the summed mini-batch loss are printed.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr)
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        sum_loss = 0
        for b in range(0, nb_samples, mini_batch_size):
            # The last mini-batch is shorter when nb_samples is not a multiple of
            # mini_batch_size; narrow() would raise if asked for more rows than remain.
            batch_len = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, batch_len))
            loss = criterion(output, train_target.narrow(0, b, batch_len))
            model.zero_grad()
            loss.backward()
            optimizer.step()

            sum_loss = sum_loss + loss.item()
        print(e, sum_loss)

In [8]:
def train_model2(model, train_input, train_target, mini_batch_size, lr, nb_epochs=25):
    """Train ``model`` in place with Adam on an MSE loss, treating the inputs as fixed data.

    ``train_input`` may itself be the output of another network and therefore carry an
    autograd graph. It is detached here, so each backward pass stops at ``model`` instead
    of propagating into (and accumulating gradients on) whatever produced the inputs.
    This also makes ``retain_graph=True`` unnecessary. Only ``model.parameters()`` are
    ever handed to the optimizer, so the updates to ``model`` are unaffected.

    Args:
        model: the ``nn.Module`` to optimize.
        train_input: tensor of training samples, indexed along dimension 0.
        train_target: tensor of targets with the same shape as the model output.
        mini_batch_size: number of samples per optimization step.
        lr: learning rate given to Adam.
        nb_epochs: number of passes over the training set (default 25).

    Returns:
        None. After each epoch the epoch index and the summed mini-batch loss are printed.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr)
    # Cut any upstream autograd history: the inputs are constants for this training run.
    train_input = train_input.detach()
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        sum_loss = 0
        for b in range(0, nb_samples, mini_batch_size):
            # The last mini-batch is shorter when nb_samples is not a multiple of
            # mini_batch_size; narrow() would raise if asked for more rows than remain.
            batch_len = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, batch_len))
            loss = criterion(output, train_target.narrow(0, b, batch_len))
            model.zero_grad()
            loss.backward()
            optimizer.step()

            sum_loss = sum_loss + loss.item()
        print(e, sum_loss)

In [9]:
def compute_nb_errors_classes(model, input, target, mini_batch_size):
    """Count the samples whose predicted class is not marked positive in ``target``.

    The predicted class of a sample is the index of the largest model output along
    dimension 1. A sample is an error when ``target[sample, predicted_class] <= 0``,
    which for one-hot targets means the prediction differs from the true class.

    Args:
        model: callable mapping a batch of inputs to a 2-D tensor of per-class scores.
        input: tensor of samples, indexed along dimension 0.
        target: 2-D tensor with one row per sample and one column per class.
        mini_batch_size: number of samples evaluated per forward pass.

    Returns:
        The number of misclassified samples, as a Python int.
    """
    nb_errors = 0
    nb_samples = input.size(0)

    # Evaluation only: no autograd graph is needed for counting errors.
    with torch.no_grad():
        for b in range(0, nb_samples, mini_batch_size):
            # Shorter final batch when nb_samples is not a multiple of mini_batch_size.
            batch_len = min(mini_batch_size, nb_samples - b)
            output = model(input.narrow(0, b, batch_len))
            _, predicted_classes = output.max(1)

            # Pick, for every sample, the target value at its predicted class.
            target_at_prediction = target.narrow(0, b, batch_len).gather(
                1, predicted_classes.view(-1, 1))
            nb_errors += int((target_at_prediction <= 0).sum().item())

    return nb_errors

In [10]:
def compute_nb_errors_targets(model, input, target):
    """Count pair-level errors by comparing the two predicted classes of each pair.

    ``model`` is run on all inputs at once; rows ``2*b`` and ``2*b + 1`` of its output are
    taken as the two members of pair ``b``. The predicted target of a pair is 0 when the
    first predicted class is strictly greater than the second, and 1 otherwise. The
    number of pairs is taken from ``target`` rather than being fixed.

    Args:
        model: callable mapping the inputs to a 2-D tensor of per-class scores.
        input: tensor holding two consecutive rows per pair.
        target: tensor with one 0/1 value per pair.

    Returns:
        The number of pairs whose predicted target differs from ``target``, as an int.

    Raises:
        ValueError: if the model yields fewer than two predictions per target entry.
    """
    target = target.reshape(-1)
    nb_pairs = target.size(0)

    # Evaluation only: no autograd graph is needed for counting errors.
    with torch.no_grad():
        output = model(input)
    _, predicted_classes = output.max(1)

    if predicted_classes.size(0) < 2 * nb_pairs:
        raise ValueError(
            "expected at least {:d} predictions for {:d} pairs, got {:d}".format(
                2 * nb_pairs, nb_pairs, predicted_classes.size(0)))

    # One row per pair: column 0 is the first member, column 1 the second.
    paired = predicted_classes[:2 * nb_pairs].reshape(nb_pairs, 2)
    predicted_target = (paired[:, 0] <= paired[:, 1]).to(target.dtype)

    return int((predicted_target != target).sum().item())

In [11]:
def compute_nb_errors_targets2(model, input, target):
    """Count the rows whose predicted column is not marked positive in ``target``.

    ``model`` is run on all inputs at once. The prediction for a row is the index of its
    largest output along dimension 1; the row is an error when
    ``target[row, prediction] <= 0``. Every output row is checked (the row count is no
    longer fixed), and no debug output is printed.

    Args:
        model: callable mapping the inputs to a 2-D tensor of scores.
        input: tensor of samples accepted by ``model``.
        target: 2-D tensor with one row per sample and one column per output.

    Returns:
        The number of erroneous rows, as a Python int.
    """
    # Evaluation only: no autograd graph is needed for counting errors.
    with torch.no_grad():
        output = model(input)
    _, predicted_classes = output.max(1)

    # Pick, for every row, the target value at its predicted column.
    target_at_prediction = target.gather(1, predicted_classes.view(-1, 1))

    return int((target_at_prediction <= 0).sum().item())

In [12]:
# Turn every image into its own single-channel 14x14 sample. The leading dimension is
# inferred (-1) instead of being fixed to 2000, so the cell keeps working if N changes.
# (Presumably the source tensors hold two images per pair -- confirm against the prologue.)
new_train_input = train_input.view(-1, 1, 14, 14)
new_test_input = test_input.view(-1, 1, 14, 14)

In [13]:
# Flatten the labels to one entry per image. The length is taken from the image tensors
# rather than hard-coded, so a mismatch between labels and images raises here.
train_classes = train_classes.view(new_train_input.size(0))
test_classes = test_classes.view(new_test_input.size(0))

In [14]:
# Transform the class labels into one-hot float rows with 10 columns (one per digit).
# Comparing the label column against 0..9 broadcasts to a (nb_samples, 10) boolean
# matrix; .float() turns it into the 0/1 values the per-element Python loop used to write.
digits = torch.arange(10)
train_classes2 = (train_classes.view(-1, 1) == digits).float()
test_classes2 = (test_classes.view(-1, 1) == digits).float()

In [15]:
# From this cell on, the working names refer to the per-image tensors and the one-hot
# class matrices built in the previous cells.
train_input, test_input = new_train_input, new_test_input
train_classes, test_classes = train_classes2, test_classes2

In [16]:
#### Predict the class of each digit: train Net2 and report its train/test error rates.
for k in range(1):  # range(1): the body runs exactly once
    lr = 0.005
    model = Net2()
    train_model(model, train_input, train_classes, mini_batch_size, lr)

    nb_train_samples = train_input.size(0)
    nb_train_errors = compute_nb_errors_classes(model, train_input, train_classes, mini_batch_size)
    print('train error Net {:0.2f}% {:d}/{:d}'.format(
        (100 * nb_train_errors) / nb_train_samples, nb_train_errors, nb_train_samples))

    nb_test_samples = test_input.size(0)
    nb_test_errors = compute_nb_errors_classes(model, test_input, test_classes, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format(
        (100 * nb_test_errors) / nb_test_samples, nb_test_errors, nb_test_samples))
    

0 1.6928061544895172
1 1.0780761018395424
2 0.6628487333655357
3 0.43067725002765656
4 0.3192350845783949
5 0.2654990954324603
6 0.23044653236865997
7 0.18862781021744013
8 0.160379558801651
9 0.1455889535136521
10 0.132787951733917
11 0.11891244002617896
12 0.10436525545082986
13 0.09012579126283526
14 0.08080557710491121
15 0.07255133497528732
16 0.06506303627975285
17 0.05995037348475307
18 0.061419976176694036
19 0.06316676153801382
20 0.06042509828694165
21 0.05995093937963247
22 0.04907253501005471
23 0.037125345203094184
24 0.0327044379664585
train error Net 0.35% 7/2000
test error Net 5.05% 101/2000


In [17]:
# Run the trained digit classifier on every image and regroup its 10 scores per pair,
# giving (nb_pairs, 2, 10) tensors. The forward passes run under no_grad so the results
# are plain data: otherwise they keep the classifier's autograd graph attached, and any
# later backward pass through them would also propagate into `model`.
with torch.no_grad():
    output_train = model(new_train_input)
    output_test = model(new_test_input)
# -1 infers the number of pairs instead of fixing it to 1000.
new_output_train = output_train.view(-1, 2, 10)
new_output_test = output_test.view(-1, 2, 10)

In [18]:
# Encode the pair targets as two-column float rows: a target equal to 1 becomes [0, 1]
# and any other value becomes [1, 0]. Computed for all rows at once, so the number of
# pairs is no longer fixed to 1000.
train_is_one = (train_target.view(-1, 1) == 1).float()
test_is_one = (test_target.view(-1, 1) == 1).float()
new_train_target = torch.cat((1 - train_is_one, train_is_one), dim=1)
new_test_target = torch.cat((1 - test_is_one, test_is_one), dim=1)

In [19]:
### Predict the pair target using a second model.
# Net1 with a 200-unit hidden layer.
model1 = Net1(200)

lr = 0.005

# Fit model1 on the regrouped classifier scores against the two-column pair targets.
train_model2(model1, new_output_train, new_train_target, mini_batch_size, lr)

0 2.743059203028679
1 2.0332554280757904
2 1.0772083178162575
3 0.44072831980884075
4 0.2353417444974184
5 0.15310259349644184
6 0.1323187118396163
7 0.12554017640650272
8 0.1002412592060864
9 0.061012327671051025
10 0.05824705329723656
11 0.0600721794180572
12 0.05147415236569941
13 0.04338753782212734
14 0.041542553110048175
15 0.043966175289824605
16 0.046471541514620185
17 0.04723310098052025
18 0.04601261822972447
19 0.04336920182686299
20 0.0396826125215739
21 0.036232093698345125
22 0.03514285641722381
23 0.03549069399014115
24 0.03598432336002588


In [20]:
# Error rates of the second-stage model on the train and test pairs.
nb_train_pairs = new_output_train.size(0)
nb_train_errors = compute_nb_errors_targets2(model1, new_output_train, new_train_target)
print('train error Net {:0.2f}% {:d}/{:d}'.format(
    (100 * nb_train_errors) / nb_train_pairs, nb_train_errors, nb_train_pairs))

nb_test_pairs = new_output_test.size(0)
nb_test_errors = compute_nb_errors_targets2(model1, new_output_test, new_test_target)
print('test error Net {:0.2f}% {:d}/{:d}'.format(
    (100 * nb_test_errors) / nb_test_pairs, nb_test_errors, nb_test_pairs))

torch.Size([1000])
train error Net 0.10% 1/1000
torch.Size([1000])
test error Net 3.30% 33/1000


In [22]:
### Predict the pair target using "if": compare the two predicted digits of each pair.

nb_train_errors = compute_nb_errors_targets(model, train_input, train_target)
print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_target.size(0),
                                                  nb_train_errors, train_target.size(0)))
nb_test_errors = compute_nb_errors_targets(model, test_input,  test_target)
# The test error is normalized by the number of test pairs (this line used train_target).
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_target.size(0),
                                                nb_test_errors, test_target.size(0)))

train error Net 0.30% 3/1000
test error Net 3.70% 37/1000


lr = 0.01 : train error 2.35%
            test error 8.95%
lr = 0.005 : train error 0%
             test error 4.20%

In [24]:
# Inspection: raw classifier scores for the first two training images.
output = model(new_train_input)
print(output.shape)
print(output[0], output[1])

# The same scores viewed as (1000, 2, 10): entry 0 holds the two rows printed above.
new_output = output.view([1000,2,10])
print(new_output.shape)
print(new_output[0])

torch.Size([2000, 10])
tensor([-0.0221, -0.0341, -0.0094,  0.0758, -0.0256,  0.0292, -0.0165, -0.0414,
         0.0031,  0.9251], grad_fn=<SelectBackward>) tensor([-0.0210,  0.0018, -0.0263,  1.0347,  0.0176,  0.0292,  0.0195,  0.0140,
        -0.0403, -0.0327], grad_fn=<SelectBackward>)
torch.Size([1000, 2, 10])
tensor([[-0.0221, -0.0341, -0.0094,  0.0758, -0.0256,  0.0292, -0.0165, -0.0414,
          0.0031,  0.9251],
        [-0.0210,  0.0018, -0.0263,  1.0347,  0.0176,  0.0292,  0.0195,  0.0140,
         -0.0403, -0.0327]], grad_fn=<SelectBackward>)


In [None]:
######################################################################
# Question 3
# Compare hidden-layer widths for the digit classifier Net.
# The calls now match the helpers defined in this notebook: train_model requires a learning
# rate, the digit classifier is trained/evaluated against the one-hot class matrices (not
# the pair targets), and errors are counted with compute_nb_errors_classes.
# `lr` is the learning rate set in the training cells above.

for nh in [ 10, 50, 200, 500, 2500 ]:
    model = Net(nh)
    train_model(model, train_input, train_classes, mini_batch_size, lr)
    nb_test_errors = compute_nb_errors_classes(model, test_input, test_classes, mini_batch_size)
    # str.format does not treat '%' specially, so a single '%' prints one percent sign.
    print('test error Net nh={:d} {:0.2f}% {:d}/{:d}'.format(nh,
                                                             (100 * nb_test_errors) / test_input.size(0),
                                                             nb_test_errors, test_input.size(0)))

In [None]:
######################################################################
# Question 4
# Train and evaluate the three-convolution classifier Net2.
# The calls now match the helpers defined in this notebook: train_model requires a learning
# rate, the digit classifier is trained/evaluated against the one-hot class matrices (not
# the pair targets), and errors are counted with compute_nb_errors_classes.
# `lr` is the learning rate set in the training cells above.

model = Net2()
train_model(model, train_input, train_classes, mini_batch_size, lr)
nb_test_errors = compute_nb_errors_classes(model, test_input, test_classes, mini_batch_size)
# str.format does not treat '%' specially, so a single '%' prints one percent sign.
print('test error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                  nb_test_errors, test_input.size(0)))