In [3]:
#!/usr/bin/env python

######################################################################

import torch
from torch.autograd import Variable
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

# Fetch the train/test tensors from the course helper, asking for one-hot
# labels, normalized inputs and non-flattened samples.
(train_input, train_target,
 test_input, test_target) = prologue.load_data(
    one_hot_labels=True, normalize=True, flatten=False)

######################################################################

class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then two linear layers.

    ``forward`` flattens the convolutional features to 256 values per sample,
    which matches 64 maps of size 2x2 (the case for 1x28x28 inputs).
    """

    def __init__(self, nb_hidden):
        """Build the layers; ``nb_hidden`` is the width of the hidden linear layer."""
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        """Return the 10 raw output scores for each sample of ``x``."""
        # Stage 1: convolution, 3x3 max-pooling (stride 3), then ReLU.
        pooled1 = F.max_pool2d(self.conv1(x), kernel_size=3, stride=3)
        feat1 = F.relu(pooled1)
        # Stage 2: convolution, 2x2 max-pooling (stride 2), then ReLU.
        pooled2 = F.max_pool2d(self.conv2(feat1), kernel_size=2, stride=2)
        feat2 = F.relu(pooled2)
        # Flatten to 256 features per sample for the fully connected part.
        flat = feat2.view(-1, 256)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

class Net2(nn.Module):
    """Three-convolution classifier with a fixed hidden layer of 200 units.

    ``forward`` flattens the convolutional features to 9 * 64 values per
    sample, i.e. 64 maps of size 3x3 (the case for 1x28x28 inputs).
    """

    def __init__(self):
        """Build the three convolutions and the two linear layers."""
        super().__init__()
        hidden_width = 200
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(9 * 64, hidden_width)
        self.fc2 = nn.Linear(hidden_width, 10)

    def forward(self, x):
        """Return the 10 raw output scores for each sample of ``x``."""
        # Two conv -> 2x2 max-pool -> ReLU stages.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        stage2 = F.relu(F.max_pool2d(self.conv2(stage1), kernel_size=2))
        # Third convolution has no pooling.
        stage3 = F.relu(self.conv3(stage2))
        flat = stage3.view(-1, 9 * 64)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

######################################################################

def train_model(model, train_input, train_target, mini_batch_size):
    """Train ``model`` in place with plain mini-batch SGD on an MSE loss.

    Runs 25 epochs with a fixed step size of 0.1. After each epoch the epoch
    index and the sum of the per-batch losses are printed.

    Args:
        model: module whose parameters are updated in place.
        train_input: tensor of samples, batched along dimension 0.
        train_target: tensor of targets aligned with ``train_input`` along
            dimension 0.
        mini_batch_size: number of samples per gradient step. When the number
            of samples is not a multiple of it, the last batch of each epoch
            is simply smaller.
    """
    criterion = nn.MSELoss()
    eta = 1e-1
    nb_epochs = 25

    for e in range(nb_epochs):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            # Slicing clamps at the end of the tensor, so a trailing partial
            # batch is handled instead of raising as narrow() would.
            output = model(train_input[b:b + mini_batch_size])
            loss = criterion(output, train_target[b:b + mini_batch_size])
            model.zero_grad()
            loss.backward()
            sum_loss = sum_loss + loss.item()
            # Manual SGD step; no_grad keeps the in-place update out of the graph.
            with torch.no_grad():
                for p in model.parameters():
                    # Parameters that did not take part in the loss have no gradient.
                    if p.grad is not None:
                        p.sub_(eta * p.grad)
        print(e, sum_loss)

def compute_nb_errors(model, input, target, mini_batch_size):
    """Count the samples that ``model`` misclassifies.

    A sample counts as an error when the entry of ``target`` at the predicted
    class (the index of the largest model output) is less than or equal to 0.

    Args:
        model: module mapping a batch of samples to per-class scores.
        input: tensor of samples, batched along dimension 0.
        target: 2-D tensor indexed as ``[sample, class]``.
        mini_batch_size: number of samples evaluated per forward pass; a
            trailing partial batch is evaluated as well.

    Returns:
        The number of misclassified samples as a Python ``int``.
    """
    nb_errors = 0

    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for b in range(0, input.size(0), mini_batch_size):
            # Slicing clamps at the end, so the last batch may be smaller.
            output = model(input[b:b + mini_batch_size])
            _, predicted_classes = output.max(1)
            # Pick, for every sample, the target entry at its predicted class.
            picked = target[b:b + mini_batch_size].gather(
                1, predicted_classes.unsqueeze(1))
            nb_errors += int((picked <= 0).sum().item())

    return nb_errors

######################################################################

# The loaded tensors are used directly: since PyTorch 0.4 tensors carry the
# autograd state themselves and wrapping them in Variable(...) is a deprecated
# no-op, so no wrapping step is needed here.

# Number of samples per mini-batch for both training and evaluation.
mini_batch_size = 100



* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [7]:
# Smoke test: push the first 10 training samples through an untrained network
# and show the raw outputs.
model = Net(200)
first_samples = train_input.narrow(0, 0, 10)
output = model(first_samples)
print(output)

tensor([[-0.1753,  0.1998, -0.0236,  0.0704, -0.2347, -0.1632, -0.0874,  0.2934,
          0.3089,  0.1217],
        [-0.2045,  0.1267, -0.0427,  0.0299, -0.1113, -0.1951, -0.0818,  0.2626,
          0.3525,  0.0232],
        [-0.1438,  0.1126, -0.0956,  0.0878, -0.1515, -0.1192, -0.2036,  0.1890,
          0.2791, -0.0186],
        [-0.0929,  0.1717, -0.0874,  0.1586, -0.1968, -0.2009, -0.1561,  0.2471,
          0.3413,  0.0180],
        [-0.1788,  0.1257, -0.0268,  0.1488, -0.2316, -0.1226, -0.1106,  0.2372,
          0.3825,  0.0816],
        [-0.1725,  0.1374, -0.0630,  0.0846, -0.2475, -0.1885, -0.1812,  0.2744,
          0.3500,  0.0585],
        [-0.0567,  0.0805, -0.0089,  0.1277, -0.2593, -0.1145, -0.0545,  0.2315,
          0.2504,  0.0784],
        [-0.1943,  0.1483, -0.0687,  0.1447, -0.2043, -0.1678, -0.1447,  0.2742,
          0.3757, -0.0280],
        [-0.0785,  0.0963, -0.0604,  0.1263, -0.2002, -0.1310, -0.0764,  0.1963,
          0.2089,  0.0876],
        [-0.1607,  

In [8]:
######################################################################
# Question 2

# Train ten independently initialised networks to see how much the test error
# varies from run to run.
for k in range(10):
    model = Net(200)
    train_model(model, train_input, train_target, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    nb_test_samples = test_input.size(0)
    error_rate = (100 * nb_test_errors) / nb_test_samples
    print('test error Net {:0.2f}% {:d}/{:d}'.format(error_rate, nb_test_errors, nb_test_samples))



0 0.9223078563809395
1 0.799484483897686
2 0.7354758158326149
3 0.6770432069897652
4 0.6261134333908558
5 0.5847308896481991
6 0.5515117049217224
7 0.5181113034486771
8 0.48692817986011505
9 0.47050345316529274
10 0.44845792278647423
11 0.4096074551343918
12 0.4355189576745033
13 0.38822848722338676
14 0.3605176918208599
15 0.3519678916782141
16 0.3524400033056736
17 0.34168546460568905
18 0.3179338909685612
19 0.31725407764315605
20 0.3159294296056032
21 0.2932353727519512
22 0.29531256668269634
23 0.2801944874227047
24 0.2756394650787115
test error Net 13.80% 138/1000
0 0.9162067696452141
1 0.7941851168870926
2 0.7182531803846359
3 0.6506625637412071
4 0.5935915857553482
5 0.5465456806123257
6 0.5118890851736069
7 0.497282300144434
8 0.49226564541459084
9 0.43021615967154503
10 0.44787009060382843
11 0.39254794269800186
12 0.3764522485435009
13 0.3685768209397793
14 0.3500896282494068
15 0.33492385037243366
16 0.32696438021957874
17 0.3153888452798128
18 0.2974718548357487
19 0.28934

In [None]:

######################################################################
# Question 3

# Compare test error across different hidden-layer widths.
for nh in [ 10, 50, 200, 500, 2500 ]:
    model = Net(nh)
    train_model(model, train_input, train_target, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    # In a str.format template a literal percent sign is a single '%';
    # doubling it is only an escape for %-style formatting and would print '%%'.
    print('test error Net nh={:d} {:0.2f}% {:d}/{:d}'.format(nh,
                                                             (100 * nb_test_errors) / test_input.size(0),
                                                             nb_test_errors, test_input.size(0)))


In [None]:
######################################################################
# Question 4

# Train and evaluate the three-convolution network once.
model = Net2()
train_model(model, train_input, train_target, mini_batch_size)
nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
# In a str.format template a literal percent sign is a single '%';
# doubling it is only an escape for %-style formatting and would print '%%'.
print('test error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                  nb_test_errors, test_input.size(0)))