In [16]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim

import dlc_practical_prologue as prologue

In [17]:
# Fetch the train/test tensors from the course helper (called with 1000).
(train_input, train_target, train_classes,
 test_input, test_target, test_classes) = prologue.generate_pair_sets(1000)

In [18]:
# Rescale both input tensors by 1/255 (presumably raw 0-255 pixel values — TODO confirm).
# NOTE: the division is in place, so re-running this cell scales the data again.
train_input.div_(255)
test_input.div_(255)

In [19]:
# Debug switch read by Net2.forward and Net3.forward: when True, they print
# the tensor shape after every layer.
print_shapes_Net = False

In [20]:
# Two-column float targets for the MSE criterion: each row is [0, 1] where the
# original target equals 1 and [1, 0] otherwise.
# Built with tensor operations so the number of rows follows the data instead
# of a hard-coded 1000, and no entry is ever left uninitialised.
new_train_target = torch.stack((train_target != 1, train_target == 1), dim=1).float()
new_test_target = torch.stack((test_target != 1, test_target == 1), dim=1).float()

In [21]:
class Net(nn.Module):
    """Small two-convolution network returning two raw scores per sample.

    The flatten step hard-codes 256 features (64 channels x 2 x 2), which
    matches a 2-channel 14x14 input: 12x12 after conv1, 6x6 after pooling,
    4x4 after conv2 and 2x2 after the second pooling.

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)

    def forward(self, x):
        """Return the (N, 2) output scores; no softmax is applied."""
        pooled = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)  # 6x6 maps
        x = F.relu(pooled)
        pooled = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)  # conv: 4x4, pool: 2x2
        x = F.relu(pooled)
        flat = x.view(-1, 256)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [39]:
class Net_bn(nn.Module):
    """Same layout as ``Net`` with batch normalisation after each convolution.

    The flatten step hard-codes 256 features (64 channels x 2 x 2), which
    matches a 2-channel 14x14 input.

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)

    def forward(self, x):
        """Return the (N, 2) output scores; no softmax is applied."""
        # Each stage is conv -> batch norm -> 2x2 max pooling -> ReLU.
        normed = self.bn1(self.conv1(x))
        x = F.relu(F.max_pool2d(normed, kernel_size=2, stride=2))  # 6x6 maps
        normed = self.bn2(self.conv2(x))
        x = F.relu(F.max_pool2d(normed, kernel_size=2, stride=2))  # conv: 4x4, pool: 2x2
        hidden = F.relu(self.fc1(x.view(-1, 256)))
        return self.fc2(hidden)

In [22]:
class Net2(nn.Module):
    """Deeper variant: four 2x2 convolutions followed by two linear layers.

    The flatten step hard-codes 128 features, i.e. it expects the conv4 output
    to hold 128 values per sample (presumably a 1x1 map for 14x14 inputs —
    TODO confirm with the shape printing enabled).

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 16, kernel_size=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=2)
        self.fc1 = nn.Linear(128, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)

    def _print_shape(self, label, x):
        """Print ``label`` and ``x.shape`` when the global debug flag is set; return ``x``."""
        if print_shapes_Net:
            print(label, x.shape)
        return x

    def forward(self, x):
        """Return the (N, 2) output scores, optionally printing each intermediate shape."""
        x = self._print_shape("initial shape", x)
        # kernel_size=1 pooling keeps the spatial size of the feature map.
        x = self._print_shape("1 conv", F.relu(F.max_pool2d(self.conv1(x), kernel_size=1)))
        x = self._print_shape("2 conv", F.relu(F.max_pool2d(self.conv2(x), kernel_size=2)))
        x = self._print_shape("3 conv", F.relu(F.max_pool2d(self.conv3(x), kernel_size=2)))
        x = self._print_shape("4 conv", F.relu(self.conv4(x)))
        x = self._print_shape("fc1", F.relu(self.fc1(x.view(-1, 128))))
        return self._print_shape("final", self.fc2(x))

######################################################################

In [23]:
class Net3(nn.Module):
    """``Net2`` layout with batch normalisation after every convolution.

    The flatten step hard-codes 128 features, i.e. it expects the conv4 output
    to hold 128 values per sample (presumably a 1x1 map for 14x14 inputs —
    TODO confirm with the shape printing enabled).

    Args:
        nb_hidden: width of the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 16, kernel_size=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=2)
        self.fc1 = nn.Linear(128, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)
        self.bn1 = nn.BatchNorm2d(16)
        self.bn2 = nn.BatchNorm2d(32)
        self.bn3 = nn.BatchNorm2d(64)
        self.bn4 = nn.BatchNorm2d(128)

    def _print_shape(self, label, x):
        """Print ``label`` and ``x.shape`` when the global debug flag is set; return ``x``."""
        if print_shapes_Net:
            print(label, x.shape)
        return x

    def forward(self, x):
        """Return the (N, 2) output scores, optionally printing each intermediate shape."""
        x = self._print_shape("initial shape", x)
        # kernel_size=1 pooling keeps the spatial size of the feature map.
        x = self._print_shape("1 conv", F.relu(F.max_pool2d(self.bn1(self.conv1(x)), kernel_size=1)))
        x = self._print_shape("2 conv", F.relu(F.max_pool2d(self.bn2(self.conv2(x)), kernel_size=2)))
        x = self._print_shape("3 conv", F.relu(F.max_pool2d(self.bn3(self.conv3(x)), kernel_size=2)))
        x = self._print_shape("4 conv", F.relu(self.bn4(self.conv4(x))))
        x = self._print_shape("fc1", F.relu(self.fc1(x.view(-1, 128))))
        return self._print_shape("final", self.fc2(x))

######################################################################

In [24]:
# Number of samples per mini-batch; passed to the training and evaluation
# helpers by every experiment cell.
mini_batch_size = 100

In [25]:
def train_model(model, train_input, train_target, mini_batch_size, eta=5e-1, nb_epochs=25):
    """Train ``model`` in place with hand-written mini-batch gradient descent on an MSE loss.

    Args:
        model: ``nn.Module`` whose parameters are updated in place.
        train_input: tensor of samples, batched along dimension 0.
        train_target: tensor of targets aligned with ``train_input`` along
            dimension 0 and shaped like the model output.
        mini_batch_size: number of samples per update step. It is now honoured
            (it used to be overwritten with 100); the last batch of an epoch is
            smaller when the sample count is not a multiple of it.
        eta: step size of the plain gradient-descent update.
        nb_epochs: number of passes over the training set.

    Returns:
        None. The model is modified in place and left in training mode.
    """
    criterion = nn.MSELoss()
    nb_samples = train_input.size(0)

    # Make sure layers such as batch normalisation use their training behaviour.
    model.train()

    for _ in range(nb_epochs):
        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the final batch so narrow() never reads past the end.
            batch_len = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, batch_len))
            loss = criterion(output, train_target.narrow(0, b, batch_len))

            model.zero_grad()
            loss.backward()

            # Manual update: the step itself must not be tracked by autograd.
            with torch.no_grad():
                for p in model.parameters():
                    if p.grad is not None:  # parameters unused in forward have no gradient
                        p -= eta * p.grad

In [26]:
def train_model2(model, train_input, train_target, mini_batch_size, lr, nb_epochs=25):
    """Train ``model`` in place with the Adam optimizer on an MSE loss.

    Args:
        model: ``nn.Module`` whose parameters are optimised.
        train_input: tensor of samples, batched along dimension 0.
        train_target: tensor of targets aligned with ``train_input`` along
            dimension 0 and shaped like the model output.
        mini_batch_size: number of samples per optimizer step. It is now
            honoured (it used to be overwritten with 100); the last batch of an
            epoch is smaller when the sample count is not a multiple of it.
        lr: learning rate handed to ``optim.Adam``.
        nb_epochs: number of passes over the training set.

    Returns:
        None. The model is modified in place and left in training mode.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    nb_samples = train_input.size(0)

    # Make sure layers such as batch normalisation use their training behaviour.
    model.train()

    for _ in range(nb_epochs):
        for b in range(0, nb_samples, mini_batch_size):
            # Clamp the final batch so narrow() never reads past the end.
            batch_len = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, batch_len))
            loss = criterion(output, train_target.narrow(0, b, batch_len))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

In [27]:
def compute_nb_errors(model, input, target, mini_batch_size):
    """Count the samples that ``model`` misclassifies.

    A sample counts as an error when the entry of its ``target`` row at the
    predicted class index is <= 0, so ``target`` is expected to hold one row
    per sample with a positive value only in the column of the correct class.

    The model is switched to evaluation mode for the duration of the call, so
    batch-normalisation layers use their running statistics and do not update
    them. Its previous mode is restored afterwards. No gradients are recorded.

    Args:
        model: ``nn.Module`` producing one score per class for each sample.
        input: tensor of samples, batched along dimension 0.
        target: 2-D tensor aligned with ``input`` along dimension 0.
        mini_batch_size: number of samples per forward pass; the last batch is
            smaller when the sample count is not a multiple of it.

    Returns:
        The number of misclassified samples, as a Python ``int``.
    """
    nb_errors = 0
    nb_samples = input.size(0)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for b in range(0, nb_samples, mini_batch_size):
                # Clamp the final batch so narrow() never reads past the end.
                batch_len = min(mini_batch_size, nb_samples - b)
                output = model(input.narrow(0, b, batch_len))
                _, predicted_classes = output.max(1)
                # Pick, for each sample, the target entry of its predicted class.
                picked = target.narrow(0, b, batch_len).gather(1, predicted_classes.unsqueeze(1))
                nb_errors += int((picked <= 0).sum())
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)

    return nb_errors

In [28]:
# Baseline without an optimizer: five independent runs of Net(64) trained
# with the hand-written gradient descent of train_model.
for k in range(5):
    model = Net(64)

    train_model(model, train_input, new_train_target, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    # The train error is reported against the training-set size (was test_input.size(0)).
    print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

test error Net 18.80% 188/1000
train error Net 2.70% 27/1000
test error Net 38.60% 386/1000
train error Net 40.60% 406/1000
test error Net 19.70% 197/1000
train error Net 7.20% 72/1000
test error Net 35.10% 351/1000
train error Net 36.10% 361/1000
test error Net 17.80% 178/1000
train error Net 3.20% 32/1000


In [29]:
# Adam learning-rate sweep for Net(64).
# Recorded note: lr=0.001 gave about 16% test / 10% train error.
for lr in [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Net(64)

    train_model2(model, train_input, new_train_target, mini_batch_size, lr)
    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    # The train error is reported against the training-set size (was test_input.size(0)).
    print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

test error Net 16.60% 166/1000
train error Net 9.70% 97/1000
test error Net 19.20% 192/1000
train error Net 0.10% 1/1000
test error Net 21.70% 217/1000
train error Net 0.30% 3/1000
test error Net 42.90% 429/1000
train error Net 44.40% 444/1000
test error Net 42.90% 429/1000
train error Net 44.40% 444/1000
test error Net 57.10% 571/1000
train error Net 55.60% 556/1000


In [30]:
# Ten independent runs of Net(64) trained with Adam at the selected learning rate.
lr = 0.001
for i in range(10):
    model = Net(64)

    train_model2(model, train_input, new_train_target, mini_batch_size, lr)
    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    # The train error is reported against the training-set size (was test_input.size(0)).
    print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

test error Net 17.30% 173/1000
train error Net 9.70% 97/1000
test error Net 16.50% 165/1000
train error Net 9.70% 97/1000
test error Net 17.80% 178/1000
train error Net 8.10% 81/1000
test error Net 18.50% 185/1000
train error Net 11.00% 110/1000
test error Net 17.20% 172/1000
train error Net 7.50% 75/1000
test error Net 17.70% 177/1000
train error Net 6.60% 66/1000
test error Net 17.00% 170/1000
train error Net 4.10% 41/1000
test error Net 15.40% 154/1000
train error Net 8.60% 86/1000
test error Net 16.60% 166/1000
train error Net 7.40% 74/1000
test error Net 15.60% 156/1000
train error Net 3.30% 33/1000


In [40]:
# Adam learning-rate sweep for the batch-normalised network Net_bn(64).
for lr in [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Net_bn(64)

    train_model2(model, train_input, new_train_target, mini_batch_size, lr)
    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    # The train error is reported against the training-set size (was test_input.size(0)).
    print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

test error Net 49.10% 491/1000
train error Net 45.00% 450/1000
test error Net 18.70% 187/1000
train error Net 3.00% 30/1000
test error Net 37.50% 375/1000
train error Net 24.30% 243/1000
test error Net 42.90% 429/1000
train error Net 44.40% 444/1000
test error Net 42.90% 429/1000
train error Net 44.40% 444/1000
test error Net 42.90% 429/1000
train error Net 44.40% 444/1000


## Deeper model

In [31]:
######################################################################
# Deeper model

# Single run of the deeper network with Adam.
model = Net2(64)
lr = 0.001
train_model2(model, train_input, new_train_target, mini_batch_size, lr)

# str.format does not treat '%' specially, so a single '%' is the right literal
# ('%%' printed two percent signs).
nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
print('train error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                   nb_train_errors, train_input.size(0)))

nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
print('test error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                  nb_test_errors, test_input.size(0)))

train error Net2 16.90%% 169/1000
test error Net2 21.00%% 210/1000


#### Test for lr choice

In [12]:
# Adam learning-rate sweep for Net2(64).
# Recorded note: lr = 0.001 seems good (train error: 12%, test error: 20%).
for lr in [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Net2(64)
    train_model2(model, train_input, new_train_target, mini_batch_size, lr)

    # str.format does not treat '%' specially, so a single '%' is the right
    # literal ('%%' printed two percent signs).
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    print('train error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
    

train error Net2 8.00%% 80/1000
test error Net2 18.70%% 187/1000
train error Net2 8.90%% 89/1000
test error Net2 26.40%% 264/1000
train error Net2 10.80%% 108/1000
test error Net2 22.30%% 223/1000
train error Net2 44.90%% 449/1000
test error Net2 47.40%% 474/1000
train error Net2 44.90%% 449/1000
test error Net2 47.40%% 474/1000
train error Net2 44.90%% 449/1000
test error Net2 47.40%% 474/1000


In [38]:
# Five independent runs of Net2(64) with Adam at the selected learning rate.
lr = 0.001
for i in range(5):
    model = Net2(64)
    train_model2(model, train_input, new_train_target, mini_batch_size, lr)

    # str.format does not treat '%' specially, so a single '%' is the right
    # literal ('%%' printed two percent signs).
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    print('train error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net2 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

train error Net2 9.20%% 92/1000
test error Net2 19.00%% 190/1000
train error Net2 15.50%% 155/1000
test error Net2 21.30%% 213/1000
train error Net2 17.60%% 176/1000
test error Net2 22.00%% 220/1000
train error Net2 11.70%% 117/1000
test error Net2 20.90%% 209/1000
train error Net2 21.10%% 211/1000
test error Net2 21.70%% 217/1000


### With batch normalization

In [32]:
######################################################################
# Deeper model

# Single run of the batch-normalised deeper network with Adam.
model = Net3(64)
lr = 0.05
train_model2(model, train_input, new_train_target, mini_batch_size, lr)

# str.format does not treat '%' specially, so a single '%' is the right literal
# ('%%' printed two percent signs).
nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
print('train error Net3 {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                   nb_train_errors, train_input.size(0)))

nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
print('test error Net3 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                  nb_test_errors, test_input.size(0)))

train error Net3 2.40%% 24/1000
test error Net3 26.30%% 263/1000


In [28]:
# Adam learning-rate sweep for Net3(64).
# Recorded note: lr = 0.05 is a tentative pick (train error: 0.9%, test error: 20%) — not settled.
for lr in [0.001, 0.005, 0.01, 0.05, 0.1, 0.5]:
    model = Net3(64)
    train_model2(model, train_input, new_train_target, mini_batch_size, lr)

    # str.format does not treat '%' specially, so a single '%' is the right
    # literal ('%%' printed two percent signs).
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    print("learning rate: ", lr)
    print('train error Net3 {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net3 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
    

learning rate:  0.001
train error Net2 0.00%% 0/1000
test error Net2 22.70%% 227/1000
learning rate:  0.005
train error Net2 0.00%% 0/1000
test error Net2 19.30%% 193/1000
learning rate:  0.01
train error Net2 0.00%% 0/1000
test error Net2 18.40%% 184/1000
learning rate:  0.05
train error Net2 0.90%% 9/1000
test error Net2 20.40%% 204/1000
learning rate:  0.1
train error Net2 0.50%% 5/1000
test error Net2 28.20%% 282/1000
learning rate:  0.5
train error Net2 44.70%% 447/1000
test error Net2 47.10%% 471/1000


In [29]:
# Five independent runs of Net3(64) with Adam at the tentative learning rate.
lr = 0.05
for i in range(5):
    model = Net3(64)
    train_model2(model, train_input, new_train_target, mini_batch_size, lr)

    # str.format does not treat '%' specially, so a single '%' is the right
    # literal ('%%' printed two percent signs).
    nb_train_errors = compute_nb_errors(model, train_input, new_train_target, mini_batch_size)
    print('train error Net3 {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                       nb_train_errors, train_input.size(0)))

    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    print('test error Net3 {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))

train error Net3 44.90%% 449/1000
test error Net3 47.40%% 474/1000
train error Net3 0.00%% 0/1000
test error Net3 22.20%% 222/1000
train error Net3 1.00%% 10/1000
test error Net3 20.00%% 200/1000
train error Net3 0.00%% 0/1000
test error Net3 19.80%% 198/1000
train error Net3 0.00%% 0/1000
test error Net3 18.80%% 188/1000


In [23]:
######################################################################
# Question 2

# Two independent runs of Net with 200 hidden units, trained without an optimizer.
for k in range(2):
    model = Net(200)
    train_model(model, train_input, new_train_target, mini_batch_size)

    nb_test_errors = compute_nb_errors(
        model, test_input, new_test_target, mini_batch_size
    )
    print('test error Net {:0.2f}% {:d}/{:d}'.format(
        (100 * nb_test_errors) / test_input.size(0),
        nb_test_errors,
        test_input.size(0),
    ))

test error Net 19.70% 197/1000
test error Net 20.40% 204/1000


In [15]:
######################################################################
# Question 3

# Effect of the hidden-layer width on the test error of Net (hand-written SGD).
for nh in [10, 50, 200, 500, 2500]:
    model = Net(nh)
    train_model(model, train_input, new_train_target, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input, new_test_target, mini_batch_size)
    # str.format does not treat '%' specially, so a single '%' is the right
    # literal ('%%' printed two percent signs).
    print('test error Net nh={:d} {:0.2f}% {:d}/{:d}'.format(nh,
                                                             (100 * nb_test_errors) / test_input.size(0),
                                                             nb_test_errors, test_input.size(0)))

test error Net nh=10 21.50%% 215/1000
test error Net nh=50 20.50%% 205/1000
test error Net nh=200 20.30%% 203/1000
test error Net nh=500 21.40%% 214/1000
test error Net nh=2500 47.40%% 474/1000
