In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import dlc_practical_prologue as prologue

mini_batch_size = 100
N = 1000
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(N)

In [2]:
#normalize the input
train_input/=255
test_input/=255

In [19]:
print_shapes_Net = False

In [4]:
class Net(nn.Module):
    def __init__(self, nb_hidden):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        if print_shapes_Net:
            print(x.shape)
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print(x.shape)
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print(x.shape)
        x = F.relu(self.fc1(x.view(-1, 256)))
        if print_shapes_Net:
            print(x.shape)
        x = self.fc2(x)
        if print_shapes_Net:
            print(x.shape)
        return x

In [5]:
class Net1(nn.Module):
    def __init__(self, nb_hidden):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv1d(2, 32, kernel_size=3)
        self.conv2 = nn.Conv1d(16, 64, kernel_size=3)
        self.fc1 = nn.Linear(32, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)

    def forward(self, x):
        if print_shapes_Net:
            print("initial", x.shape) #100 2 10
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print("conv1",x.shape) #100 16 4
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        if print_shapes_Net:
            print("conv2",x.shape)
        x = F.relu(self.fc1(x.view(-1, 32)))
        if print_shapes_Net:
            print("fc1",x.shape)
        x = self.fc2(x)
        if print_shapes_Net:
            print("final",x.shape) # 100 1 2 
        return x

In [6]:
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        nb_hidden = 200
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(64, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)

    def forward(self, x):
        if print_shapes_Net:
            print("initial shape", x.shape)
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        if print_shapes_Net:
            print("1 conv",x.shape)
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2))
        if print_shapes_Net:
            print("2 conv", x.shape)
        x = F.relu(self.conv3(x))
        if print_shapes_Net:
            print("3 conv",x.shape)
        x = F.relu(self.fc1(x.view(-1, 64)))
        if print_shapes_Net:
            print("fc1",x.shape)
        x = self.fc2(x)
        if print_shapes_Net:
            print("final",x.shape)
        return x

######################################################################

In [7]:
def train_model(model, train_input, train_target, mini_batch_size, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr)
    
    for e in range(25):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward()
            optimizer.step()
            
            sum_loss = sum_loss + loss.item()
        print(e, sum_loss)

In [8]:
def train_model2(model, train_input, train_target, mini_batch_size, lr):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr)
    
    for e in range(25):
        sum_loss = 0
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            model.zero_grad()
            loss.backward(retain_graph=True)
            optimizer.step()
            
            sum_loss = sum_loss + loss.item()
        print(e, sum_loss)

In [9]:
def compute_nb_errors_classes(model, input, target, mini_batch_size):
    nb_errors = 0

    for b in range(0, input.size(0), mini_batch_size):
        output = model(input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.max(1)

        for k in range(mini_batch_size):
            #print(predicted_classes[k], target[b+k].max(0)[1])
            if target[b + k, predicted_classes[k]] <= 0:
                nb_errors = nb_errors + 1

    return nb_errors

In [10]:
def compute_nb_errors_targets(model, input, target):
    nb_errors = 0
    output = model(input)
    _, predicted_classes = output.max(1)
    #print(predicted_classes.shape)
    predicted_target = torch.empty(1000)
    
    for b in range (1000):
        if predicted_classes[2*b] > predicted_classes[2*b + 1]:
            predicted_target[b] = 0
        else:
            predicted_target[b] = 1
        #print(predicted_target[b], target[b])
        if predicted_target[b] != target[b]:
            nb_errors = nb_errors + 1

    return nb_errors

In [41]:
def compute_nb_errors_targets2(model, input, target):
    nb_errors = 0
    output = model(input)
    _, predicted_classes = output.max(1) #digits - shape [1000]
    print(predicted_classes.shape)
    predicted_target = torch.empty(1000)
    
    for b in range (1000):
          
        print(predicted_classes[b], target[b], "b", b)
        if target[b,int(predicted_classes[b])] <= 0:
            nb_errors = nb_errors + 1
            
        """if predicted_target[b] != target[b]:
            nb_errors = nb_errors + 1"""

    return nb_errors

In [11]:
new_train_input = train_input.view([2000,1,14,14])
new_test_input = test_input.view([2000,1,14,14])
#print(new_train_input.shape)
#new_train_classes = train_classes.view([2000])
#new_test_classes = test_classes.view([2000])
#print(train_classes.shape)

In [12]:
train_classes = train_classes.view(2000)
test_classes = test_classes.view(2000)

In [13]:
#transfor classes in a 2000 * 10 
train_classes2 = torch.empty(2000,10)
test_classes2 = torch.empty(2000,10)
for i in range(2000):
    for j in range(10):
        if train_classes[i] == j:
            train_classes2[i,j] = 1
        else:
            train_classes2[i,j] = 0
        if test_classes[i] == j:
            test_classes2[i,j] = 1
        else:
            test_classes2[i,j] = 0

In [14]:
train_input = new_train_input
test_input = new_test_input
train_classes = train_classes2
test_classes = test_classes2

In [15]:
####predict class of each digit
for k in range(1):
    model = Net2()
    lr = 0.005
    train_model(model, train_input, train_classes, mini_batch_size, lr)
    
    nb_train_errors = compute_nb_errors_classes(model, train_input, train_classes, mini_batch_size)
    print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    nb_test_errors = compute_nb_errors_classes(model, test_input, test_classes, mini_batch_size)
    print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                    nb_test_errors, test_input.size(0)))
    

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.Size([100, 10])
initial shape torch.Size([100, 1, 14, 14])
1 conv torch.Size([100, 32, 6, 6])
2 conv torch.Size([100, 32, 2, 2])
3 conv torch.Size([100, 64, 1, 1])
fc1 torch.Size([100, 200])
final torch.S

In [21]:
output_train = model(new_train_input)
new_output_train = output.view([1000,2,10])
output_test = model(new_test_input)
new_output_test = output.view([1000,2,10])

In [17]:
#train_target[1000,1]

new_train_target = torch.empty(1000,2)
new_test_target = torch.empty(1000,2)
for i in range(1000):
    if train_target[i] == 1 :
        new_train_target[i,0] = 0
        new_train_target[i,1] = 1
        
    else:
        new_train_target[i,0] = 1
        new_train_target[i,1] = 0
        
    if test_target[i] == 1:
        new_test_target[i,0] = 0
        new_test_target[i,1] = 1
        
    else:
        new_test_target[i,0] = 1
        new_test_target[i,1] = 0

In [24]:
###predict target using model
model1 = Net1(200)

lr = 0.005

train_model2(model1, new_output_train, new_train_target, mini_batch_size, lr)

0 2.8392718583345413
1 1.9532969295978546
2 1.0290538296103477
3 0.5819277465343475
4 0.3561679720878601
5 0.21182098891586065
6 0.1320451907813549
7 0.11191814206540585
8 0.10431172978132963
9 0.09564325725659728
10 0.08669047895818949
11 0.07967587653547525
12 0.07323113875463605
13 0.0678792311809957
14 0.06335736671462655
15 0.059975941898301244
16 0.05770993488840759
17 0.05557556846179068
18 0.05497343931347132
19 0.05405075871385634
20 0.05216968315653503
21 0.049753613071516156
22 0.04701198195107281
23 0.04430603701621294
24 0.042073394637554884


In [42]:
nb_train_errors = compute_nb_errors_targets2(model1, new_output_train, new_train_target)
print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / new_output_train.size(0),
                                                  nb_train_errors, new_output_train.size(0)))
nb_test_errors = compute_nb_errors_targets2(model1, new_output_test, new_test_target)
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / new_output_test.size(0),
                                                nb_test_errors, new_output_test.size(0)))

torch.Size([1000])
tensor(0) tensor([1., 0.]) b 0
tensor(0) tensor([1., 0.]) b 1
tensor(0) tensor([1., 0.]) b 2
tensor(0) tensor([1., 0.]) b 3
tensor(1) tensor([0., 1.]) b 4
tensor(0) tensor([1., 0.]) b 5
tensor(1) tensor([0., 1.]) b 6
tensor(0) tensor([1., 0.]) b 7
tensor(1) tensor([0., 1.]) b 8
tensor(1) tensor([0., 1.]) b 9
tensor(0) tensor([1., 0.]) b 10
tensor(0) tensor([1., 0.]) b 11
tensor(0) tensor([1., 0.]) b 12
tensor(0) tensor([1., 0.]) b 13
tensor(0) tensor([1., 0.]) b 14
tensor(0) tensor([1., 0.]) b 15
tensor(0) tensor([1., 0.]) b 16
tensor(1) tensor([0., 1.]) b 17
tensor(0) tensor([1., 0.]) b 18
tensor(1) tensor([0., 1.]) b 19
tensor(1) tensor([0., 1.]) b 20
tensor(1) tensor([0., 1.]) b 21
tensor(1) tensor([0., 1.]) b 22
tensor(1) tensor([0., 1.]) b 23
tensor(0) tensor([1., 0.]) b 24
tensor(1) tensor([0., 1.]) b 25
tensor(1) tensor([0., 1.]) b 26
tensor(0) tensor([1., 0.]) b 27
tensor(1) tensor([0., 1.]) b 28
tensor(0) tensor([1., 0.]) b 29
tensor(1) tensor([0., 1.]) b 30

tensor(0) tensor([1., 0.]) b 251
tensor(1) tensor([0., 1.]) b 252
tensor(0) tensor([1., 0.]) b 253
tensor(1) tensor([0., 1.]) b 254
tensor(1) tensor([0., 1.]) b 255
tensor(0) tensor([1., 0.]) b 256
tensor(1) tensor([0., 1.]) b 257
tensor(1) tensor([0., 1.]) b 258
tensor(0) tensor([1., 0.]) b 259
tensor(0) tensor([1., 0.]) b 260
tensor(1) tensor([0., 1.]) b 261
tensor(1) tensor([0., 1.]) b 262
tensor(1) tensor([0., 1.]) b 263
tensor(0) tensor([1., 0.]) b 264
tensor(1) tensor([0., 1.]) b 265
tensor(0) tensor([1., 0.]) b 266
tensor(1) tensor([0., 1.]) b 267
tensor(1) tensor([0., 1.]) b 268
tensor(0) tensor([1., 0.]) b 269
tensor(0) tensor([1., 0.]) b 270
tensor(1) tensor([0., 1.]) b 271
tensor(1) tensor([0., 1.]) b 272
tensor(0) tensor([1., 0.]) b 273
tensor(1) tensor([0., 1.]) b 274
tensor(1) tensor([0., 1.]) b 275
tensor(1) tensor([0., 1.]) b 276
tensor(1) tensor([0., 1.]) b 277
tensor(1) tensor([0., 1.]) b 278
tensor(1) tensor([0., 1.]) b 279
tensor(0) tensor([1., 0.]) b 280
tensor(1) 

tensor(0) tensor([1., 0.]) b 499
tensor(0) tensor([1., 0.]) b 500
tensor(0) tensor([1., 0.]) b 501
tensor(0) tensor([1., 0.]) b 502
tensor(0) tensor([1., 0.]) b 503
tensor(0) tensor([1., 0.]) b 504
tensor(1) tensor([0., 1.]) b 505
tensor(1) tensor([0., 1.]) b 506
tensor(0) tensor([1., 0.]) b 507
tensor(0) tensor([1., 0.]) b 508
tensor(1) tensor([0., 1.]) b 509
tensor(0) tensor([1., 0.]) b 510
tensor(1) tensor([0., 1.]) b 511
tensor(0) tensor([1., 0.]) b 512
tensor(0) tensor([1., 0.]) b 513
tensor(1) tensor([0., 1.]) b 514
tensor(0) tensor([1., 0.]) b 515
tensor(0) tensor([1., 0.]) b 516
tensor(0) tensor([1., 0.]) b 517
tensor(1) tensor([0., 1.]) b 518
tensor(1) tensor([0., 1.]) b 519
tensor(0) tensor([1., 0.]) b 520
tensor(1) tensor([0., 1.]) b 521
tensor(0) tensor([1., 0.]) b 522
tensor(0) tensor([1., 0.]) b 523
tensor(1) tensor([0., 1.]) b 524
tensor(1) tensor([0., 1.]) b 525
tensor(1) tensor([0., 1.]) b 526
tensor(1) tensor([0., 1.]) b 527
tensor(0) tensor([1., 0.]) b 528
tensor(1) 

tensor(0) tensor([1., 0.]) b 747
tensor(0) tensor([1., 0.]) b 748
tensor(1) tensor([0., 1.]) b 749
tensor(0) tensor([1., 0.]) b 750
tensor(1) tensor([0., 1.]) b 751
tensor(0) tensor([1., 0.]) b 752
tensor(1) tensor([0., 1.]) b 753
tensor(1) tensor([0., 1.]) b 754
tensor(0) tensor([1., 0.]) b 755
tensor(1) tensor([0., 1.]) b 756
tensor(0) tensor([1., 0.]) b 757
tensor(0) tensor([1., 0.]) b 758
tensor(1) tensor([0., 1.]) b 759
tensor(1) tensor([0., 1.]) b 760
tensor(0) tensor([1., 0.]) b 761
tensor(0) tensor([1., 0.]) b 762
tensor(0) tensor([1., 0.]) b 763
tensor(0) tensor([1., 0.]) b 764
tensor(0) tensor([1., 0.]) b 765
tensor(0) tensor([1., 0.]) b 766
tensor(0) tensor([1., 0.]) b 767
tensor(0) tensor([1., 0.]) b 768
tensor(0) tensor([1., 0.]) b 769
tensor(0) tensor([1., 0.]) b 770
tensor(1) tensor([0., 1.]) b 771
tensor(0) tensor([1., 0.]) b 772
tensor(1) tensor([0., 1.]) b 773
tensor(1) tensor([0., 1.]) b 774
tensor(1) tensor([0., 1.]) b 775
tensor(1) tensor([0., 1.]) b 776
tensor(1) 

tensor(0) tensor([1., 0.]) b 995
tensor(0) tensor([1., 0.]) b 996
tensor(1) tensor([0., 1.]) b 997
tensor(1) tensor([0., 1.]) b 998
tensor(1) tensor([0., 1.]) b 999
train error Net 0.10% 1/1000
torch.Size([1000])
tensor(0) tensor([0., 1.]) b 0
tensor(0) tensor([1., 0.]) b 1
tensor(0) tensor([1., 0.]) b 2
tensor(0) tensor([0., 1.]) b 3
tensor(1) tensor([1., 0.]) b 4
tensor(0) tensor([0., 1.]) b 5
tensor(1) tensor([1., 0.]) b 6
tensor(0) tensor([0., 1.]) b 7
tensor(1) tensor([0., 1.]) b 8
tensor(1) tensor([0., 1.]) b 9
tensor(0) tensor([0., 1.]) b 10
tensor(0) tensor([1., 0.]) b 11
tensor(0) tensor([0., 1.]) b 12
tensor(0) tensor([1., 0.]) b 13
tensor(0) tensor([0., 1.]) b 14
tensor(0) tensor([0., 1.]) b 15
tensor(0) tensor([1., 0.]) b 16
tensor(1) tensor([1., 0.]) b 17
tensor(0) tensor([0., 1.]) b 18
tensor(1) tensor([1., 0.]) b 19
tensor(1) tensor([0., 1.]) b 20
tensor(1) tensor([1., 0.]) b 21
tensor(1) tensor([1., 0.]) b 22
tensor(1) tensor([1., 0.]) b 23
tensor(0) tensor([1., 0.]) b 

tensor(0) tensor([1., 0.]) b 245
tensor(0) tensor([0., 1.]) b 246
tensor(0) tensor([1., 0.]) b 247
tensor(0) tensor([0., 1.]) b 248
tensor(0) tensor([1., 0.]) b 249
tensor(0) tensor([1., 0.]) b 250
tensor(0) tensor([1., 0.]) b 251
tensor(1) tensor([0., 1.]) b 252
tensor(0) tensor([0., 1.]) b 253
tensor(1) tensor([1., 0.]) b 254
tensor(1) tensor([0., 1.]) b 255
tensor(0) tensor([0., 1.]) b 256
tensor(1) tensor([0., 1.]) b 257
tensor(1) tensor([0., 1.]) b 258
tensor(0) tensor([1., 0.]) b 259
tensor(0) tensor([0., 1.]) b 260
tensor(1) tensor([1., 0.]) b 261
tensor(1) tensor([0., 1.]) b 262
tensor(1) tensor([0., 1.]) b 263
tensor(0) tensor([0., 1.]) b 264
tensor(1) tensor([1., 0.]) b 265
tensor(0) tensor([1., 0.]) b 266
tensor(1) tensor([1., 0.]) b 267
tensor(1) tensor([0., 1.]) b 268
tensor(0) tensor([1., 0.]) b 269
tensor(0) tensor([0., 1.]) b 270
tensor(1) tensor([1., 0.]) b 271
tensor(1) tensor([1., 0.]) b 272
tensor(0) tensor([0., 1.]) b 273
tensor(1) tensor([1., 0.]) b 274
tensor(1) 

tensor(0) tensor([1., 0.]) b 556
tensor(1) tensor([1., 0.]) b 557
tensor(1) tensor([0., 1.]) b 558
tensor(1) tensor([1., 0.]) b 559
tensor(1) tensor([1., 0.]) b 560
tensor(0) tensor([0., 1.]) b 561
tensor(0) tensor([0., 1.]) b 562
tensor(1) tensor([1., 0.]) b 563
tensor(0) tensor([1., 0.]) b 564
tensor(1) tensor([0., 1.]) b 565
tensor(0) tensor([0., 1.]) b 566
tensor(0) tensor([1., 0.]) b 567
tensor(1) tensor([0., 1.]) b 568
tensor(0) tensor([1., 0.]) b 569
tensor(0) tensor([0., 1.]) b 570
tensor(1) tensor([0., 1.]) b 571
tensor(1) tensor([0., 1.]) b 572
tensor(1) tensor([0., 1.]) b 573
tensor(1) tensor([0., 1.]) b 574
tensor(0) tensor([1., 0.]) b 575
tensor(0) tensor([0., 1.]) b 576
tensor(1) tensor([0., 1.]) b 577
tensor(0) tensor([0., 1.]) b 578
tensor(0) tensor([1., 0.]) b 579
tensor(1) tensor([1., 0.]) b 580
tensor(1) tensor([0., 1.]) b 581
tensor(0) tensor([0., 1.]) b 582
tensor(1) tensor([0., 1.]) b 583
tensor(1) tensor([0., 1.]) b 584
tensor(0) tensor([0., 1.]) b 585
tensor(0) 

tensor(1) tensor([0., 1.]) b 804
tensor(0) tensor([0., 1.]) b 805
tensor(1) tensor([0., 1.]) b 806
tensor(1) tensor([1., 0.]) b 807
tensor(1) tensor([1., 0.]) b 808
tensor(1) tensor([1., 0.]) b 809
tensor(0) tensor([0., 1.]) b 810
tensor(1) tensor([1., 0.]) b 811
tensor(0) tensor([0., 1.]) b 812
tensor(0) tensor([0., 1.]) b 813
tensor(1) tensor([0., 1.]) b 814
tensor(1) tensor([0., 1.]) b 815
tensor(1) tensor([0., 1.]) b 816
tensor(1) tensor([1., 0.]) b 817
tensor(1) tensor([0., 1.]) b 818
tensor(1) tensor([0., 1.]) b 819
tensor(0) tensor([0., 1.]) b 820
tensor(0) tensor([0., 1.]) b 821
tensor(1) tensor([0., 1.]) b 822
tensor(0) tensor([1., 0.]) b 823
tensor(0) tensor([1., 0.]) b 824
tensor(0) tensor([1., 0.]) b 825
tensor(0) tensor([0., 1.]) b 826
tensor(1) tensor([1., 0.]) b 827
tensor(1) tensor([0., 1.]) b 828
tensor(0) tensor([1., 0.]) b 829
tensor(1) tensor([0., 1.]) b 830
tensor(1) tensor([0., 1.]) b 831
tensor(1) tensor([1., 0.]) b 832
tensor(1) tensor([0., 1.]) b 833
tensor(0) 

In [34]:
2*999+1

1999

In [25]:
###predict target using "if"

nb_train_errors = compute_nb_errors_targets(model, train_input, train_target)
print('train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_target.size(0),
                                                  nb_train_errors, train_target.size(0)))
nb_test_errors = compute_nb_errors_targets(model, test_input,  test_target)
print('test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / train_target.size(0),
                                                nb_test_errors, train_target.size(0)))

train error Net 0.50% 5/1000
test error Net 3.90% 39/1000


lr = 0.01 : train error 2.35%
            test error 8.95%
lr = 0.005 : train error 0%
             test error 4.20%

In [24]:
output = model(new_train_input)
print(output.shape)
print(output[0], output[1])

new_output = output.view([1000,2,10])
print(new_output.shape)
print(new_output[0])

torch.Size([2000, 10])
tensor([-0.0221, -0.0341, -0.0094,  0.0758, -0.0256,  0.0292, -0.0165, -0.0414,
         0.0031,  0.9251], grad_fn=<SelectBackward>) tensor([-0.0210,  0.0018, -0.0263,  1.0347,  0.0176,  0.0292,  0.0195,  0.0140,
        -0.0403, -0.0327], grad_fn=<SelectBackward>)
torch.Size([1000, 2, 10])
tensor([[-0.0221, -0.0341, -0.0094,  0.0758, -0.0256,  0.0292, -0.0165, -0.0414,
          0.0031,  0.9251],
        [-0.0210,  0.0018, -0.0263,  1.0347,  0.0176,  0.0292,  0.0195,  0.0140,
         -0.0403, -0.0327]], grad_fn=<SelectBackward>)


In [None]:
######################################################################
# Question 3

for nh in [ 10, 50, 200, 500, 2500 ]:
    model = Net(nh)
    train_model(model, train_input, train_target, mini_batch_size)
    nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
    print('test error Net nh={:d} {:0.2f}%% {:d}/{:d}'.format(nh,
                                                              (100 * nb_test_errors) / test_input.size(0),
                                                              nb_test_errors, test_input.size(0)))

In [None]:
######################################################################
# Question 4

model = Net2()
train_model(model, train_input, train_target, mini_batch_size)
nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
print('test error Net2 {:0.2f}%% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                   nb_test_errors, test_input.size(0)))