In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
from torchvision import datasets, transforms
from torch.utils.data import random_split
from matplotlib import pyplot as plt

torch.manual_seed(123)

  Referenced from: <AE5A0901-5B6C-3028-ADEE-0C068D0474D9> /Users/kristiansvendsen/opt/anaconda3/lib/python3.9/site-packages/torchvision/image.so
  Expected in:     <22ECBAD5-EEDD-3C80-9B5A-0564B60B6811> /Users/kristiansvendsen/opt/anaconda3/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib
  warn(f"Failed to load image Python extension: {e}")


<torch._C.Generator at 0x7ff028567930>

In [2]:
# Based on code from the pytorch tutorials

data_path = '../data/'
bp_filter = lambda x: x[1] in [0,2] # Bird and Plane Filter. Or if you will, Everything but Bird and Plane Filter
preprocessor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4915, 0.4823, 0.4468),
                                (0.2470, 0.2435, 0.2616))
        ])

cifar10_train_val = list(filter(bp_filter, datasets.CIFAR10(data_path, train=True, download=True, transform=preprocessor)))
cifar10_test = list(filter(bp_filter, datasets.CIFAR10(data_path, train=False, download=True, transform=preprocessor)))

label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

# For each dataset, keep only airplanes and birds
cifar2_train_val = [(img, label_map[label]) for img, label in cifar10_train_val if label in [0, 2]]
cifar2_test = [(img, label_map[label]) for img, label in cifar10_test if label in [0, 2]]

n_train = int(len(cifar2_train_val)*0.90)
n_val =  len(cifar2_train_val) - n_train
cifar2_train, cifar2_val = random_split(
        cifar2_train_val, 
        [n_train, n_val],
        generator=torch.Generator().manual_seed(123)
)


Files already downloaded and verified
Files already downloaded and verified


In [4]:
class MyMLP(nn.Module):
    def __init__(self):
        super().__init__()  # to inherit the '__init__' method from the 'nn.Module' class
        # Add whatever you want here (e.g layers and activation functions)
        # The order and names don't matter here but it is easier to understand
        # if you go for Layer1, fun1, layer2, fun2, etc
        # Some conventions:
        # - conv stands for convolution
        # - pool for pooling
        # - fc for fully connected

        self.flat = nn.Flatten()
        # 32*32*3: determined by our dataset: 32x32 RGB images
        self.fc1 = nn.Linear(32*32*3, 512)
        self.fc2 = nn.Linear(512, 128)
        self.act2 = nn.ReLU()
        # 2: determined by our number of classes (birds and planes)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 2)
        
    # Remember, we saw earlier that `forward` defines the 
    # computation performed at every call (the forward pass) and that it
    # should be overridden by all subclasses.
    def forward(self, x):
        # Now the order matters! 
        out = self.flat(x)
        out = self.act2(self.fc1(out))
        out = self.act2(self.fc2(out))
        out = self.act2(self.fc3(out))
        out = self.fc4(out)
        return out
        

In [5]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    
    n_batch = len(train_loader)
    
    # We'll store there the training loss for each epoch
    losses_train = []
    
    # Set the network in training mode
    model.train()
    
    # Re-initialize gradients, just in case the model has been inappropriately 
    # manipulated before the training
    optimizer.zero_grad(set_to_none=True)
    
    for epoch in range(1, n_epochs + 1): 
        
        # Training loss for the current epoch
        loss_train = 0

        # Loop over our dataset (in batches the data loader creates for us)
        for imgs, labels in train_loader:
            
            # Feed a batch into our model
            outputs = model(imgs)
            
            # Compute the loss we wish to minimize 
            # Note that by default, it is the mean loss that is computed
            # (so entire_batch_loss / batch_size)
            loss = loss_fn(outputs, labels) 
            #print(loss)
            
            # Perform the backward step. That is, compute the gradients of all parameters we want the network to learn
            loss.backward()
            
            # Update the model
            optimizer.step() 
            
            # Zero out gradients before the next round (or the end of training)
            optimizer.zero_grad() 

            # Update loss for this epoch
            # It is important to transform the loss to a number with .item()
            loss_train += loss.item()
            
        # Store current epoch loss. 
        losses_train.append(loss_train / n_batch)
        

        if epoch == 1 or epoch % 10 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, loss_train / n_batch))
            
    return losses_train

In [6]:
def train_manual_update(n_epochs, model, loss_fn, train_loader, lr=0.01, weight_decay=0, momentum_coeff=0):
    n_batch = len(train_loader)
    model.train()
    loss_train = []
    vel = []
    for epoch in range(1, n_epochs + 1):
        losses = 0
        for img, label in train_loader:
            output = model(img)
            loss = loss_fn(output, label)
            
            losses += loss #loss ble ikke plusset
            loss.backward()
    
            with torch.no_grad():
                for i, p in enumerate(model.parameters()):
                    if epoch == 1:
                        vel.append(torch.zeros(p.data.size(), dtype=torch.float).to(device="cpu"))
                    
                    grad = p.grad
                    
                    if weight_decay > 0:
                        grad = grad + weight_decay*p.data
                        
                    if momentum_coeff > 0:
                        vel[i] = momentum_coeff * vel[i] + p.grad
                        tempy = lr*(weight_decay*p.data) #nb
                        p.data = p.data - lr*vel[i] - tempy
                        
                    #else:
                        #tempy = lr*(weight_decay*p.data)
                        #p.data = p.data - (lr*p.grad) - tempy
                        
                    p.data = p.data - lr*grad
                    p.grad.data.zero_()
                    #p.grad.detach_()
        loss_train.append(losses/n_batch)
        if epoch == 1 or epoch % 10 == 0:
            print('{}  |  Epoch {}  |  Training loss {:.3f}'.format(
                datetime.now().time(), epoch, losses / n_batch))
    return loss_train

        

In [38]:
train_loader = torch.utils.data.DataLoader(cifar2_train, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(cifar2_test, batch_size=64, shuffle=False)

In [8]:
torch.manual_seed(123)
model = MyMLP()

optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

loss_train = train(3, optimizer, model, loss_fn, train_loader)
loss_train



11:39:02.267529  |  Epoch 1  |  Training loss 0.640


[0.6401884158452352, 0.5386688945986701, 0.48169365990246443]

In [9]:

torch.manual_seed(123)

model_manual = MyMLP()

loss_fn = nn.CrossEntropyLoss()

loss_train = train_manual_update(3, model_manual, loss_fn, train_loader)
loss_train


11:39:11.555316  |  Epoch 1  |  Training loss 0.640


[tensor(0.6402, grad_fn=<DivBackward0>),
 tensor(0.5387, grad_fn=<DivBackward0>),
 tensor(0.4817, grad_fn=<DivBackward0>)]

In [16]:
def compute_accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0

    # We do not want gradients here, as we will not want to update the parameters.
    with torch.no_grad():
        for imgs, labels in loader:

            outputs = model(imgs)
            _, predicted = torch.max(outputs, dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())

    acc =  correct / total
    print("Accuracy: {:.2f}".format(acc))
    return acc


In [25]:
torch.manual_seed(123)

model_1 = MyMLP()

train_manual_update(20, model_1, loss_fn, train_loader, lr=0.01, weight_decay=0, momentum_coeff=0)

compute_accuracy(model_1, val_loader)


11:55:59.500211  |  Epoch 1  |  Training loss 0.640
11:56:26.578565  |  Epoch 10  |  Training loss 0.332
11:56:57.662808  |  Epoch 20  |  Training loss 0.209
Accuracy: 0.86


0.86

In [32]:
torch.manual_seed(123)

model_2 = MyMLP()

train_manual_update(5, model_2, loss_fn, train_loader, lr=0.02, weight_decay=0.02, momentum_coeff=0)

compute_accuracy(model_2, val_loader)

12:02:55.204466  |  Epoch 1  |  Training loss 0.594
Accuracy: 0.84


0.839

In [40]:
torch.manual_seed(123)

model_3 = MyMLP()

train_manual_update(20, model_3, loss_fn, train_loader, lr=0.01, weight_decay=0, momentum_coeff=0.9)

compute_accuracy(model_3, val_loader)

14:43:10.659046  |  Epoch 1  |  Training loss 0.505
14:43:39.608202  |  Epoch 10  |  Training loss 0.170
14:44:12.749470  |  Epoch 20  |  Training loss 0.063
Accuracy: 0.85


0.852

In [41]:
torch.manual_seed(123)

model_4 = MyMLP()

train_manual_update(20, model_4, loss_fn, train_loader, lr=0.01, weight_decay=0.01, momentum_coeff=0.9)

compute_accuracy(model_4, val_loader)

14:44:51.796711  |  Epoch 1  |  Training loss 0.506
14:45:21.020911  |  Epoch 10  |  Training loss 0.184
14:45:53.745535  |  Epoch 20  |  Training loss 0.091
Accuracy: 0.86


0.865

In [42]:
compute_accuracy(model_4, test_loader)

Accuracy: 0.85


0.854