# Dataset

CIFAR 10

In [9]:
import torchvision

cifar10Train = torchvision.datasets.CIFAR10("./CIFAR10", download=True, transform=lambda im: torchvision.transforms.functional.pil_to_tensor(im)/255)
cifar10Test = torchvision.datasets.CIFAR10("./CIFAR10", train=False, download=True, transform=lambda im: torchvision.transforms.functional.pil_to_tensor(im)/255)

Files already downloaded and verified
Files already downloaded and verified


In [10]:
import numpy as np
import time

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import PIL
from sklearn.metrics import accuracy_score, f1_score

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Models

## CNN1

3 Conv, 2 FC, No dropout, no batch norm

Activation fn: ReLU

Optimizer: SGD

In [11]:
class CNN1(nn.Module):
    def __init__(self):
        super().__init__()

        self.cnn1 = nn.Conv2d(3, 16, (3, 3), padding='same').to(device)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d((2, 2), stride=(2, 2)).to(device)

        self.cnn2 = nn.Conv2d(16, 32, (3, 3), padding='same').to(device)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d((2, 2), stride=(2,2)).to(device)

        self.cnn3 = nn.Conv2d(32, 64, (3, 3), padding='same').to(device)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d((2, 2), stride=(2,2)).to(device)

        self.linear1 = nn.Linear(64 * 4 * 4, 512)
        self.relu4 = nn.ReLU()

        self.linear2 = nn.Linear(512, 512)
        self.relu5 = nn.ReLU()

        self.linear3 = nn.Linear(512, 10)

    def forward(self, inputs):
        x = self.cnn1(inputs)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self.cnn2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)

        x = self.cnn3(x)
        x = self.relu3(x)
        x = self.maxpool3(x)

        x = torch.flatten(x, 1)

        x = self.linear1(x)
        x = self.relu4(x)

        x = self.linear2(x)
        x = self.relu5(x)

        x = self.linear3(x)
    
        return x


In [12]:
trainloader = DataLoader(cifar10Train, 64)
testloader = DataLoader(cifar10Test, 64)

In [13]:
cnn1 = CNN1().to(device)

optimizer = torch.optim.SGD(cnn1.parameters(), 1e-3)
loss_fn = nn.CrossEntropyLoss()

In [14]:
def train_one_epoch(model, optimzer, loss_fn, training_loader):
    running_loss = 0.

    for i, data in enumerate(training_loader):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs.to(device))

        loss = loss_fn(outputs, labels.to(device))
        loss.backward()

        optimizer.step()

        running_loss += loss.item()

        
    average_loss = running_loss/(i+1)

    return average_loss

In [15]:
def evaluate(tdataset, model):
    preds = []
    truths = []
    for example in tdataset:
        input = example[0].unsqueeze(0).to(device)
        logits = model(input)
        pred = torch.argmax(torch.softmax(logits, 1))
        preds.append(pred.item())
        truths.append(example[1])
    return accuracy_score(truths, preds), f1_score(truths, preds, average='macro'), preds, truths

In [16]:
def save_model(model, filepath):
    torch.save(model.state_dict(), filepath)

def load_model(model, filepath, device='cpu'):
    model.load_state_dict(torch.load(filepath, map_location=device))
    model.eval()

In [18]:
EPOCHS = 500

begin = time.time()

max_f1 = -float('inf')

log = []

for epoch in range(EPOCHS):
    cnn1.train(True)
    average_train_loss = train_one_epoch(cnn1, optimizer, loss_fn, trainloader)

    cnn1.eval()
    running_test_loss = 0.
    for i, data in enumerate(testloader):
        inputs, labels = data
        outputs = cnn1(inputs.to(device))
        loss = loss_fn(outputs, labels.to(device))
        running_test_loss += loss.item()
    average_test_loss = running_test_loss/(i+1)
    end = time.time()
    log.append({'average_test_loss': average_test_loss, 'average_train_loss': average_train_loss, 'time_from_start': end-begin})
    if (epoch+1)%10 == 0:
        train_accuracy, train_f1, _, _ = evaluate(cifar10Train, cnn1)
        test_accuracy, test_f1, _, _ = evaluate(cifar10Test, cnn1)
        if test_f1 > max_f1:
            max_f1 = test_f1
            save_model(cnn1, './models/cnn1.pth')
        print(f"Epoch {epoch+1} | train loss: {average_train_loss:.3f} | train accuracy: {100*train_accuracy:.2f}% | train f1: {train_f1:.2f} | test loss: {average_test_loss:.3f} | test accuracy: {100*test_accuracy:.2f}% | test f1: {test_f1:.2f} | time: {end-begin:.2f}s")

Epoch 10 | train loss: 2.301 | train accuracy: 12.61% | train f1: 0.05 | test loss: 2.301 | test accuracy: 12.77% | test f1: 0.05 | time: 97.47s
Epoch 20 | train loss: 2.296 | train accuracy: 17.74% | train f1: 0.10 | test loss: 2.295 | test accuracy: 17.43% | test f1: 0.10 | time: 247.46s
Epoch 30 | train loss: 2.216 | train accuracy: 18.65% | train f1: 0.11 | test loss: 2.201 | test accuracy: 19.06% | test f1: 0.11 | time: 397.67s
Epoch 40 | train loss: 1.983 | train accuracy: 28.25% | train f1: 0.25 | test loss: 1.969 | test accuracy: 28.95% | test f1: 0.26 | time: 548.40s
Epoch 50 | train loss: 1.842 | train accuracy: 33.87% | train f1: 0.32 | test loss: 1.832 | test accuracy: 34.33% | test f1: 0.32 | time: 698.74s
Epoch 60 | train loss: 1.655 | train accuracy: 37.26% | train f1: 0.35 | test loss: 1.732 | test accuracy: 37.19% | test f1: 0.35 | time: 849.32s
Epoch 70 | train loss: 1.535 | train accuracy: 37.02% | train f1: 0.36 | test loss: 1.744 | test accuracy: 37.06% | test f1: 