In [9]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import random_split
from torch.utils.data.dataloader import DataLoader

In [2]:
# Report whether a CUDA device is visible to PyTorch.
# NOTE(review): none of the cells visible in this notebook move the model or the
# batches to the GPU, so this flag is informational only -- confirm intent.
train_on_gpu = torch.cuda.is_available()

if train_on_gpu:
    print('CUDA is available! Training on GPU\n', torch.cuda.get_device_name(0))
else:
    print('CUDA is not available. Training on CPU')

CUDA is available! Training on GPU
 NVIDIA GeForce RTX 3060 Laptop GPU


# Load Data

In [11]:
# Number of training images held out for validation, and the seed that fixes
# which images those are. Without a seeded generator `random_split` draws a
# different partition on every kernel restart, so validation numbers from
# different sessions are not comparable.
VALIDATION_SIZE = 5000
SPLIT_SEED = 42

# Images are only converted to tensors; no normalisation or augmentation is applied.
dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transforms.ToTensor())
# NOTE(review): `trainloader` / `testloader` (batch size 4) are not used by any cell
# visible here; they are kept unchanged in case other cells rely on them. Note that
# `trainloader` iterates the full training set, including the validation images.
trainloader = torch.utils.data.DataLoader(dataset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

batchSize = 200

# Derive the training size from the dataset instead of hard-coding 45000, and seed
# the split so the same images land in the validation set on every run.
trainset, validateset = random_split(
    dataset,
    [len(dataset) - VALIDATION_SIZE, VALIDATION_SIZE],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader needs shuffling; evaluation metrics are aggregated over
# the whole loader, so batch order is irrelevant there.
train = DataLoader(trainset, batchSize, shuffle=True)
validate = DataLoader(validateset, batchSize, shuffle=False)
testLoader = DataLoader(testset, batch_size=batchSize, shuffle=False)

# Class names indexed by CIFAR-10 label id.
classes = ("plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

# Define A Simple Neural Network

In [12]:
class Cifar10Classifier(nn.Module):
    """Convolutional classifier producing 10 class logits per image.

    Architecture (see ``self.network``): three stages of two 3x3 convolutions
    (padding 1, ReLU after each) followed by a 2x2 max-pool, with channel widths
    3 -> 32 -> 64, 64 -> 128 -> 128 and 128 -> 256 -> 256; then a flatten and a
    4096 -> 1024 -> 512 -> 10 fully connected head. The first linear layer expects
    ``256 * 4 * 4`` features, i.e. 3-channel 32x32 inputs (three pools: 32 -> 4).

    Besides ``forward`` the class carries the per-batch training/validation steps
    and the aggregation of validation results used by the training helpers.
    """

    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def _forwardBatch(self, batch):
        """Run one ``(images, labels)`` batch through the network.

        The batch is first moved to the device that holds the model parameters, so
        the steps keep working after ``model.to(device)`` even when the data loader
        yields CPU tensors. Returns ``(loss, logits, labels)`` with ``labels`` on
        the same device as ``logits``.
        """
        images, labels = batch

        # A module without parameters has no device to follow; leave the batch as is.
        firstParam = next(self.parameters(), None)
        if firstParam is not None:
            images = images.to(firstParam.device)
            labels = labels.to(firstParam.device)

        out = self(images)
        return F.cross_entropy(out, labels), out, labels

    def trainingStep(self, batch):
        """Return the cross-entropy loss (with graph attached) for one batch."""
        loss, _, _ = self._forwardBatch(batch)
        return loss

    def accuracy(self, outputs, labels):
        """Return the fraction of rows whose arg-max logit equals the label.

        The result is a 0-dim float tensor on the device of ``outputs``; it is
        computed with tensor ops only, avoiding a host round-trip per batch.
        """
        preds = torch.argmax(outputs, dim=1)
        return (preds == labels.to(preds.device)).float().mean()

    def validationStep(self, batch):
        """Return loss, accuracy and size of one validation batch.

        ``batchSize`` lets ``validationEpochEnd`` weight each batch by the number
        of samples it contains.
        """
        loss, out, labels = self._forwardBatch(batch)
        return {"loss": loss, "accuracy": self.accuracy(out, labels), "batchSize": len(labels)}

    def validationEpochEnd(self, outputs):
        """Aggregate per-batch results into epoch-level ``loss`` and ``accuracy``.

        Each entry of ``outputs`` is a dict with tensor values under ``"loss"`` and
        ``"accuracy"`` and, optionally, an integer ``"batchSize"``. Batches are
        weighted by ``batchSize`` so that a smaller final batch does not skew the
        epoch metrics; entries without it get weight 1, which reduces to the plain
        mean over batches.

        Returns a dict of Python floats. Raises ``ValueError`` if ``outputs`` is
        empty (e.g. an empty validation loader).
        """
        if not outputs:
            raise ValueError("validationEpochEnd received no batch results")

        sizes = [float(row.get("batchSize", 1)) for row in outputs]

        def weightedMean(key):
            values = torch.stack([row[key] for row in outputs])
            weights = torch.tensor(sizes, dtype=values.dtype, device=values.device)
            return ((values * weights).sum() / weights.sum()).item()

        return {"loss": weightedMean("loss"), "accuracy": weightedMean("accuracy")}

    def forward(self, x):
        """Map a batch of images to a ``(batch, 10)`` tensor of class logits."""
        return self.network(x)

# Train model

In [13]:
@torch.no_grad()
def evaluateModel(model, validationLoader):
    """Evaluate ``model`` on every batch of ``validationLoader``.

    Puts the model in eval mode (it is left in eval mode afterwards), runs
    ``model.validationStep`` on each batch with gradient tracking disabled, and
    returns whatever ``model.validationEpochEnd`` makes of the collected results.
    """
    model.eval()

    batchResults = []
    for batch in validationLoader:
        batchResults.append(model.validationStep(batch))

    return model.validationEpochEnd(batchResults)

In [14]:
def trainModel(epochs, lr, model, trainLoader, validationLoader, optimizationFunction=torch.optim.SGD):
    """Train ``model`` for ``epochs`` passes over ``trainLoader``.

    Args:
        epochs: number of passes over ``trainLoader``.
        lr: learning rate, passed as the second positional argument to
            ``optimizationFunction``.
        model: module exposing ``trainingStep(batch)`` (returns a scalar loss) and
            the validation hooks used by ``evaluateModel``.
        trainLoader: iterable of training batches.
        validationLoader: iterable of validation batches, evaluated after every epoch.
        optimizationFunction: optimizer class/factory called as
            ``optimizationFunction(model.parameters(), lr)``.

    Prints a line at the start of each epoch and the validation results at its end.
    Returns ``None``.
    """
    optimizer = optimizationFunction(model.parameters(), lr)

    for epoch in range(epochs):
        print(f"training epoch {epoch}")
        # evaluateModel switches the model to eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()

        for batch in trainLoader:
            # Clear gradients *before* the backward pass so that gradients left on
            # the parameters before this call cannot leak into the first update.
            optimizer.zero_grad()
            loss = model.trainingStep(batch)
            loss.backward()
            optimizer.step()

        results = evaluateModel(model, validationLoader)
        print(f"after training epoch {epoch} we get results {results}")

In [15]:
# Fixed seed so the initial weights of this model are reproducible across kernel restarts.
torch.manual_seed(0)
model = Cifar10Classifier()

# Sanity check before any training: an untrained 10-class classifier should sit near
# chance level, i.e. roughly 10% accuracy and a loss close to ln(10) ~= 2.30.
evaluateModel(model, validate)

{'loss': 2.3029520511627197, 'accuracy': 0.09919998794794083}

# Optimizer Adam

In [17]:
%%time
trainModel(6, 0.001, model, trainLoader=train, validationLoader=validate, optimizationFunction=torch.optim.Adam)

training epoch 0
after training epoch 0 we get results {'loss': 1.3440661430358887, 'accuracy': 0.48680001497268677}
training epoch 1
after training epoch 1 we get results {'loss': 1.0307908058166504, 'accuracy': 0.6258000731468201}
training epoch 2
after training epoch 2 we get results {'loss': 0.836700975894928, 'accuracy': 0.7051999568939209}
training epoch 3
after training epoch 3 we get results {'loss': 0.7258360385894775, 'accuracy': 0.7504000067710876}
training epoch 4
after training epoch 4 we get results {'loss': 0.6655386090278625, 'accuracy': 0.763200044631958}
training epoch 5
after training epoch 5 we get results {'loss': 0.6501905918121338, 'accuracy': 0.7755999565124512}
CPU times: total: 36min 10s
Wall time: 15min 13s


# Optimizer Adagrad

In [21]:
# Start the Adagrad run from a freshly initialised network; the fixed seed makes
# the initial weights reproducible across kernel restarts.
torch.manual_seed(0)
model = Cifar10Classifier()

In [22]:
%%time
trainModel(6, 0.001, model, trainLoader=train, validationLoader=validate, optimizationFunction=torch.optim.Adagrad)

training epoch 0
after training epoch 0 we get results {'loss': 1.6845744848251343, 'accuracy': 0.3628000020980835}
training epoch 1
after training epoch 1 we get results {'loss': 1.4758896827697754, 'accuracy': 0.44679996371269226}
training epoch 2
after training epoch 2 we get results {'loss': 1.4316095113754272, 'accuracy': 0.477400004863739}
training epoch 3
after training epoch 3 we get results {'loss': 1.3671494722366333, 'accuracy': 0.49880000948905945}
training epoch 4
after training epoch 4 we get results {'loss': 1.3041396141052246, 'accuracy': 0.5202000141143799}
training epoch 5
after training epoch 5 we get results {'loss': 1.2676283121109009, 'accuracy': 0.5356000065803528}
CPU times: total: 12min 16s
Wall time: 10min 42s


# Optimizer AdamW

In [25]:
# Start the AdamW run from a freshly initialised network; the fixed seed makes
# the initial weights reproducible across kernel restarts.
torch.manual_seed(0)
model = Cifar10Classifier()

In [26]:
%%time
trainModel(6, 0.001, model, trainLoader=train, validationLoader=validate, optimizationFunction=torch.optim.AdamW)

training epoch 0
after training epoch 0 we get results {'loss': 1.422218680381775, 'accuracy': 0.46459999680519104}
training epoch 1
after training epoch 1 we get results {'loss': 1.0822327136993408, 'accuracy': 0.6132000088691711}
training epoch 2
after training epoch 2 we get results {'loss': 0.9080066084861755, 'accuracy': 0.6833999752998352}
training epoch 3
after training epoch 3 we get results {'loss': 0.7564858198165894, 'accuracy': 0.7310000061988831}
training epoch 4
after training epoch 4 we get results {'loss': 0.7107223272323608, 'accuracy': 0.7539999485015869}
training epoch 5
after training epoch 5 we get results {'loss': 0.6667570471763611, 'accuracy': 0.781999945640564}
CPU times: total: 17min 53s
Wall time: 11min 46s


# Optimizer Adadelta

In [27]:
# Start the Adadelta run from a freshly initialised network; the fixed seed makes
# the initial weights reproducible across kernel restarts.
torch.manual_seed(0)
model = Cifar10Classifier()

In [29]:
%%time
trainModel(6, 0.001, model, trainLoader=train, validationLoader=validate, optimizationFunction=torch.optim.Adadelta)

training epoch 0
after training epoch 0 we get results {'loss': 2.3031013011932373, 'accuracy': 0.09940000623464584}
training epoch 1
after training epoch 1 we get results {'loss': 2.3030920028686523, 'accuracy': 0.09940000623464584}
training epoch 2
after training epoch 2 we get results {'loss': 2.3030834197998047, 'accuracy': 0.09940000623464584}
training epoch 3
after training epoch 3 we get results {'loss': 2.3030753135681152, 'accuracy': 0.09939999878406525}
training epoch 4
after training epoch 4 we get results {'loss': 2.3030667304992676, 'accuracy': 0.09940000623464584}
training epoch 5
after training epoch 5 we get results {'loss': 2.3030588626861572, 'accuracy': 0.09940000623464584}
CPU times: total: 4min 57s
Wall time: 9min 26s


# Optimizer Adamax

In [30]:
# Start the Adamax run from a freshly initialised network; the fixed seed makes
# the initial weights reproducible across kernel restarts.
torch.manual_seed(0)
model = Cifar10Classifier()

In [31]:
%%time
trainModel(6, 0.001, model, trainLoader=train, validationLoader=validate, optimizationFunction=torch.optim.Adamax)

training epoch 0
after training epoch 0 we get results {'loss': 1.48053777217865, 'accuracy': 0.43880000710487366}
training epoch 1
after training epoch 1 we get results {'loss': 1.3311346769332886, 'accuracy': 0.5192000269889832}
training epoch 2
after training epoch 2 we get results {'loss': 1.1267459392547607, 'accuracy': 0.587399959564209}
training epoch 3
after training epoch 3 we get results {'loss': 1.0351043939590454, 'accuracy': 0.6294000148773193}
training epoch 4
after training epoch 4 we get results {'loss': 0.9445332288742065, 'accuracy': 0.6580000519752502}
training epoch 5
after training epoch 5 we get results {'loss': 0.8266209363937378, 'accuracy': 0.7071999907493591}
CPU times: total: 18min 23s
Wall time: 12min 7s


# Optimizer ASGD

In [16]:
# Start the ASGD run from a freshly initialised network; the fixed seed makes
# the initial weights reproducible across kernel restarts.
torch.manual_seed(0)
model = Cifar10Classifier()

In [17]:
%%time
trainModel(6, 0.001, model, trainLoader=train, validationLoader=validate, optimizationFunction=torch.optim.ASGD)

training epoch 0
after training epoch 0 we get results {'loss': 2.3031508922576904, 'accuracy': 0.0965999960899353}
training epoch 1
after training epoch 1 we get results {'loss': 2.3031275272369385, 'accuracy': 0.0965999960899353}
training epoch 2
after training epoch 2 we get results {'loss': 2.303105354309082, 'accuracy': 0.0965999960899353}
training epoch 3
after training epoch 3 we get results {'loss': 2.3030846118927, 'accuracy': 0.0965999960899353}
training epoch 4
after training epoch 4 we get results {'loss': 2.303065538406372, 'accuracy': 0.0965999960899353}
training epoch 5
after training epoch 5 we get results {'loss': 2.303046941757202, 'accuracy': 0.0966000109910965}
CPU times: total: 2h 47min 50s
Wall time: 37min 3s
