In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

In [2]:
# GPU smoke test: draw five random values on the host and copy them to the CUDA device.
torch.randn(5).to('cuda')

tensor([-1.3116,  0.3369,  0.1849,  0.9587, -0.5912], device='cuda:0')

In [3]:
#flexible model
class CIFAR10Model(nn.Module):
    """Convolutional classifier: three conv blocks followed by a three-layer MLP head.

    Each conv block is Conv -> BatchNorm -> ReLU -> Conv -> ReLU -> 2x2 MaxPool, so the
    spatial size is halved three times. The first linear layer expects 4096 features,
    i.e. 256 channels x 4 x 4, which corresponds to 3-channel 32x32 inputs.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
            Defaults to 10, so ``CIFAR10Model()`` behaves as before.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # NOTE: the position of every module inside the Sequential containers defines
        # the state-dict keys (e.g. ``conv_layer.0.weight``); do not reorder them or
        # previously saved checkpoints will stop loading.
        self.conv_layer = nn.Sequential(
            # Conv Layer block 1: 3 -> 32 -> 64 channels, then halve the resolution
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv Layer block 2: 64 -> 128 -> 128 channels, halve the resolution,
            # then light channel-wise dropout
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),

            # Conv Layer block 3: 128 -> 256 -> 256 channels, halve the resolution
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)`` for an image batch ``x``."""
        # conv layers
        x = self.conv_layer(x)

        # Flatten the feature maps into one vector per sample. torch.flatten copies
        # when needed, whereas .view() raises on non-contiguous layouts such as
        # channels_last.
        x = torch.flatten(x, 1)

        # fc layer
        x = self.fc_layer(x)

        return x


# Build the network on the host first, then move its parameters and buffers to the GPU.
model = CIFAR10Model()
model = model.cuda()

In [4]:
# Optimiser: plain SGD over every parameter of the model with a fixed learning rate.
params = model.parameters()
optimiser = optim.SGD(params=params, lr=0.01)


In [5]:
# Loss: cross-entropy computed from raw logits and averaged over the batch.
loss = nn.CrossEntropyLoss(reduction='mean')

In [6]:
# Data pipeline: CIFAR-10 training images, a held-out validation split, and batch loaders.

# Convert images to tensors, then normalise each of the three channels with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# download=False: the dataset must already be present under ./data.
train_data = datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)

val_size = 5000
batch_size = 128

# Seed the split so the same images are held out on every run. Without this, a model
# reloaded after a kernel restart would be validated on images it was trained on.
split_generator = torch.Generator().manual_seed(0)
train, val = random_split(
    train_data,
    [len(train_data) - val_size, val_size],
    generator=split_generator,
)

# Reshuffle the training samples every epoch; a fixed batch order hurts SGD.
# Validation order does not affect the metrics, so it stays unshuffled.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size)

In [7]:
# Training and validation loop.


def _run_epoch(loader, train):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        train: When True the model is put in training mode and the optimiser is
            stepped on every batch. When False the model is put in eval mode
            (Dropout off, BatchNorm uses its running statistics) and no gradients
            are tracked.

    Returns:
        A pair of Python floats. Both are averaged per sample, so a short final
        batch is not over-weighted.
    """
    model.train(train)
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.set_grad_enabled(train):
        for x, y in loader:
            x = x.cuda()
            y = y.cuda()
            logits = model(x)
            J = loss(logits, y)
            if train:
                optimiser.zero_grad()
                J.backward()
                optimiser.step()

            n = y.size(0)
            # The loss is a batch mean; scale by the batch size to accumulate a sum.
            loss_sum += J.item() * n
            n_correct += (logits.detach().argmax(dim=1) == y).sum().item()
            n_seen += n
    n_seen = max(n_seen, 1)  # avoid dividing by zero on an empty loader
    return loss_sum / n_seen, n_correct / n_seen


num_epochs = 8
for epoch in range(num_epochs):
    train_loss, train_acc = _run_epoch(train_loader, train=True)
    print(f'Epoch {epoch + 1}, train loss: {train_loss:.2f}, train acc: {train_acc:.2f}')

    val_loss, val_acc = _run_epoch(val_loader, train=False)
    print(f'Epoch {epoch + 1}, val loss: {val_loss:.2f}, val acc: {val_acc:.2f}')
    

Epoch 1, train loss: 1.99, train acc: 0.27
Epoch 1, val loss: 1.73, val acc: 0.36
Epoch 2, train loss: 1.47, train acc: 0.45
Epoch 2, val loss: 1.46, val acc: 0.47
Epoch 3, train loss: 1.24, train acc: 0.54
Epoch 3, val loss: 1.20, val acc: 0.57
Epoch 4, train loss: 1.06, train acc: 0.62
Epoch 4, val loss: 1.13, val acc: 0.60
Epoch 5, train loss: 0.93, train acc: 0.66
Epoch 5, val loss: 1.02, val acc: 0.64
Epoch 6, train loss: 0.84, train acc: 0.70
Epoch 6, val loss: 0.98, val acc: 0.66
Epoch 7, train loss: 0.76, train acc: 0.73
Epoch 7, val loss: 0.94, val acc: 0.67
Epoch 8, train loss: 0.69, train acc: 0.76
Epoch 8, val loss: 0.92, val acc: 0.68


In [10]:
# Save the trained weights as a state dict, then restore them into a fresh network.
PATH = './cifar_model.pth'
torch.save(model.state_dict(), PATH)

# Rebuild the architecture on the same device as `model` and load the saved weights
# into it. map_location places the tensors on that device regardless of where they
# were saved from.
model_device = next(model.parameters()).device
trained_model = CIFAR10Model().to(model_device)
trained_model.load_state_dict(torch.load(PATH, map_location=model_device))
# Inference mode: disable Dropout and make BatchNorm use its running statistics.
trained_model.eval()

In [11]:
# Held-out CIFAR-10 test split, preprocessed with the same transform as the training data.
test_dataset = datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
# Iterate in mini-batches rather than pushing the whole test set through the network
# at once, which needs a very large amount of activation memory. Shuffling is
# unnecessary because accuracy does not depend on sample order.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
# Evaluate classification accuracy on the test set.
# Switch to eval mode first: in training mode Dropout stays active and BatchNorm
# both normalises with batch statistics and updates its running statistics from
# the test data. The previous mode is restored afterwards.
was_training = model.training
model.eval()

correct = 0
total = 0
with torch.no_grad():
    for x, y in test_loader:
        x = x.cuda()
        y = y.cuda()
        outputs = model(x)
        predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
        total += y.size(0)
        correct += (predicted == y).sum().item()

model.train(was_training)

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 68 %
