In [2]:
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

In [3]:
# GPU smoke test: draw five standard-normal samples, then move the tensor to
# the CUDA device. This raises if no usable GPU is available.
torch.randn(5).to("cuda")

tensor([ 0.8406, -0.7359,  0.1632, -1.5821, -1.0615], device='cuda:0')

In [3]:
#define model
# Baseline multilayer perceptron: 32*32*3 = 3072 input features -> 64 -> 64 -> 10 logits.
# nn.Flatten comes first so the network accepts image batches of shape
# (N, 3, 32, 32) directly; without it the first Linear layer rejects 4-D
# input. Already-flat (N, 3072) input passes through Flatten unchanged.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(32*32*3, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)

In [4]:
#flexible model
class CIFAR10Model(nn.Module):
    """Convolutional image classifier built from three conv stages and an MLP head.

    Each conv stage is Conv-BatchNorm-ReLU-Conv-ReLU followed by a 2x2 max-pool
    that halves the spatial size. The head's first Linear layer takes 4096
    features, which matches 256 channels at 4x4 resolution, i.e. a 32x32 input
    after three poolings.

    Args:
        num_classes: Number of output logits. Defaults to 10, which reproduces
            the original fixed-size head.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv_layer = nn.Sequential(
            # Conv Layer block 1: 3 -> 32 -> 64 channels, then pool
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv Layer block 2: 64 -> 128 -> 128 channels, then pool + light dropout
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),

            # Conv Layer block 3: 128 -> 256 -> 256 channels, then pool
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # 256 output channels at 4x4 spatial resolution = 4096 flattened features.
        flat_features = 256 * 4 * 4
        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(flat_features, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Map a batch of images to raw class scores.

        Args:
            x: Image batch; the first dimension is the batch dimension and the
                conv stack expects 3 input channels.

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # conv layers
        x = self.conv_layer(x)

        # flatten everything except the batch dimension
        x = x.flatten(start_dim=1)

        # fc layer
        return self.fc_layer(x)


# Build the convolutional network, then move its parameters and buffers to the GPU.
model = CIFAR10Model()
model = model.cuda()

In [5]:
# Optimiser: plain stochastic gradient descent (learning rate 0.01) over every
# parameter of the model.
params = model.parameters()
optimiser = optim.SGD(params=params, lr=0.01)


In [6]:
# Training criterion: cross-entropy computed from raw logits and integer class
# labels, averaged over the batch (the default reduction, spelled out here).
loss = nn.CrossEntropyLoss(reduction='mean')

In [7]:
#train, val, label
# Preprocessing: convert each image to a tensor, then normalise every channel
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)),
    ]
)

# download=False: the dataset files are expected to already exist under ./data.
train_data = datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)

val_size = 5000
batch_size = 32

# Seed the split so the same val_size images are held out on every run;
# otherwise validation numbers are not comparable between kernel restarts.
split_generator = torch.Generator().manual_seed(0)
train, val = random_split(
    train_data,
    [len(train_data)-val_size, val_size],
    generator=split_generator,
)

# Reshuffle the training samples every epoch so SGD does not see the same
# batch composition and order each time. Validation order does not affect
# the metrics, so that loader stays unshuffled.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size)

In [10]:
#training and validation loop

def _run_epoch(net, loader, criterion, opt=None):
    """Run one full pass over `loader` and return (mean loss, accuracy).

    Args:
        net: The model to run. It is switched to training mode when `opt` is
            given and to evaluation mode otherwise, so Dropout and BatchNorm
            behave correctly in each phase.
        loader: Iterable yielding (inputs, labels) batches.
        criterion: Loss callable taking (logits, labels). It is assumed to
            return the per-batch mean, as nn.CrossEntropyLoss does by default.
        opt: Optimiser used to update `net`. Pass None for a gradient-free
            evaluation pass.

    Returns:
        Tuple (loss, accuracy) averaged over samples, so a smaller final batch
        is weighted by its actual size. Both are NaN if the loader is empty.
    """
    training = opt is not None
    net.train(training)
    # Follow the model's device instead of hard-coding one.
    device = next(net.parameters()).device

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(training):
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            logits = net(x)
            J = criterion(logits, y)

            if training:
                opt.zero_grad()
                J.backward()
                opt.step()

            # .item() turns the results into Python numbers, so no GPU tensors
            # or autograd graphs are kept alive across iterations.
            n_batch = y.size(0)
            loss_sum += J.item() * n_batch
            n_correct += (logits.argmax(dim=1) == y).sum().item()
            n_seen += n_batch

    if n_seen == 0:
        return float('nan'), float('nan')
    return loss_sum / n_seen, n_correct / n_seen


num_epochs = 8
for epoch in range(num_epochs):
    train_loss, train_acc = _run_epoch(model, train_loader, loss, optimiser)
    print(f'Epoch {epoch + 1}, train loss: {train_loss:.2f}, train acc: {train_acc:.2f}')

    # No optimiser: the model runs in eval mode with gradients disabled.
    val_loss, val_acc = _run_epoch(model, val_loader, loss)
    print(f'Epoch {epoch + 1}, val loss: {val_loss:.2f}, val acc: {val_acc:.2f}')
    

Epoch 1, train loss: 1.58, train acc: 0.41
Epoch 1, val loss: 1.34, val acc: 0.50
Epoch 2, train loss: 1.05, train acc: 0.62
Epoch 2, val loss: 1.03, val acc: 0.62
Epoch 3, train loss: 0.81, train acc: 0.72
Epoch 3, val loss: 0.82, val acc: 0.70
Epoch 4, train loss: 0.66, train acc: 0.77
Epoch 4, val loss: 0.72, val acc: 0.74
Epoch 5, train loss: 0.56, train acc: 0.81
Epoch 5, val loss: 0.71, val acc: 0.75
Epoch 6, train loss: 0.47, train acc: 0.84
Epoch 6, val loss: 0.67, val acc: 0.77
Epoch 7, train loss: 0.39, train acc: 0.87
Epoch 7, val loss: 0.66, val acc: 0.78
Epoch 8, train loss: 0.31, train acc: 0.89
Epoch 8, val loss: 0.65, val acc: 0.78


In [11]:
# Checkpoint: write only the model's state_dict (parameters and buffers) to
# disk, not the module object itself.
PATH = './cifar_model.pth'
torch.save(obj=model.state_dict(), f=PATH)

In [12]:
# Held-out test split, preprocessed with the same `transform` as the training data.
test_dataset = datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
# Evaluate in moderate fixed-size batches instead of pushing the entire test
# set through the network at once, which needs activation memory for every
# image simultaneously. Accuracy does not depend on sample order, so
# shuffling is turned off.
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

In [13]:
# Final evaluation: count correct top-1 predictions over the whole test loader.
correct = 0
total = 0

# Switch Dropout and BatchNorm to inference behaviour; without this the
# reported accuracy is computed with units randomly dropped and with
# per-batch normalisation statistics.
model.eval()
# Send each batch to whichever device the model's parameters live on.
device = next(model.parameters()).device

with torch.no_grad():
    for x, y in test_loader:
        x = x.to(device)
        y = y.to(device)
        outputs = model(x)
        # Index of the largest logit per row is the predicted class.
        predicted = outputs.argmax(dim=1)
        total += y.size(0)
        correct += (predicted == y).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 78 %
