In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import torch

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x13c8a924250>

In [2]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [3]:
#importing cifar10 data and loading the training and validation sets 
from torchvision import datasets, transforms
data_path = 'data/'

cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

In [4]:
cifar10

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data/
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4915, 0.4823, 0.4468), std=(0.247, 0.2435, 0.2616))
           )

In [5]:
cifar10_val

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: data/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4915, 0.4823, 0.4468), std=(0.247, 0.2435, 0.2616))
           )

In [6]:
#Function to print out last models parameter count, training accuracy and validation accuracy
def model_summary():
    print(model)
    print("Parameter Count:" , sum([p.numel() for p in model.parameters()]))

    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            batch_size = imgs.shape[0]
            outputs = model(imgs.view(batch_size,-1))
            _, predicted = torch.max(outputs, dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())

    print("Training Accuracy: %f" % (correct / total))

    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            batch_size = imgs.shape[0]
            outputs = model(imgs.view(batch_size,-1))
            _, predicted = torch.max(outputs, dim=1)
            total += labels.shape[0]
            correct += int((predicted == labels).sum())

    print("Validation Accuracy: %f" % (correct / total))

In [7]:
#Seperating training and validation into batches for faster processing
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=1000,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=1000,
                                         shuffle=True)

In [8]:
#setting device to GPU if applicable 
device = (torch.device('cuda') if torch.cuda.is_available()
          else torch.device('cpu'))
#device =torch.device('cpu') #uncomment if you want to force device=cpu
print(f"Training on device {device}.")


Training on device cuda.


In [9]:
#First model, 1 hidden layer with size of 512
import torch
import torch.nn as nn
import torch.optim as optim
import datetime

model1 = nn.Sequential(
            nn.Linear(3072, 512),
            nn.Tanh(),
            nn.Linear(512, 10),
            nn.LogSoftmax(dim=1)).to(device)

model=model1

learning_rate = 1e-2

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

loss_fn = nn.NLLLoss().to(device)

n_epochs = 300

for epoch in range(n_epochs):
    epoch=epoch+1#added just to format print epoch 1,10,20...200 later on
    for imgs, labels in train_loader:
        imgs = imgs.to(device=device)  # <1>
        labels = labels.to(device=device)
        
        batch_size = imgs.shape[0]
        out = model(imgs.view(batch_size,-1))
        loss = loss_fn(out, labels)
                
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if epoch == 1  or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss))

2022-03-04 22:30:29.276114 Epoch 1, Training loss 1.9310541152954102
2022-03-04 22:31:46.964814 Epoch 10, Training loss 1.7414556741714478
2022-03-04 22:33:13.222461 Epoch 20, Training loss 1.6992571353912354
2022-03-04 22:34:39.379048 Epoch 30, Training loss 1.6687684059143066
2022-03-04 22:36:05.441707 Epoch 40, Training loss 1.574831247329712
2022-03-04 22:37:31.376514 Epoch 50, Training loss 1.5466022491455078
2022-03-04 22:38:57.390502 Epoch 60, Training loss 1.5148578882217407
2022-03-04 22:40:23.342599 Epoch 70, Training loss 1.4609713554382324
2022-03-04 22:41:49.192217 Epoch 80, Training loss 1.446658730506897
2022-03-04 22:43:15.035403 Epoch 90, Training loss 1.3557206392288208
2022-03-04 22:44:41.084784 Epoch 100, Training loss 1.424475073814392
2022-03-04 22:46:06.971936 Epoch 110, Training loss 1.367943525314331
2022-03-04 22:47:32.787593 Epoch 120, Training loss 1.3909856081008911
2022-03-04 22:48:58.731197 Epoch 130, Training loss 1.3064318895339966
2022-03-04 22:50:24.7

KeyboardInterrupt: 

In [10]:
#Using predifined function(self explanitory)
model_summary()

Sequential(
  (0): Linear(in_features=3072, out_features=512, bias=True)
  (1): Tanh()
  (2): Linear(in_features=512, out_features=10, bias=True)
  (3): LogSoftmax(dim=1)
)
Parameter Count: 1578506
Training Accuracy: 0.702300
Validation Accuracy: 0.499800


In [12]:
#Third model with following hidden layer size..
#    1st hidden layer =4096
#    2nd hidden layer = 1024
#    3rd hidden layer = 64
import torch
import torch.nn as nn
import torch.optim as optim
import datetime

model3 = nn.Sequential(
            nn.Linear(3072, 4096),
            nn.Tanh(),
            nn.Linear(4096, 1024),
            nn.Tanh(),
            nn.Linear(1024, 64),
            nn.Tanh(),
            nn.Linear(64, 10),
            nn.LogSoftmax(dim=1)).to(device)

model=model3
learning_rate = 1e-2

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

loss_fn = nn.NLLLoss().to(device)

n_epochs = 200

for epoch in range(n_epochs):
    epoch=epoch+1#added just to format print epoch 1,10,20...200 later on
    for imgs, labels in train_loader:
        imgs = imgs.to(device=device)  # <1>
        labels = labels.to(device=device)
        
        batch_size = imgs.shape[0]
        out = model(imgs.view(batch_size,-1))
        loss = loss_fn(out, labels)
                
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss))

2022-03-04 23:07:25.913612 Epoch 1, Training loss 2.1229357719421387
2022-03-04 23:08:41.942802 Epoch 10, Training loss 1.8321539163589478
2022-03-04 23:10:06.391315 Epoch 20, Training loss 1.794692873954773
2022-03-04 23:11:30.877161 Epoch 30, Training loss 1.670572280883789
2022-03-04 23:12:55.298707 Epoch 40, Training loss 1.6845860481262207
2022-03-04 23:14:19.633908 Epoch 50, Training loss 1.6629959344863892
2022-03-04 23:15:44.173685 Epoch 60, Training loss 1.5822911262512207
2022-03-04 23:17:08.531978 Epoch 70, Training loss 1.5260292291641235
2022-03-04 23:18:32.932765 Epoch 80, Training loss 1.519485592842102
2022-03-04 23:19:57.221113 Epoch 90, Training loss 1.4629285335540771
2022-03-04 23:21:21.469079 Epoch 100, Training loss 1.4809085130691528
2022-03-04 23:22:45.698898 Epoch 110, Training loss 1.3728492259979248
2022-03-04 23:24:09.986523 Epoch 120, Training loss 1.4014768600463867
2022-03-04 23:25:34.268310 Epoch 130, Training loss 1.361535668373108
2022-03-04 23:26:58.5

In [13]:
#Using predifined function(self explanitory)
model_summary()

Sequential(
  (0): Linear(in_features=3072, out_features=4096, bias=True)
  (1): Tanh()
  (2): Linear(in_features=4096, out_features=1024, bias=True)
  (3): Tanh()
  (4): Linear(in_features=1024, out_features=64, bias=True)
  (5): Tanh()
  (6): Linear(in_features=64, out_features=10, bias=True)
  (7): LogSoftmax(dim=1)
)
Parameter Count: 16848586
Training Accuracy: 0.655860
Validation Accuracy: 0.485500
