In [2]:
!pip install torch

Collecting torch
  Downloading torch-2.0.1-cp38-cp38-win_amd64.whl (172.4 MB)
Installing collected packages: torch
Successfully installed torch-2.0.1


In [7]:
!pip install torchvision

Collecting torchvision
  Downloading torchvision-0.15.2-cp38-cp38-win_amd64.whl (1.2 MB)
Installing collected packages: torchvision
Successfully installed torchvision-0.15.2


In [3]:
import torch
print(torch.__version__)

2.0.1+cpu


In [4]:
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [28]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

In [27]:
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')
device

device(type='cuda')

In [11]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

trainset = datasets.CIFAR10(root='~/.pytorch/CIFAR10',train=True, download=True,transform=transform)
testset = datasets.CIFAR10(root='~/.pytorch/CIFAR10',train=False, transform=transform)

trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to C:\Users\user/.pytorch/CIFAR10\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [03:22<00:00, 841702.82it/s] 


Extracting C:\Users\user/.pytorch/CIFAR10\cifar-10-python.tar.gz to C:\Users\user/.pytorch/CIFAR10


In [12]:
for images, labels in trainloader:
    print(images.size(), labels.size())
    break

torch.Size([64, 3, 224, 224]) torch.Size([64])


In [13]:
# pretrained the model 
model = models.vgg16(pretrained=True)

# see the classifier
model.classifier

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\user/.cache\torch\hub\checkpoints\vgg16-397923af.pth
100%|██████████| 528M/528M [08:29<00:00, 1.09MB/s]  


Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [14]:
# see the features
model.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [18]:
# freeze the network (autograd-- weights) -- Transfers Learning
for param in model.parameters():
    param.requires_grad = False
print(model.parameters)

<bound method Module.parameters of VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size

In [17]:
for i in range(0,7):
    model.classifier[i].requires_grad = True

In [20]:
# cross entropy loss => nn.CrossEntropyLoss or NLLLoss-Negative Log liklihood Loss
model.classifier[6] = nn.Sequential(
                        nn.Linear(4096, 512),
                        nn.ReLU(),
                        nn.Dropout(0.5),
                        nn.Linear(512, 10),
                        nn.LogSoftmax(dim=1))

In [22]:
model.classifier[6]

Sequential(
  (0): Linear(in_features=4096, out_features=512, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=512, out_features=10, bias=True)
  (4): LogSoftmax(dim=1)
)

In [24]:
model.parameters

<bound method Module.parameters of VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size

In [25]:
# initialize the Negative Log Likelihood Loss Function
criterion = nn.NLLLoss()

In [30]:
from torch.optim import Adam

lr = 3e-4 #0.0003
optimizer = Adam([
    {'params': model.classifier[0].parameters(),'lr':lr},
    {'params': model.classifier[3].parameters(),'lr':lr},
    {'params': model.classifier[6].parameters(),'lr':lr}
],lr=lr)

In [None]:
num_epochs = 1
batch_loss = 0
cum_epoch_loss = 0

for e in range(num_epochs):
    cum_epoch_loss = 0
    
    for batch, (images, labels) in enumerate(trainloader, 1):
        # clear the parameters 
        optimizer.zero_grad()
        # computes the predicted log probabilities for each class using the model.
        logps = model(images)
        # calculate loss
        loss = criterion(logps, labels)
        # backward pass -  computes the gradients of the loss with respect to the model's parameters.
        loss.backward()
        # update the model parameters
        optimizer.step()
        batch_loss += loss.item()
        print(f'Epoch({e}/{num_epochs} : Batch number({batch}/{len(trainloader)}) : Batch loss : {loss.item()}')
        
    print(f'Training loss : {batch_loss/len(trainloader)}')  

In [38]:
# accuracy of the model

# certain layers like dropout or batch normalization behave differently during evaluation
model.eval()

# stop gradients - inference or testing doesn't need autograd
with torch.no_grad():
    num_correct = 0
    total = 0
    
    # set trace
    for batch, (images, labels) in enumerate(testloader, 1):
        logps = model(images)
        output = torch.exp(logps)
        
        # highest probability of each image
        pred = torch.argmax(output, 1)
        print("pred" , pred)
        print("labels ", labels)
        total += labels.size(0)
        print("total: ", total)
        num_correct += (pred == labels).sum().item()
        print("num_correct ", num_correct)
        
        if batch == 5:
            break

print(f'Accuracy of the model on {total} test images: {num_correct * 100 / total}% ')

pred tensor([3, 8, 8, 8, 6, 6, 1, 6, 3, 1, 0, 9, 5, 3, 9, 8, 5, 7, 8, 6, 7, 0, 4, 1,
        5, 2, 3, 2, 9, 6, 6, 5, 4, 3, 9, 3, 4, 1, 9, 5, 2, 6, 3, 6, 0, 9, 3, 8,
        4, 6, 9, 8, 0, 3, 8, 8, 7, 3, 3, 3, 4, 3, 6, 3])
labels  tensor([3, 8, 8, 0, 6, 6, 1, 6, 3, 1, 0, 9, 5, 7, 9, 8, 5, 7, 8, 6, 7, 0, 4, 9,
        5, 2, 4, 0, 9, 6, 6, 5, 4, 5, 9, 2, 4, 1, 9, 5, 4, 6, 5, 6, 0, 9, 3, 9,
        7, 6, 9, 8, 0, 3, 8, 8, 7, 7, 4, 6, 7, 3, 6, 3])
total:  64
num_correct  49
pred tensor([6, 6, 1, 0, 4, 7, 8, 6, 8, 8, 1, 2, 9, 5, 5, 8, 8, 1, 1, 7, 3, 2, 2, 7,
        8, 8, 8, 3, 8, 6, 4, 6, 6, 0, 0, 7, 4, 4, 6, 3, 1, 1, 2, 6, 8, 2, 4, 0,
        2, 2, 1, 3, 0, 4, 6, 7, 8, 3, 1, 2, 8, 2, 8, 3])
labels  tensor([6, 2, 1, 2, 3, 7, 2, 6, 8, 8, 0, 2, 9, 3, 3, 8, 8, 1, 1, 7, 2, 5, 2, 7,
        8, 9, 0, 3, 8, 6, 4, 6, 6, 0, 0, 7, 4, 5, 6, 3, 1, 1, 3, 6, 8, 7, 4, 0,
        6, 2, 1, 3, 0, 4, 2, 7, 8, 3, 1, 2, 8, 0, 8, 3])
total:  128
num_correct  96
pred tensor([3, 2, 4, 1, 8, 9, 1, 2, 9, 7, 2, 1, 2,

In [None]:
for i in range(24,31):
  model.features[i].requires_grad = True

In [None]:
from torch.optim import Adam

lr = 3e-4
optimizer = Adam([
    { 'params': model.features[24].parameters(), 'lr': lr},
    { 'params': model.features[26].parameters(), 'lr': lr},
    { 'params': model.features[28].parameters(), 'lr': lr},
    { 'params': model.classifier[0].parameters(), 'lr': lr},
    { 'params': model.classifier[3].parameters(), 'lr': lr},
    { 'params': model.classifier[6].parameters(), 'lr': lr}
    ], lr=lr)

In [None]:
num_epochs = 3
batch_loss = 0
cum_epoch_loss = 0

for e in range(num_epochs):
  cum_epoch_loss = 0
  
  for batch, (images, labels) in enumerate(trainloader,1):
    images = images.to(device)
    labels = labels.to(device)

    optimizer.zero_grad()
    logps = model(images)
    loss = criterion(logps, labels)
    loss.backward()
    optimizer.step()
    
    batch_loss += loss.item()
    print(f'Epoch({e}/{num_epochs} : Batch number({batch}/{len(trainloader)}) : Batch loss : {loss.item()}')
    
  print(f'Training loss : {batch_loss/len(trainloader)}')  
    

In [None]:
model.eval()
with torch.no_grad():
    num_correct = 0
    total = 0

    #set_trace()
    for batch, (images, labels) in enumerate(testloader,1):
        
        logps = model(images)
        output = torch.exp(logps)
        
        pred = torch.argmax(output, 1)
        total += labels.size(0)
        num_correct += (pred == labels).sum().item()
        print(f'Batch ({batch}/{len(testloader)})')
        
        if batch == 5:
          break

    print(f'Accuracy of the model on {total} test images: {num_correct * 100 / total}% ')