In [1]:
import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [6]:
# get data mean and std for each channel
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# initialize data transformer
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
    ])

# load and transform data
train = datasets.CIFAR10(root='~/.pytorch/CIFAR10',train=True, download=True,transform=transform)
test = datasets.CIFAR10(root='~/.pytorch/CIFAR10',train=False, transform=transform)

train_loader = DataLoader(train, batch_size=64, shuffle=True)
test_loader = DataLoader(test, batch_size=64, shuffle=False)

Files already downloaded and verified


In [7]:
for images, labels in train_loader:
  print(images.size(), labels.size())
  break

torch.Size([64, 3, 224, 224]) torch.Size([64])


In [8]:
# get pre-trained VGG16 model
model = models.vgg16(pretrained=True)
model

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 246MB/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [9]:
# freeze mosel params
for param in model.parameters():
  param.requires_grad = False

In [10]:
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [11]:
# unfreeze the classifier layers
for i in range(7):
  model.classifier[0].requires_grad = True

In [12]:
# define new sequential network
new_seq = nn.Sequential(
                      nn.Linear(4096,512),
                      nn.ReLU(),
                      nn.Dropout(0.5),
                      nn.Linear(512,10),
                      nn.LogSoftmax(dim=1)
                      )

In [13]:
# replace the final layer of the classifier with this new sequential model
model.classifier[6] = new_seq

In [14]:
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Sequential(
    (0): Linear(in_features=4096, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=512, out_features=10, bias=True)
    (4): LogSoftmax(dim=1)
  )
)

In [15]:
for param in model.parameters():
  print(param.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
True
True
True
True


In [18]:
# define criterion for loss and optimizer

from torch.optim import Adam

model = model.to(device)

# only trainable parameters should be passed to optimizer in this case
optimizer = Adam(filter(lambda p:p.requires_grad, model.parameters()))
criterion = nn.NLLLoss()

### Retrain the Model

In [19]:
epochs = 1
batch_loss = 0
cum_epoch_loss = 0

for i in range(epochs):
  cum_epoch_loss = 0

  for batch_n, (X_train, y_train) in enumerate(train_loader,1):
    X_train = X_train.to(device)
    y_train = y_train.to(device)

    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()

    batch_loss += loss.item()
    print(f'Epoch({i}/{epochs}) : Batch no ({batch_n}/{len(train_loader)}) : Batch loss : {loss.item()}')

  print(f'Training loss : {batch_loss/len(train_loader)}')

Epoch(0/1) : Batch no (1/782) : Batch loss : 2.2825655937194824
Epoch(0/1) : Batch no (2/782) : Batch loss : 2.1926777362823486
Epoch(0/1) : Batch no (3/782) : Batch loss : 2.044671058654785
Epoch(0/1) : Batch no (4/782) : Batch loss : 1.8856464624404907
Epoch(0/1) : Batch no (5/782) : Batch loss : 1.7129738330841064
Epoch(0/1) : Batch no (6/782) : Batch loss : 1.5244320631027222
Epoch(0/1) : Batch no (7/782) : Batch loss : 1.2131257057189941
Epoch(0/1) : Batch no (8/782) : Batch loss : 1.3469524383544922
Epoch(0/1) : Batch no (9/782) : Batch loss : 1.2806134223937988
Epoch(0/1) : Batch no (10/782) : Batch loss : 0.928964376449585
Epoch(0/1) : Batch no (11/782) : Batch loss : 1.1841177940368652
Epoch(0/1) : Batch no (12/782) : Batch loss : 1.15919029712677
Epoch(0/1) : Batch no (13/782) : Batch loss : 0.9362459182739258
Epoch(0/1) : Batch no (14/782) : Batch loss : 1.1043065786361694
Epoch(0/1) : Batch no (15/782) : Batch loss : 0.9515876770019531
Epoch(0/1) : Batch no (16/782) : Batch

### Get Accuracy

In [22]:
model.to('cpu')

model.eval()
with torch.no_grad():
    num_correct = 0
    total = 0

    #set_trace()
    for batch_n, (X_test, y_test) in enumerate(test_loader,1):

        y_pred = model(X_test)
        sfmax = torch.exp(y_pred)

        y_pred_class = torch.argmax(sfmax, 1)
        total += y_test.size(0)
        num_correct += (y_pred_class == y_test).sum().item()
        print(f'Batch ({batch_n}/{len(test_loader)})')

        if batch_n == 5:
          break

    print(f'Accuracy of the model on {total} test images: {num_correct * 100 / total}% ')

Batch (1/157)
Batch (2/157)
Batch (3/157)
Batch (4/157)
Batch (5/157)
Accuracy of the model on 320 test images: 80.0% 


### Fine-tuning on the Last CNN block

In [20]:
model.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [23]:
# the last Conv block ranges from 24 to 31, make these trainable
for i in range(24,31):
  model.features[i].requires_grad = True

model = model.to(device)
optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()))

In [24]:
epochs = 3
batch_loss = 0
cum_epoch_loss = 0

for i in range(epochs):
  cum_epoch_loss = 0

  for batch_n, (X_train, y_train) in enumerate(train_loader,1):
    X_train = X_train.to(device)
    y_train = y_train.to(device)

    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()

    batch_loss += loss.item()
    print(f'Epoch({i+1}/{epochs}) : Batch no: ({batch_n}/{len(train_loader)}) : Batch loss : {loss.item()}')

  print(f'Training loss : {batch_loss/len(train_loader)}')

Epoch(1/3) : Batch no: (1/782) : Batch loss : 0.3980860114097595
Epoch(1/3) : Batch no: (2/782) : Batch loss : 0.448984831571579
Epoch(1/3) : Batch no: (3/782) : Batch loss : 0.5425161719322205
Epoch(1/3) : Batch no: (4/782) : Batch loss : 0.5636059641838074
Epoch(1/3) : Batch no: (5/782) : Batch loss : 0.48988616466522217
Epoch(1/3) : Batch no: (6/782) : Batch loss : 0.7176986336708069
Epoch(1/3) : Batch no: (7/782) : Batch loss : 0.7442265748977661
Epoch(1/3) : Batch no: (8/782) : Batch loss : 0.5768059492111206
Epoch(1/3) : Batch no: (9/782) : Batch loss : 0.5499346852302551
Epoch(1/3) : Batch no: (10/782) : Batch loss : 0.3052346706390381
Epoch(1/3) : Batch no: (11/782) : Batch loss : 0.6364123225212097
Epoch(1/3) : Batch no: (12/782) : Batch loss : 0.538742184638977
Epoch(1/3) : Batch no: (13/782) : Batch loss : 0.44893333315849304
Epoch(1/3) : Batch no: (14/782) : Batch loss : 0.5587729811668396
Epoch(1/3) : Batch no: (15/782) : Batch loss : 0.49918895959854126
Epoch(1/3) : Batch

KeyboardInterrupt: ignored

### Get Accuracy

In [25]:
model.to('cpu')

model.eval()
with torch.no_grad():
    num_correct = 0
    total = 0

    for batch_n, (X_test, y_test) in enumerate(test_loader,1):

        y_pred = model(X_test)
        sfmax = torch.exp(y_pred)  # take exp as output is log of softmax

        y_pred_class = torch.argmax(sfmax, 1)
        total += labels.size(0)
        num_correct += (y_pred_class == y_test).sum().item()
        print(f'Batch ({batch_n}/{len(test_loader)})')

        if batch_n == 5:
          break

    print(f'Accuracy of the model on {total} test images: {num_correct * 100 / total}% ')

Batch (1/157)
Batch (2/157)
Batch (3/157)
Batch (4/157)
Batch (5/157)
Accuracy of the model on 320 test images: 84.6875% 
