In [1]:
# import libraries
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

import numpy as np
from tqdm import tqdm

from torch.utils.tensorboard import SummaryWriter
from utils import device, get_num_correct
from vgg16modified import Network

OK CUDA was avaialbe.
cuda


In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    print('OK CUDA was avaialbe.')
else:
    device = torch.device('cpu')
print(device)

OK CUDA was avaialbe.
cuda


In [3]:
# declare the transforms
data_transforms = {
    'train': transforms.Compose([
        # add augmentations
        transforms.ColorJitter(brightness=0.25, saturation=0.1),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ]),
    'test': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
}

In [4]:
# choose the training and test datasets
print(f'- Downlaoding training set')
train_set = torchvision.datasets.CIFAR10('ds/train/', train=True, download=True, transform=data_transforms['train'])
print(f'- Downloading testing set')
test_set = torchvision.datasets.CIFAR10('ds/test/', train=False, download=True, transform=data_transforms['test'])

- Downlaoding training set
Files already downloaded and verified
- Downloading testing set
Files already downloaded and verified


In [5]:
batch_size = 256
valid_size = 0.5  # percentage of test_set to be used as validation

# obtain training indices that will be used for validation
num_test = len(test_set)
indices = list(range(num_test))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_test))
test_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
valid_sampler = SubsetRandomSampler(valid_idx)
test_sampler = SubsetRandomSampler(test_idx)

# prepare the data loaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=1)
valid_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, sampler=valid_sampler, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, sampler=test_sampler, num_workers=1)

In [6]:
vgg16 = torchvision.models.vgg16(pretrained=True)
vgg16



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [7]:
# replace the vgg16 classifier
model = Network(vgg16)
model

Network(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilati

In [8]:
# Freeze Layers
freeze_features = 26
freeze_classifier = 0
for layer_num, child in enumerate(model.features.children()):
    if layer_num < freeze_features:
        for param in child.parameters():
            param.requires_grad_(False)
    else:
        print(f'Freezed: {child}')

Freezed: Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Freezed: ReLU(inplace=True)
Freezed: Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Freezed: ReLU(inplace=True)
Freezed: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


In [9]:
for layer_num, child in enumerate(model.classifier.children()):
    if layer_num < freeze_classifier:
        for param in child.parameters():
            param.requires_grad_(False)
    else:
        print(f'Freezed: {child}')

Freezed: Linear(in_features=512, out_features=256, bias=True)
Freezed: ReLU(inplace=True)
Freezed: Linear(in_features=256, out_features=128, bias=True)
Freezed: ReLU(inplace=True)
Freezed: Linear(in_features=128, out_features=10, bias=True)


In [10]:
model.to(device)

criterion = nn.CrossEntropyLoss()  # loss function (categorical cross-entropy)
optimizer = optim.SGD(
    [      # parameters which need optimization
        {'params':model.features[19:].parameters(), 'lr':0.001},
        {'params':model.classifier.parameters()}
    ], lr=0.01, momentum=0.9, weight_decay=1e-3)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=1/3, patience=5, verbose=True) # lr scheduler

comment = f'-transferlr_vgg16'  # will be used for naming the run
tb = SummaryWriter(comment=comment)

# initialize tracker for minimum validation loss
valid_loss_min = np.Inf  # set initial minimum to infinity
num_epochs = 30  # number of epochs used for training

for epoch in range(num_epochs):
    train_loss, train_correct = 0, 0  # wil be used to track the running loss and correct
    ###################
    # train the model #
    ###################
    train_loop = tqdm(train_loader)
    model.train()  # set the model to train mode
    for batch in train_loop:
        images, labels = batch[0].to(device), batch[1].to(device)  # load the batch to the available device (cpu/gpu)
        preds = model(images)  # forward pass
        loss = criterion(preds, labels)  # calculate loss
        optimizer.zero_grad()  # clear accumulated gradients from the previous pass
        loss.backward()  # backward pass
        optimizer.step()  # perform a single optimization step

        train_loss += loss.item() * labels.size(0) # update the running loss
        train_correct += get_num_correct(preds, labels)  # update running num correct

        train_loop.set_description(f'Epoch [{epoch+1:2d}/{num_epochs}]')
        train_loop.set_postfix(loss=loss.item(), acc=train_correct/len(train_set))

    # add train loss and train accuracy for the current epoch to tensorboard
    tb.add_scalar('Train Loss', train_loss, epoch)
    tb.add_scalar('Train Accuracy', train_correct/len(train_set), epoch)

    model.eval()  # set the model to evaluation mode
    with torch.no_grad():  # turn off grad tracking, as we don't need gradients for validation
        valid_loss, valid_correct = 0, 0  # will be used to track the running validation loss and correct
        ######################
        # validate the model #
        ######################
        for batch in valid_loader:
            images, labels = batch[0].to(device), batch[1].to(device)  # load the batch to the available device
            preds = model(images)  # forward pass
            loss = criterion(preds, labels)  # calculate the loss

            valid_loss += loss.item() * labels.size(0)  # update the running loss
            valid_correct += get_num_correct(preds, labels)  # update running num correct
            

        # add validation loss and validation accuracy for the current epoch to tensorboard
        tb.add_scalar('Validation Loss', valid_loss, epoch)
        tb.add_scalar('Validation Accuracy', valid_correct/len(valid_loader.sampler), epoch)

        # print training/validation statistics
        # calculate average loss over an epoch
        train_loss = train_loss/len(train_set)
        valid_loss = valid_loss/len(valid_loader.sampler)
        train_loop.write(f'\t\tAvg training loss: {train_loss:.6f}\tAvg validation loss: {valid_loss:.6f}')
        scheduler.step(valid_loss)

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            train_loop.write(f'\t\tvalid_loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f})  saving model...')
            torch.save(model.state_dict(), f'./models/model{comment}.pth')
            valid_loss_min = valid_loss


        test_loss, test_correct = 0, 0  # will be used to track the running test loss and correct
        ##################
        # test the model #
        ##################
        for batch in test_loader:
            images, labels = batch[0].to(device), batch[1].to(device)  # load the batch to the available device
            preds = model(images)  # forward pass
            loss = criterion(preds, labels)  # calculate the loss

            test_loss += loss.item() * labels.size(0)  # update the running loss
            test_correct += get_num_correct(preds, labels)  # update running num correct

        # add test loss and test accuracy for the current epoch to tensorboard
        tb.add_scalar('Test Loss', test_loss, epoch)
        tb.add_scalar('Test Accuracy', test_correct/len(test_loader.sampler), epoch)


Epoch [ 1/30]: 100%|██████████| 196/196 [00:29<00:00,  6.75it/s, acc=0.512, loss=1.1]  


		Avg training loss: 1.396535	Avg validation loss: 0.991712
		valid_loss decreased (inf --> 0.991712)  saving model...


Epoch [ 2/30]: 100%|██████████| 196/196 [00:26<00:00,  7.26it/s, acc=0.635, loss=1.14] 


		Avg training loss: 1.034078	Avg validation loss: 0.911661
		valid_loss decreased (0.991712 --> 0.911661)  saving model...


Epoch [ 3/30]: 100%|██████████| 196/196 [00:30<00:00,  6.42it/s, acc=0.661, loss=1.02] 


		Avg training loss: 0.955677	Avg validation loss: 0.835294
		valid_loss decreased (0.911661 --> 0.835294)  saving model...


Epoch [ 4/30]: 100%|██████████| 196/196 [00:32<00:00,  6.01it/s, acc=0.685, loss=1.08] 


		Avg training loss: 0.896698	Avg validation loss: 0.782570
		valid_loss decreased (0.835294 --> 0.782570)  saving model...


Epoch [ 5/30]: 100%|██████████| 196/196 [00:34<00:00,  5.70it/s, acc=0.698, loss=0.911]


		Avg training loss: 0.857689	Avg validation loss: 0.764032
		valid_loss decreased (0.782570 --> 0.764032)  saving model...


Epoch [ 6/30]: 100%|██████████| 196/196 [00:32<00:00,  5.99it/s, acc=0.705, loss=0.727]


		Avg training loss: 0.834442	Avg validation loss: 0.753213
		valid_loss decreased (0.764032 --> 0.753213)  saving model...


Epoch [ 7/30]: 100%|██████████| 196/196 [00:31<00:00,  6.28it/s, acc=0.716, loss=0.894]


		Avg training loss: 0.800323	Avg validation loss: 0.731767
		valid_loss decreased (0.753213 --> 0.731767)  saving model...


Epoch [ 8/30]: 100%|██████████| 196/196 [00:32<00:00,  5.98it/s, acc=0.726, loss=0.766]


		Avg training loss: 0.776558	Avg validation loss: 0.716521
		valid_loss decreased (0.731767 --> 0.716521)  saving model...


Epoch [ 9/30]: 100%|██████████| 196/196 [00:31<00:00,  6.17it/s, acc=0.726, loss=0.742]


		Avg training loss: 0.770554	Avg validation loss: 0.707563
		valid_loss decreased (0.716521 --> 0.707563)  saving model...


Epoch [10/30]: 100%|██████████| 196/196 [00:35<00:00,  5.48it/s, acc=0.736, loss=0.59] 


		Avg training loss: 0.745659	Avg validation loss: 0.694599
		valid_loss decreased (0.707563 --> 0.694599)  saving model...


Epoch [11/30]: 100%|██████████| 196/196 [00:31<00:00,  6.29it/s, acc=0.74, loss=0.795] 


		Avg training loss: 0.731402	Avg validation loss: 0.678858
		valid_loss decreased (0.694599 --> 0.678858)  saving model...


Epoch [12/30]: 100%|██████████| 196/196 [00:31<00:00,  6.20it/s, acc=0.747, loss=0.713]


		Avg training loss: 0.716510	Avg validation loss: 0.682861


Epoch [13/30]: 100%|██████████| 196/196 [00:30<00:00,  6.36it/s, acc=0.75, loss=0.586] 


		Avg training loss: 0.704470	Avg validation loss: 0.668189
		valid_loss decreased (0.678858 --> 0.668189)  saving model...


Epoch [14/30]: 100%|██████████| 196/196 [00:37<00:00,  5.29it/s, acc=0.753, loss=0.782]


		Avg training loss: 0.695582	Avg validation loss: 0.671861


Epoch [15/30]: 100%|██████████| 196/196 [00:32<00:00,  6.08it/s, acc=0.758, loss=0.609]


		Avg training loss: 0.681115	Avg validation loss: 0.661429
		valid_loss decreased (0.668189 --> 0.661429)  saving model...


Epoch [16/30]: 100%|██████████| 196/196 [00:36<00:00,  5.42it/s, acc=0.762, loss=0.487]


		Avg training loss: 0.671349	Avg validation loss: 0.658216
		valid_loss decreased (0.661429 --> 0.658216)  saving model...


Epoch [17/30]: 100%|██████████| 196/196 [00:33<00:00,  5.81it/s, acc=0.764, loss=0.589]


		Avg training loss: 0.667157	Avg validation loss: 0.642292
		valid_loss decreased (0.658216 --> 0.642292)  saving model...


Epoch [18/30]: 100%|██████████| 196/196 [00:30<00:00,  6.41it/s, acc=0.768, loss=0.766]


		Avg training loss: 0.651778	Avg validation loss: 0.638804
		valid_loss decreased (0.642292 --> 0.638804)  saving model...


Epoch [19/30]: 100%|██████████| 196/196 [00:30<00:00,  6.34it/s, acc=0.774, loss=0.613]


		Avg training loss: 0.640824	Avg validation loss: 0.637800
		valid_loss decreased (0.638804 --> 0.637800)  saving model...


Epoch [20/30]: 100%|██████████| 196/196 [00:30<00:00,  6.42it/s, acc=0.777, loss=0.499]


		Avg training loss: 0.629878	Avg validation loss: 0.647888


Epoch [21/30]: 100%|██████████| 196/196 [00:30<00:00,  6.39it/s, acc=0.777, loss=0.662]


		Avg training loss: 0.625629	Avg validation loss: 0.634561
		valid_loss decreased (0.637800 --> 0.634561)  saving model...


Epoch [22/30]: 100%|██████████| 196/196 [00:30<00:00,  6.34it/s, acc=0.782, loss=0.676]


		Avg training loss: 0.612757	Avg validation loss: 0.641938


Epoch [23/30]: 100%|██████████| 196/196 [00:29<00:00,  6.60it/s, acc=0.782, loss=0.738]


		Avg training loss: 0.605817	Avg validation loss: 0.648690


Epoch [24/30]: 100%|██████████| 196/196 [00:30<00:00,  6.45it/s, acc=0.786, loss=0.703]


		Avg training loss: 0.600906	Avg validation loss: 0.625585
		valid_loss decreased (0.634561 --> 0.625585)  saving model...


Epoch [25/30]: 100%|██████████| 196/196 [00:31<00:00,  6.18it/s, acc=0.79, loss=0.695] 


		Avg training loss: 0.590871	Avg validation loss: 0.626316


Epoch [26/30]: 100%|██████████| 196/196 [00:27<00:00,  7.07it/s, acc=0.79, loss=0.522] 


		Avg training loss: 0.586805	Avg validation loss: 0.629201


Epoch [27/30]: 100%|██████████| 196/196 [00:29<00:00,  6.63it/s, acc=0.794, loss=0.535]


		Avg training loss: 0.576479	Avg validation loss: 0.629249


Epoch [28/30]: 100%|██████████| 196/196 [00:31<00:00,  6.26it/s, acc=0.798, loss=0.571]


		Avg training loss: 0.564457	Avg validation loss: 0.624567
		valid_loss decreased (0.625585 --> 0.624567)  saving model...


Epoch [29/30]: 100%|██████████| 196/196 [00:31<00:00,  6.29it/s, acc=0.799, loss=0.497]


		Avg training loss: 0.562412	Avg validation loss: 0.618387
		valid_loss decreased (0.624567 --> 0.618387)  saving model...


Epoch [30/30]: 100%|██████████| 196/196 [00:29<00:00,  6.56it/s, acc=0.8, loss=0.458]  


		Avg training loss: 0.554520	Avg validation loss: 0.642175
