<a href="https://colab.research.google.com/github/Daivar/Deep_Learning_Models/blob/main/Transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import copy
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
from torchvision import datasets,transforms 

In [13]:
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

In [14]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
project_name = 'chest X-ray'

In [16]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = mean, std = std)
])

In [17]:
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [18]:
data_dir = 'datasets/cifar10/train'
batch_size = 8
num_workers = 2

In [19]:
trainset = datasets.CIFAR10(root=data_dir,
                            train=True,
                            download=True,
                            transform=train_transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to datasets/cifar10/train/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting datasets/cifar10/train/cifar-10-python.tar.gz to datasets/cifar10/train


In [20]:
testset = datasets.CIFAR10(root=data_dir,
                           train=False,
                           download=True,
                           transform=test_transform)

Files already downloaded and verified


In [21]:
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=num_workers)

In [23]:
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers)

In [24]:
dataloaders = {
    'train': trainloader,
    'test': testloader
}

In [25]:
dataset_sizes = { 'train': len(trainloader), 'test': len(testloader) }

In [26]:
dataset_sizes # 1250 * 8 = 10000 and 6250 * 8 = 50000

{'test': 1250, 'train': 6250}

In [27]:
class_names = trainset.classes
print(class_names)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [28]:
# Load the pretrained model, Resnet18

from torchvision import models

In [29]:
model = models.resnet18(pretrained=True) # pretrained=False # set to false, when you only want to use the architecture

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [None]:
# Freeze and replace
dir(model)

In [None]:
dir(list(model.parameters())[0])

In [32]:
# Freezing in Pytorch: specify that the model params do not need gradients
for param in model.parameters():
    param.requires_grad = False

In [None]:
for param in model.parameters():
    print(f'{param.name} : {param.requires_grad}')

In [34]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [35]:
model.layer4[0]

BasicBlock(
  (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (downsample): Sequential(
    (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [36]:
# take the input feature count of the fully-connected layer (fc)
# ... why we need that? Because in the subsequent step we are going to 
# ... replace this layer, so we need to know how many input features it has
num_ftrs = model.fc.in_features
num_ftrs

512

In [37]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [38]:
# we are going to classify images into 10, not 1000 catgories
# It performs a linear transformation: https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#torch.nn.Linear
model.fc = nn.Linear(num_ftrs, 10)

In [61]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [48]:
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [54]:
lossf            = nn.CrossEntropyLoss()
optimizer_ft     = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
epochs = 1

In [55]:
t = torch.randn(1, requires_grad=True)
t

tensor([-0.9093], requires_grad=True)

In [56]:
model.fc

Linear(in_features=512, out_features=10, bias=True)

In [57]:
optimizer = optim.SGD([t], lr=0.001)
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=10)
for epoch in range(0, 30):
    optimizer.step()
    exp_lr_scheduler.step()
    print('Epoch {}, lr {}'.format(epoch, optimizer.param_groups[0]['lr']))

Epoch 0, lr 0.001
Epoch 1, lr 0.001
Epoch 2, lr 0.001
Epoch 3, lr 0.001
Epoch 4, lr 0.001
Epoch 5, lr 0.001
Epoch 6, lr 0.001
Epoch 7, lr 0.001
Epoch 8, lr 0.001
Epoch 9, lr 0.01
Epoch 10, lr 0.01
Epoch 11, lr 0.01
Epoch 12, lr 0.01
Epoch 13, lr 0.01
Epoch 14, lr 0.01
Epoch 15, lr 0.01
Epoch 16, lr 0.01
Epoch 17, lr 0.01
Epoch 18, lr 0.01
Epoch 19, lr 0.1
Epoch 20, lr 0.1
Epoch 21, lr 0.1
Epoch 22, lr 0.1
Epoch 23, lr 0.1
Epoch 24, lr 0.1
Epoch 25, lr 0.1
Epoch 26, lr 0.1
Epoch 27, lr 0.1
Epoch 28, lr 0.1
Epoch 29, lr 1.0


In [None]:
model.state_dict()

In [59]:
def train_model(model, lossf, optimizer, scheduler, num_epochs):
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        step = 0
        
        for phase in ['train', 'test']:
            # model.train() is like the opposite of model.eval() - layers like dropout, batchnorm are switched on
            model.train(True) if phase == 'train' else model.train(False)
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = lossf(outputs, labels)
                step += 1
                if step % 500 == 0:
                    print('Epoch: {} Loss: {:.4f},  Step: {}'.format(epoch, loss.item(), step))
                
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()   # perform backprop
                    optimizer.step()  # updates the weigths in the model from the gradients calculated
                    scheduler.step()  # inform LR scheduler that we completed one step, so it can take appropriate action

                # statistics
                running_loss += loss.item() * inputs.size(0) # 8 loss [ 10.1, 10.1, ... , 10.1]
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase] 
            # dataset_sizes[phase] * batch_size ---> will be 6250 * 8 for the train dataset i.e. entire train dataset
            epoch_acc = running_corrects.double() / (dataset_sizes[phase] * batch_size)

            print('{} Loss: {:.4f} Acc: {:.4f} '.format(phase, epoch_loss, epoch_acc))

            # if we note, that we are inside a loop, then  
            # ... this is the max() algorithm with additional operation of deep copy
            # ... good argument why we still need to know how to perform max(), min() 
            # ... and so on using loops and branches
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        print()
    
    print('Training complete')
    print('Best test Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model

In [62]:
model = train_model(model, lossf, optimizer_ft, exp_lr_scheduler, num_epochs=epochs)

Epoch 0/0
----------
Epoch: 0 Loss: 1.7172,  Step: 500
Epoch: 0 Loss: 2.6325,  Step: 1000
Epoch: 0 Loss: 1.8524,  Step: 1500
Epoch: 0 Loss: 1.1489,  Step: 2000
Epoch: 0 Loss: 0.6852,  Step: 2500
Epoch: 0 Loss: 2.3514,  Step: 3000
Epoch: 0 Loss: 1.8246,  Step: 3500
Epoch: 0 Loss: 0.8802,  Step: 4000
Epoch: 0 Loss: 1.1198,  Step: 4500
Epoch: 0 Loss: 2.1057,  Step: 5000
Epoch: 0 Loss: 1.0947,  Step: 5500
Epoch: 0 Loss: 1.4518,  Step: 6000
train Loss: 11.0811 Acc: 0.5253 
Epoch: 0 Loss: 0.7788,  Step: 6500
Epoch: 0 Loss: 0.7005,  Step: 7000
Epoch: 0 Loss: 0.6376,  Step: 7500
test Loss: 6.4685 Acc: 0.7238 

Training complete
Best test Acc: 0.723800
