# Transfer Learning

The two major transfer learning scenarios are as follows:



1.   Finetuning the convnet: Instead of random initialization, we initialize the network with a pretrained network, such as one trained on the ImageNet 1000-class dataset. The rest of the training proceeds as usual.
2.   ConvNet as fixed feature extractor: Here, we will freeze the weights for all of the network except that of the final fully connected layer. This last fully connected layer is replaced with a new one with random weights and only this layer is trained.


In [4]:
!pip3 install torch torchvision

Collecting torch
[?25l  Downloading https://files.pythonhosted.org/packages/49/0e/e382bcf1a6ae8225f50b99cc26effa2d4cc6d66975ccf3fa9590efcbedce/torch-0.4.1-cp36-cp36m-manylinux1_x86_64.whl (519.5MB)
[K    100% |████████████████████████████████| 519.5MB 33kB/s 
tcmalloc: large alloc 1073750016 bytes == 0x593ca000 @  0x7f667e5332a4 0x594e17 0x626104 0x51190a 0x4f5277 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x510c78 0x5119bd 0x4f5277 0x4f3338 0x510fb0 0x5119bd 0x4f6070 0x4f3338 0x510fb0 0x5119bd 0x4f6070
[?25hCollecting torchvision
[?25l  Downloading https://files.pythonhosted.org/packages/ca/0d/f00b2885711e08bd71242ebe7b96561e6f6d01fdb4b9dcf4d37e2e13c5e1/torchvision-0.2.1-py2.py3-none-any.whl (54kB)
[K    100% |████████████████████████████████| 61kB 21.8MB/s 
Collecting pillow>=4.1.1 (from torchvision)
[?25l  Downloading https://files.pythonhosted.org/packages/62/94/5430ebaa83f91cc7a9f687f

In [0]:
!wget https://dl.dropboxusercontent.com/s/b3khjfddz5wpvai/alzhdset.zip?dl=0 -O alzhdset.zip;
!unzip alzhdset.zip;
!mv "Alzheimer's Dataset" alzhdset
!ls

In [0]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()   # interactive mode

In [0]:
# Per-channel mean/std used for normalisation (the values commonly paired
# with ImageNet-pretrained torchvision models).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop + horizontal flip as augmentation.
_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]

# Evaluation pipeline: deterministic resize followed by a centre crop.
_test_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]

data_transforms = dict(
    train=transforms.Compose(_train_steps),
    test=transforms.Compose(_test_steps),
)

In [0]:
# One ImageFolder per split; class labels come from the sub-directory names.
train_data = datasets.ImageFolder(
    root="./alzhdset/Train",
    transform=data_transforms['train'],
)
test_data = datasets.ImageFolder(
    root="./alzhdset/Test",
    transform=data_transforms['test'],
)

In [0]:
# Number of images in each split, keyed by phase name.
dataset_sizes = dict(train=len(train_data), test=len(test_data))

In [0]:
batch_size = 64
n_iters = 3000
# Epoch count equivalent to roughly `n_iters` optimizer steps. The number of
# batches per epoch is derived from the dataset itself (previously a
# hard-coded 5121) so the value stays correct if the data changes.
num_epochs = int(n_iters / (len(train_data) / batch_size))

# Shuffle only the training split; evaluation order does not matter.
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

In [8]:
class_names = train_data.classes
print(class_names)

['MildDemented', 'ModerateDemented', 'NonDemented', 'VeryMildDemented']


# Using ResNet18

In [0]:
resnet18 = models.resnet18(pretrained=True)

In [10]:
print(resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

The last fc layer outputs 1000 features, but we need only 4 (one per class), so we replace it.

In [0]:
# Replace the 1000-way ImageNet head with a freshly initialised linear layer
# that has one output per class of our dataset. The size is taken from
# `class_names` instead of a hard-coded 4 so it tracks the data.
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Linear(num_ftrs, len(class_names))

In [12]:
print(resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [0]:
# Loss for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Move the network to the GPU before building the optimizer over its weights.
resnet18 = resnet18.cuda()

# Fine-tuning: every parameter of the network is optimised.
optimizer = optim.SGD(
    resnet18.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [0]:
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [0]:
# Phase name -> DataLoader lookup used by train_model.
dataloaders = dict(train=train_loader, test=test_loader)
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, loaders=None):
    """Train ``model`` and return it loaded with its best-scoring weights.

    Every epoch runs a 'train' phase (weights are updated) followed by a
    'test' phase (evaluation only). The weights with the highest 'test'
    accuracy seen so far are kept and restored before returning.

    Note: the "best" weights are selected on the 'test' split, so the
    reported best accuracy is an optimistic estimate.

    Args:
        model: network to train. Batches are moved to the device that the
            model's parameters live on.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: number of passes over the data.
        loaders: optional dict with 'train' and 'test' DataLoaders. When
            omitted, the notebook-level ``dataloaders`` dict is used.

    Returns:
        The same ``model`` object with the best weights loaded.
    """
    if loaders is None:
        loaders = dataloaders

    # Follow the model's device instead of hard-coding .cuda(), so the loop
    # also works when the model lives on the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and testing phase
        for phase in ['train', 'test']:
            training = phase == 'train'
            if training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track autograd history only while training.
                with torch.set_grad_enabled(training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if training:
                        loss.backward()
                        optimizer.step()

                # The loss is a per-batch mean, so weight it by the batch size.
                batch_len = inputs.size(0)
                running_loss += loss.item() * batch_len
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_len

            if training:
                # PyTorch >= 1.1 expects scheduler.step() after the epoch's
                # optimizer.step() calls; stepping first skips the first
                # value of the learning-rate schedule (and emits a warning).
                scheduler.step()

            # Normalise by the samples actually iterated, so the statistics
            # cannot drift from a separately maintained size table.
            denom = max(samples_seen, 1)
            epoch_loss = running_loss / denom
            epoch_acc = running_corrects / denom

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Keep a deep copy of the best weights seen so far.
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (precision) rather than '{:4f}' (minimum width), matching the
    # per-epoch accuracy lines above.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [18]:
resnet18 = train_model(resnet18, criterion, optimizer, exp_lr_scheduler,
                       num_epochs=30)

Epoch 0/29
----------
train Loss: 0.8030 Acc: 0.6233
test Loss: 0.8547 Acc: 0.6114

Epoch 1/29
----------
train Loss: 0.7929 Acc: 0.6290
test Loss: 0.8332 Acc: 0.6013

Epoch 2/29
----------
train Loss: 0.7869 Acc: 0.6309
test Loss: 0.8378 Acc: 0.6106

Epoch 3/29
----------
train Loss: 0.7928 Acc: 0.6259
test Loss: 0.8340 Acc: 0.5981

Epoch 4/29
----------
train Loss: 0.7809 Acc: 0.6405
test Loss: 0.8510 Acc: 0.6091

Epoch 5/29
----------
train Loss: 0.7799 Acc: 0.6384
test Loss: 0.8368 Acc: 0.6130

Epoch 6/29
----------
train Loss: 0.7892 Acc: 0.6284
test Loss: 0.8298 Acc: 0.5997

Epoch 7/29
----------
train Loss: 0.7759 Acc: 0.6413
test Loss: 0.8427 Acc: 0.5848

Epoch 8/29
----------
train Loss: 0.7686 Acc: 0.6384
test Loss: 0.8467 Acc: 0.6114

Epoch 9/29
----------
train Loss: 0.7747 Acc: 0.6335
test Loss: 0.8362 Acc: 0.5966

Epoch 10/29
----------
train Loss: 0.7791 Acc: 0.6376
test Loss: 0.8309 Acc: 0.6036

Epoch 11/29
----------
train Loss: 0.7772 Acc: 0.6364
test Loss: 0.8267 Acc

# ConvNet(VGG16) as fixed feature extractor

Here, we freeze the pretrained convolutional feature layers and train only the classifier head, whose last layer is replaced to output our 4 classes. Setting `requires_grad = False` on a parameter freezes it, so that its gradient is not computed in `backward()`.

In [0]:
model_conv = torchvision.models.vgg16_bn(pretrained=True)

In [0]:
# Alter last layer to output 4 features.
# Rebuild the classifier head: keep every layer except the last one and
# append a fresh linear layer with 4 outputs.
num_feats = model_conv.classifier[6].in_features
features = list(model_conv.classifier.children())[:-1]
features.append(nn.Linear(num_feats, 4))
model_conv.classifier = nn.Sequential(*features)

In [13]:
print(model_conv)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3)

In [0]:
# Freeze the convolutional feature extractor: no gradients are computed for
# these parameters during backward().
for param in model_conv.features.parameters():
    param.requires_grad_(False)

In [16]:
# Check which parameters are frozen: list each parameter (named relative to
# its top-level sub-module) together with its requires_grad flag.
for child in model_conv.children():
    for name_2, params in child.named_parameters():
        print(name_2, params.requires_grad)

0.weight False
0.bias False
1.weight False
1.bias False
3.weight False
3.bias False
4.weight False
4.bias False
7.weight False
7.bias False
8.weight False
8.bias False
10.weight False
10.bias False
11.weight False
11.bias False
14.weight False
14.bias False
15.weight False
15.bias False
17.weight False
17.bias False
18.weight False
18.bias False
20.weight False
20.bias False
21.weight False
21.bias False
24.weight False
24.bias False
25.weight False
25.bias False
27.weight False
27.bias False
28.weight False
28.bias False
30.weight False
30.bias False
31.weight False
31.bias False
34.weight False
34.bias False
35.weight False
35.bias False
37.weight False
37.bias False
38.weight False
38.bias False
40.weight False
40.bias False
41.weight False
41.bias False
0.weight True
0.bias True
3.weight True
3.bias True
6.weight True
6.bias True


In [0]:

criterion = nn.CrossEntropyLoss()

model_conv = model_conv.cuda()

# Unlike the fine-tuning run, only the classifier head is handed to the
# optimizer. Note that this covers every layer in `model_conv.classifier`,
# not just the replaced final layer; the `features` parameters are frozen.
optimizer_conv = optim.SGD(
    model_conv.classifier.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 every 7 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_conv,
    step_size=7,
    gamma=0.1,
)


In [19]:
model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train Loss: 0.9463 Acc: 0.5518
test Loss: 0.9295 Acc: 0.5575

Epoch 1/9
----------
train Loss: 0.9384 Acc: 0.5571
test Loss: 0.9073 Acc: 0.5841

Epoch 2/9
----------
train Loss: 0.9317 Acc: 0.5524
test Loss: 0.9044 Acc: 0.5848

Epoch 3/9
----------
train Loss: 0.9213 Acc: 0.5573
test Loss: 0.8909 Acc: 0.5919

Epoch 4/9
----------
train Loss: 0.9097 Acc: 0.5714
test Loss: 0.9047 Acc: 0.5754

Epoch 5/9
----------
train Loss: 0.9112 Acc: 0.5614
test Loss: 0.8968 Acc: 0.5856

Epoch 6/9
----------
train Loss: 0.9068 Acc: 0.5651
test Loss: 0.8864 Acc: 0.5864

Epoch 7/9
----------
train Loss: 0.8993 Acc: 0.5731
test Loss: 0.9063 Acc: 0.5809

Epoch 8/9
----------
train Loss: 0.9174 Acc: 0.5626
test Loss: 0.8902 Acc: 0.5856

Epoch 9/9
----------
train Loss: 0.8998 Acc: 0.5681
test Loss: 0.8980 Acc: 0.5848

Training complete in 15m 26s
Best val Acc: 0.591869


# Binary Classification

In [21]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [25]:
!ls ./drive/"My Drive"/dataset

test  train


In [7]:
cd ./drive/"My Drive"/dataset

/content/drive/My Drive/dataset


In [8]:
!ls

test  train


In [0]:
# Both splits use the same deterministic pipeline here: centre crop to
# 224x224, convert to tensor, then normalise per channel. A separate Compose
# object is built for each split.
data_transforms = {
    split: transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    for split in ('train', 'test')
}

In [0]:
# One ImageFolder per split, read relative to the current working directory.
train_data = datasets.ImageFolder(
    root="./train",
    transform=data_transforms['train'],
)
test_data = datasets.ImageFolder(
    root="./test",
    transform=data_transforms['test'],
)

In [0]:
# Number of images in each split, keyed by phase name.
dataset_sizes = dict(train=len(train_data), test=len(test_data))

In [0]:
batch_size = 64
n_iters = 3000
# Epoch count equivalent to roughly `n_iters` optimizer steps, derived from
# this section's training set (previously a hard-coded 5121).
num_epochs = int(n_iters / (len(train_data) / batch_size))

train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

# train_model reads the notebook-level `dataloaders` dict. Rebuild it so that
# training in this section iterates the loaders defined just above rather
# than the ones created for the earlier four-class dataset.
dataloaders = {
    'train': train_loader,
    'test': test_loader
}

In [60]:
class_names = train_data.classes
print(class_names)

['ALZ', 'NALZ']


In [0]:
# Using ResNet.... taking forever... will go with VGG

In [0]:
# Using VGG as feature extractor... that's taking even longer... better go with resnet as feature extractor. I'm doomed! :(

In [0]:
resnet18 = models.resnet18(pretrained=True)

In [0]:
# Freeze every pretrained parameter; the replacement `fc` layer created in
# the next cell is new and therefore trainable.
for param in resnet18.parameters():
    param.requires_grad_(False)

In [0]:
# Replace the ImageNet head with a freshly initialised (trainable) linear
# layer with one output per class. The size is taken from `class_names`
# instead of a hard-coded 2 so it tracks the data.
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Linear(num_ftrs, len(class_names))

In [50]:
# List each parameter (named relative to its top-level sub-module) together
# with its requires_grad flag.
for child in resnet18.children():
    for name_2, params in child.named_parameters():
        print(name_2, params.requires_grad)

weight False
weight False
bias False
0.conv1.weight False
0.bn1.weight False
0.bn1.bias False
0.conv2.weight False
0.bn2.weight False
0.bn2.bias False
1.conv1.weight False
1.bn1.weight False
1.bn1.bias False
1.conv2.weight False
1.bn2.weight False
1.bn2.bias False
0.conv1.weight False
0.bn1.weight False
0.bn1.bias False
0.conv2.weight False
0.bn2.weight False
0.bn2.bias False
0.downsample.0.weight False
0.downsample.1.weight False
0.downsample.1.bias False
1.conv1.weight False
1.bn1.weight False
1.bn1.bias False
1.conv2.weight False
1.bn2.weight False
1.bn2.bias False
0.conv1.weight False
0.bn1.weight False
0.bn1.bias False
0.conv2.weight False
0.bn2.weight False
0.bn2.bias False
0.downsample.0.weight False
0.downsample.1.weight False
0.downsample.1.bias False
1.conv1.weight False
1.bn1.weight False
1.bn1.bias False
1.conv2.weight False
1.bn2.weight False
1.bn2.bias False
0.conv1.weight False
0.bn1.weight False
0.bn1.bias False
0.conv2.weight False
0.bn2.weight False
0.bn2.bias False
0

In [0]:
resnet18 = resnet18.cuda()
criterion = nn.CrossEntropyLoss()
# The backbone is frozen, so only the new `fc` layer can learn. Hand just
# those parameters to the optimizer (mirroring the VGG feature-extractor
# setup) instead of every parameter, frozen ones included.
optimizer = optim.SGD(resnet18.fc.parameters(), lr=0.001, momentum=0.9)

In [0]:
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [66]:
resnet18 = train_model(resnet18, criterion, optimizer,
                         exp_lr_scheduler, num_epochs=15)

Epoch 0/14
----------
train Loss: 0.5387 Acc: 0.7112
test Loss: 0.6026 Acc: 0.6658

Epoch 1/14
----------
train Loss: 0.5360 Acc: 0.7144
test Loss: 0.6168 Acc: 0.6603

Epoch 2/14
----------
train Loss: 0.5353 Acc: 0.7142
test Loss: 0.6024 Acc: 0.6667

Epoch 3/14
----------
train Loss: 0.5330 Acc: 0.7151
test Loss: 0.6020 Acc: 0.6585

Epoch 4/14
----------
train Loss: 0.5326 Acc: 0.7174
test Loss: 0.5990 Acc: 0.6639

Epoch 5/14
----------
train Loss: 0.5308 Acc: 0.7167
test Loss: 0.6003 Acc: 0.6703

Epoch 6/14
----------
train Loss: 0.5332 Acc: 0.7177
test Loss: 0.6015 Acc: 0.6658

Epoch 7/14
----------
train Loss: 0.5327 Acc: 0.7156
test Loss: 0.6042 Acc: 0.6530

Epoch 8/14
----------
train Loss: 0.5344 Acc: 0.7236
test Loss: 0.6048 Acc: 0.6539

Epoch 9/14
----------
train Loss: 0.5314 Acc: 0.7195
test Loss: 0.6011 Acc: 0.6530

Epoch 10/14
----------
train Loss: 0.5312 Acc: 0.7158
test Loss: 0.6004 Acc: 0.6667

Epoch 11/14
----------
train Loss: 0.5331 Acc: 0.7181
test Loss: 0.6003 Acc



---



In [1]:
# # memory footprint support libraries/code
# !ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
# !pip install gputil
# !pip install psutil
# !pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab, and it isn't guaranteed, so tolerate an empty
# list instead of failing with IndexError on GPUs[0].
gpu = GPUs[0] if GPUs else None


def printm():
    """Print available host RAM, this process's resident size, and GPU memory usage."""
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
    if gpu is None:
        # No GPU was reported by GPUtil; there is nothing to report for it.
        print("GPU RAM Free: n/a (no GPU detected)")
        return
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))


printm()

Gen RAM Free: 12.9 GB  | Proc size: 137.5 MB
GPU RAM Free: 11441MB | Used: 0MB | Util   0% | Total 11441MB
