In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms

from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


import os
import time
from datetime import datetime 
import os
import copy
import tqdm

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# custom pytorch dataset for the SkinMnistDataset 
class SkinMnistDataset(Dataset):
  '''
  csv_file is the csv_file containing the information
  root_dir is the parent directory/folder containing the images
  transforms specifies the transformation to be applied on the images
  '''

  def __init__(self, csv_file, root_dir, transforms=None):
    self.csv_file = pd.read_csv(csv_file)
    self.root_dir = root_dir
    self.transforms = transforms

  def __len__(self):
    '''
    :return: returns the length of the dataset
    '''
    return len(self.csv_file)
  
  def __getitem__(self, item):
    """
    :param item:it is the the index of the data item to be fetched.
    :return: it returns the specified index dataitem and its corresponding label
    """
    image_path = os.path.join(self.root_dir, self.csv_file['image_path'][item])
    image = Image.open(image_path)
    row = self.csv_file.iloc[item, -3:]
    label = torch.tensor(row)

    if self.transforms:
      image=self.transforms(image)
    return (image, label)


In [5]:
pd.read_csv('/content/drive/MyDrive/final year project/datasets/skin_mnist/final_train.csv')

Unnamed: 0.1,Unnamed: 0,lesion_id,image_id,image_path,label,nv,bkl,mel
0,0,HAM_0006955,ISIC_0025105,ham10000_images_part_1/ISIC_0025105.jpg,mel,0,0,1
1,1,HAM_0001847,ISIC_0024792,ham10000_images_part_1/ISIC_0024792.jpg,mel,0,0,1
2,2,HAM_0002076,ISIC_0024720,ham10000_images_part_1/ISIC_0024720.jpg,nv,1,0,0
3,3,HAM_0004072,ISIC_0032130,ham10000_images_part_2/ISIC_0032130.jpg,nv,1,0,0
4,5,HAM_0007044,ISIC_0024501,ham10000_images_part_1/ISIC_0024501.jpg,nv,1,0,0
...,...,...,...,...,...,...,...,...
1295,1669,HAM_0000553,ISIC_0034061,ham10000_images_part_2/ISIC_0034061.jpg,mel,0,0,1
1296,1670,HAM_0004569,ISIC_0031495,ham10000_images_part_2/ISIC_0031495.jpg,bkl,0,1,0
1297,1672,HAM_0007102,ISIC_0030031,ham10000_images_part_2/ISIC_0030031.jpg,mel,0,0,1
1298,1673,HAM_0003176,ISIC_0028148,ham10000_images_part_1/ISIC_0028148.jpg,mel,0,0,1


In [6]:
# All the required paths as variables
train_csv_file = '/content/drive/MyDrive/final year project/datasets/skin_mnist/final_train.csv'
val_csv_file = '/content/drive/MyDrive/final year project/datasets/skin_mnist/final_val.csv'
test_csv_file = '/content/drive/MyDrive/final year project/datasets/skin_mnist/final_test.csv'

root_dir = '/content/drive/MyDrive/final year project/datasets/skin_mnist'

In [7]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        transforms.ToTensor(),
        transforms.Normalize([0.6373545 , 0.44605875, 0.46191868], [0.27236816, 0.22500427, 0.24329403])
    ]),
    'test': transforms.Compose([
        transforms.Resize(226),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.6373545 , 0.44605875, 0.46191868], [0.27236816, 0.22500427, 0.24329403])
    ]),
}

In [8]:
train_dataset = SkinMnistDataset(train_csv_file, root_dir, data_transforms['train'])

In [9]:
val_dataset = SkinMnistDataset(val_csv_file, root_dir, data_transforms['test'])

In [10]:
test_dataset = SkinMnistDataset(test_csv_file, root_dir, data_transforms['test'])

In [11]:
len(train_dataset), len(val_dataset)

(1300, 335)

In [12]:
batch_size = 64
shuffle = True
num_classes = 3

In [13]:
dataloaders = {
    'train': DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=shuffle),
    'val': DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=shuffle),
    'test': DataLoader(dataset=test_dataset, batch_size=len(test_dataset))
    }

dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset),
    'test': len(test_dataset)
}

In [14]:
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.6373545 , 0.44605875, 0.46191868])
    std = np.array([0.27236816, 0.22500427, 0.24329403])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


In [15]:
# pop_mean = []
# pop_std0 = []

# for i, data in enumerate(dataloaders['train'], 0):
#     # shape (batch_size, 3, height, width)
#     numpy_image = data[0].numpy()
    
#     # shape (3,)
#     batch_mean = np.mean(numpy_image, axis=(0,2,3))
#     batch_std0 = np.std(numpy_image, axis=(0,2,3))
    
#     pop_mean.append(batch_mean)
#     pop_std0.append(batch_std0)

# # shape (num_iterations, 3) -> (mean across 0th axis) -> shape (3,)
# pop_mean = np.array(pop_mean).mean(axis=0)
# pop_std0 = np.array(pop_std0).mean(axis=0)

In [None]:
# pop_mean, pop_std0 = [0.6373545 , 0.44605875, 0.46191868], [0.27236816, 0.22500427, 0.24329403] 

## Initializing Pretrained Model

In [16]:
def set_parameter_requires_grad(model, num_freeze):
  for i, param in enumerate(model.parameters()):
    if i >= num_freeze:
      break
    param.requires_grad = False

In [17]:
model_ft = models.vgg19_bn(pretrained=True)
set_parameter_requires_grad(model_ft, 30)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
inputs_size = 224

Downloading: "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth" to /root/.cache/torch/hub/checkpoints/vgg19_bn-c79401a0.pth


  0%|          | 0.00/548M [00:00<?, ?B/s]

In [None]:
# model_ft.load_state_dict(torch.load('./drive/My Drive/ML_Club/Projects/grad_cam/models/vgg19/run4/best_model_vgg19.pt'))

In [18]:
model_ft.parameters

<bound method Module.parameters of VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mo

In [19]:
run = 7
run_path = f'/content/drive/MyDrive/final year project/models/vgg19/run{run}'
if os.path.isdir(run_path):
  print("This run was already completed, please specify a different run if wanna start again")
else: 
  os.makedirs(run_path)
  os.makedirs(os.path.join(run_path, 'checkpoints'))
best_model_path = f'/content/drive/MyDrive/final year project/models/vgg19/run{run}/best_model_vgg19.pt'
checkpoint_dir = f'/content/drive/MyDrive/final year project/models/vgg19/run{run}/checkpoints/'

## Train Loop

In [20]:
def train_model(model, dataloaders, criterion, optimizer, scheduler, start_epoch=0, num_epochs=25, best_acc=0.0, hist={'val_acc': False, 'train_acc': False} ):
    since = time.time()

    val_acc_history = hist['val_acc'] or []
    train_acc_history = hist['train_acc'] or []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = best_acc

    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()
                # print('beginning of forwardprop')                
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):

                  # Get model outputs and calculate loss
                  
                  outputs = model(inputs)
                  _, preds = torch.max(outputs, 1)
                  loss = criterion(outputs, torch.max(labels, 1)[1])


                  # print('beginning of backprop')
                  # backward + optimize only if in training phase
                  if phase == 'train':
                      loss.backward()
                      optimizer.step()
                  # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == torch.max(labels, 1)[1].data)
            if phase == 'val':
              # print('LR Decreased')
              print('LR', optimizer.param_groups[0]['lr'])
              scheduler.step(loss)
      
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                print('Saving as best model')
                torch.save( model.state_dict(),
                 best_model_path)
            if phase == 'train':
              train_acc_history.append(epoch_acc)
            if phase == 'val':
              val_acc_history.append(epoch_acc)
              if epoch % 4 == 0:
                print("Saving Checkpoint")
                print("Best Acc", best_acc.item())
                torch.save({
                    "epoch": epoch,
                    "loss": loss,
                    "model_state_dict": model.state_dict(),
                    "best_acc": best_acc,
                    "hist": {'val_acc':val_acc_history, 'train_acc': train_acc_history}
                }, os.path.join(checkpoint_dir, 'Epoch={0:0=3d}.pt'.format(epoch)))

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, {'val_acc':val_acc_history, 'train_acc': train_acc_history}

In [21]:
loss_vs_time = []
checkpoints = os.listdir(path=checkpoint_dir)
for checkpoint_name in checkpoints:
  checkpoint = torch.load(checkpoint_name)
  loss = checkpoint['loss']
  loss_vs_time.append(loss)

## Instatiate Optimizer and pass in only trainable parameters

In [22]:
model_ft = model_ft.to(device)

params_to_update = model_ft.parameters()
print("Parameters to Learn:")

params_to_update = []
for name, param in model_ft.named_parameters():
  if param.requires_grad == True:
    params_to_update.append(param)
    print('\t', name)
print(len(params_to_update))
# optimizer_ft = optim.Adam(params_to_update, lr=0.01)
optimizer_ft = optim.SGD(params_to_update, lr=0.01, momentum=0.9,nesterov=True)
exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer_ft, mode='min',
    factor=0.5, patience=1, verbose=True,
    threshold=0.0001, threshold_mode='rel',
    cooldown=0, min_lr=0, eps=1e-08
)

Parameters to Learn:
	 features.24.weight
	 features.24.bias
	 features.27.weight
	 features.27.bias
	 features.28.weight
	 features.28.bias
	 features.30.weight
	 features.30.bias
	 features.31.weight
	 features.31.bias
	 features.33.weight
	 features.33.bias
	 features.34.weight
	 features.34.bias
	 features.36.weight
	 features.36.bias
	 features.37.weight
	 features.37.bias
	 features.40.weight
	 features.40.bias
	 features.41.weight
	 features.41.bias
	 features.43.weight
	 features.43.bias
	 features.44.weight
	 features.44.bias
	 features.46.weight
	 features.46.bias
	 features.47.weight
	 features.47.bias
	 features.49.weight
	 features.49.bias
	 features.50.weight
	 features.50.bias
	 classifier.0.weight
	 classifier.0.bias
	 classifier.3.weight
	 classifier.3.bias
	 classifier.6.weight
	 classifier.6.bias
40


In [23]:
criterion = nn.CrossEntropyLoss()

num_epochs = 51
best_acc = 0.0
start_epoch = 0
hist = {'val_acc': False, 'train_acc': False}

checkpoints = os.listdir(path=checkpoint_dir)

if len(checkpoints):
  print(f"Getting the latest checkpoint in run{run}: ")
  name = checkpoints[-1]
  print(f"Loading Checkpoint file: {name}")
  checkpoint_path = os.path.join(checkpoint_dir, name)
  checkpoint = torch.load(checkpoint_path)
  best_acc = checkpoint['best_acc']
  model_ft.load_state_dict(checkpoint['model_state_dict']) 
  hist = checkpoint['hist']
  print(f'{name} loaded successfully')
  start_epoch = checkpoint['epoch'] + 1
  print(f"{start_epoch} epochs done already")
  print(f'traning for {num_epochs - start_epoch} more epochs')
else:
  print('training from scratch')

training from scratch


In [26]:
model_ft, hist = train_model(model_ft, dataloaders, criterion, optimizer_ft, 
                              scheduler=exp_lr_scheduler,
                              num_epochs=num_epochs,
                              start_epoch=start_epoch,
                              best_acc=best_acc,
                              hist=hist
                             )

Epoch 0/50
----------
train Loss: 0.6866 Acc: 0.7038
LR 0.01
val Loss: 0.7384 Acc: 0.6896
Saving as best model
Saving Checkpoint
Best Acc 0.6895522388059702

Epoch 1/50
----------
train Loss: 0.5818 Acc: 0.7515
LR 0.01
val Loss: 0.7026 Acc: 0.7164
Saving as best model

Epoch 2/50
----------
train Loss: 0.5653 Acc: 0.7600
LR 0.01
val Loss: 0.6215 Acc: 0.7433
Saving as best model

Epoch 3/50
----------
train Loss: 0.5282 Acc: 0.7746
LR 0.01
val Loss: 0.6689 Acc: 0.7343

Epoch 4/50
----------
train Loss: 0.4617 Acc: 0.8108
LR 0.01
val Loss: 0.6033 Acc: 0.7493
Saving as best model
Saving Checkpoint
Best Acc 0.7492537313432835

Epoch 5/50
----------
train Loss: 0.4486 Acc: 0.8177
LR 0.01
Epoch 00006: reducing learning rate of group 0 to 5.0000e-03.
val Loss: 0.8275 Acc: 0.6925

Epoch 6/50
----------
train Loss: 0.4149 Acc: 0.8277
LR 0.005
val Loss: 0.6425 Acc: 0.7552
Saving as best model

Epoch 7/50
----------
train Loss: 0.3771 Acc: 0.8462
LR 0.005
Epoch 00008: reducing learning rate of gr

In [27]:
stats =  {
  'training_time': '20m 14s',
  'val_acc':  0.8209,
  'train_acc': 0.8468,
  'optmizer': 'SGD',
  'initial_lr': 0.01,
  'layers_frozen': 30,
  'loss_diff': 0.1017,
  'momentum': 0.9,
  'patience': 2,
  'factor': 0.25,
  'text': '''
Epoch 10/50
----------
train Loss: 0.3758 Acc: 0.8468
LR 0.0025
val Loss: 0.4775 Acc: 0.8209
Saving as best model
'''
}


stats_df = pd.DataFrame.from_dict(stats, orient='index')

In [28]:
stats_df.to_csv(os.path.join(run_path, 'best_stats.csv'))

In [29]:
def evaluate(model, dataloader, criterion):
    since = time.time()

    model.eval()   # Set model to evaluate mode

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
            
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, torch.max(labels, 1)[1])

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == torch.max(labels, 1)[1].data)
  
        epoch_loss = running_loss / len(dataloader.dataset)
        epoch_acc = running_corrects.double() / len(dataloader.dataset)

        print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        time_elapsed = time.time() - since
        print('Inference Time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return model

In [30]:
model_ft = model_ft.to(device)
evaluate(model_ft, dataloaders['test'], nn.CrossEntropyLoss())
print('Test Complete')

Loss: 0.4210 Acc: 0.9000
Inference Time 0m 32s
Test Complete


In [31]:
model_ft.load_state_dict(torch.load(best_model_path))
model_ft.eval()

def inference(dataloaders, criterion):

    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)
        labels = torch.max(labels, 1)[1]
     
        outputs = model_ft(inputs)
        preds = torch.max(outputs, 1)[1]
        loss = criterion(outputs, labels)

        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data)
        
    test_loss = running_loss / len(dataloaders['test'])
    test_acc = running_corrects.double() / dataset_sizes['test']

    print('Test Results:\n Loss: {:.4f} Acc: {:.4f}'.format(test_loss, test_acc))
    return outputs, preds, test_loss, test_acc 

In [32]:
model_ft(test_dataset[0][0].unsqueeze(0).to(device)).shape

torch.Size([1, 3])