# Hackathon - Brain Tumor Classification

In [28]:
%load_ext autoreload
%autoreload 2
import os
import sys
import json
import numpy as np
import joblib as jb
import matplotlib.pyplot as plt
from glob import glob

import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision import datasets

from collections import defaultdict
from PIL import Image, ImageFile
from src.utils import train_image_classifier, test_image_classifier
# the following import is required for training to be robust to truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

# check if CUDA is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Dataset Structure

## Image Data Loaders

* Training DataLoader
    * Transforms
    * Normalize
    * Image Augmentation
* Validation DataLoader
    * Transforms
    * Normalize
* Testing DataLoader
    * Transforms
    * Normalize

In [2]:
# define hyper parameters
DATA_DIR = 'data/'
TRAIN = 'train'
VAL = 'valid'
TEST = 'test'
KERNEL_SIZE = (5, 9)
SIGMA = (0.1, 2.5)
SHARPNESS = 2
ROTATION = 30
RESIZE = (256, 256)
CROP = (224, 224)
BATCH_SIZE = 16
EPOCHS = 20
LR = 1e-4

data_transforms = { 
    TRAIN: transforms.Compose([ # define train set augmentations
        transforms.Resize(RESIZE),
        transforms.RandomCrop(CROP),
        transforms.RandomVerticalFlip(), # default .5 probability
        transforms.RandomRotation(ROTATION),
        transforms.RandomHorizontalFlip(),
        transforms.GaussianBlur(KERNEL_SIZE, sigma=SIGMA),
        transforms.RandomAdjustSharpness(sharpness_factor=SHARPNESS), # default .5 probability
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]),
    VAL: transforms.Compose([ # no need to perform any augmentation on the validation data
        transforms.Resize(RESIZE),
        transforms.CenterCrop(CROP),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]),
    TEST: transforms.Compose([ # no need to perform any augmentation on the test data
        transforms.Resize(RESIZE),
        transforms.CenterCrop(CROP),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
}

image_datasets = {
    x: datasets.ImageFolder(
        os.path.join(DATA_DIR, x), 
        transform=data_transforms[x]
    )
    for x in [TRAIN, VAL, TEST]
}

# create data loaders dictionary
loaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x], batch_size=BATCH_SIZE,
        shuffle=True, num_workers=0  # turn on shuffle (though not needed for testing and validation)
    ) 
    for x in [TRAIN, VAL, TEST]
}

# sanity check
for dataset in image_datasets:
    n_images = len(image_datasets[dataset])
    print(f'Dataset {dataset} contains {n_images} images')

Dataset train contains 5392 images
Dataset valid contains 320 images
Dataset test contains 1311 images


## Train and Validate, and Test the Model

In [3]:
# use pre-trained EfficientNet
model = models.efficientnet_b0(pretrained=True)

# freeze intermediate layers
for param in model.features.parameters():
    param.require_grad = False
    
# remove last layer
classifier_block = model.classifier

num_features = model.classifier[-1].in_features # save the number of in features in the last layer
classifier_block = list(classifier_block[:-1]) # remove last layer

# replace the layer with a new output with the number of classes
classifier_block.extend([nn.Linear(num_features, 4)])

# replace classifier layer
model.classifier = nn.Sequential(*classifier_block)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


## Model Training

In [4]:
# multiclass classification - PyTorch implemntation of CrossEntropyLoss applies Softmax and NLLLoss
criterion = nn.CrossEntropyLoss()

# Adam optimizer typically outperforms other optimizers (true for the time this was written)
optimizer = optim.Adam(model.parameters(), LR)

# train the model
losses = train_image_classifier(EPOCHS, loaders, model, optimizer, criterion, device, 'tumor_classifier.pt')

Epoch: 1	Batch 336 out of 337	Classified correctly: 0.83902
Epoch: 1 	Training Loss: 0.473401 	Validation Loss: 0.460537 	 Training accuracy: 0.839021

Validation loss improved from inf to 0.46053702804351154
Epoch: 2	Batch 336 out of 337	Classified correctly: 0.93157
Epoch: 2 	Training Loss: 0.196969 	Validation Loss: 0.188067 	 Training accuracy: 0.931565

Validation loss improved from 0.46053702804351154 to 0.18806735373500064
Epoch: 3	Batch 336 out of 337	Classified correctly: 0.94955
Epoch: 3 	Training Loss: 0.143909 	Validation Loss: 0.140950 	 Training accuracy: 0.949555

Validation loss improved from 0.18806735373500064 to 0.14095024231901446
Epoch: 4	Batch 336 out of 337	Classified correctly: 0.96402
Epoch: 4 	Training Loss: 0.110224 	Validation Loss: 0.105531 	 Training accuracy: 0.964021

Validation loss improved from 0.14095024231901446 to 0.10553073459256511
Epoch: 5	Batch 336 out of 337	Classified correctly: 0.97033
Epoch: 5 	Training Loss: 0.091308 	Validation Loss: 0.08

In [5]:
# create class dictionary to access labels more easily later
## list of class names by index, i.e. a name can be accessed like class_names[0]
class_names = [item for item in image_datasets['train'].classes]
class_names = {i: class_name for i, class_name in enumerate(class_names)}
jb.dump(class_names, 'class_names_dict.pkl')

['class_names_dict.pkl']

## Evaluate

In [29]:
# load the model that got the best validation accuracy 
model_testing = torch.load('tumor_classifier.pt')
metrics = test_image_classifier(loaders["test"], model_testing, criterion, device)
print(json.dumps(metrics, indent=2))

{
  "Test Results": {
    "Loss": 0.015551043674349785,
    "Accuracy": "99.619%",
    "Rcall": "99.6%",
    "Precision": "99.7%",
    "F1": "99.6%",
    "Correct": 1306,
    "Total": 1311
  }
}


In [30]:
losses

[(0.016983803757470703, 0.016156454150627286)]

In [None]:
fig, ax = plt.subplots()
total_loss = np.array(total_loss)
plt.plot(total_loss.T[0], label='Discriminator', alpha=0.5)
plt.plot(total_loss.T[1], label='Generator', alpha=0.5)
plt.title("Training Losses")
plt.legend()
plt.show()