In [None]:
# Colab-only helper module that exposes Google Drive to this runtime.
from google.colab import drive

# Mount Drive at /content/drive; the dataset and log paths used by this notebook live under it.
drive.mount('/content/drive')

In [3]:
# Root folder (on the mounted Drive) that holds the metadata CSV and the image files.
ISICDATA = '/content/drive/MyDrive/ISIC'
# Directory the TensorBoard SummaryWriter logs into.
TF_LOGDIR = '/content/drive/MyDrive/runs'
# NOTE(review): not referenced by any cell visible here (the dataset cell loads
# 'family_history.csv' instead) - confirm whether this constant is still needed.
METADATA = 'metadata_combined.csv'

In [None]:
# Install torchmetrics into the runtime. The `!` shell capture stores pip's output
# lines in `installer`, which is then shown as the cell result.
# NOTE(review): the version is not pinned, so different runtimes may resolve
# different torchmetrics releases.
installer = !pip install torchmetrics
installer

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
import os
from PIL import Image

from torchmetrics import MetricCollection
from torchmetrics.classification import Accuracy, AUROC, Precision


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

DataSet Implementation

In [5]:
class FamilyHistoryDataSet(torch.utils.data.Dataset):
  """Image-classification dataset driven by a metadata CSV.

  Every CSV row names one image file (relative to ``root_dir``) and carries an
  integer class label.

  Args:
    metadata: File name of the CSV, relative to ``root_dir``.
    root_dir: Directory containing both the CSV and the image files.
    transforms: Optional callable applied to the loaded PIL image.
    data_col: Name of the CSV column holding the image file names.
    ylabel_col: Name of the CSV column holding the labels.

  Raises:
    KeyError: If ``data_col`` or ``ylabel_col`` is not a column of the CSV.
  """

  def __init__(self, metadata, root_dir, transforms=None, data_col=None, ylabel_col=None):
    self.root_dir = root_dir
    self.transforms = transforms
    self.annotations = pd.read_csv(os.path.join(root_dir, metadata))
    # Resolve the column names to positions once so __getitem__ can use fast .iloc access.
    self.xdata_col = self.annotations.columns.get_loc(data_col)
    self.ylabel_col = self.annotations.columns.get_loc(ylabel_col)

  def __len__(self):
    """Return the number of rows (samples) in the metadata CSV."""
    return len(self.annotations)

  def __getitem__(self, index):
    """Load sample ``index`` and return ``(image, label)``.

    The image is always decoded as 3-channel RGB; the label is a 0-dim integer tensor.
    """
    img_path = os.path.join(self.root_dir, self.annotations.iloc[index, self.xdata_col])
    # Image.open is lazy and keeps the file handle open; convert() forces the
    # decode and returns an independent image, so the handle can be closed here.
    # Forcing RGB keeps grayscale/RGBA/CMYK files compatible with a 3-channel pipeline.
    with Image.open(img_path) as img:
      image = img.convert('RGB')
    y_label = torch.tensor(int(self.annotations.iloc[index, self.ylabel_col]))
    if self.transforms:
      image = self.transforms(image)
    return (image, y_label)

  def get_splits(self, splits=(0.8, 0.2)):
    """Return ``(n_train, n_test)`` sample counts for a train/test partition.

    Only ``splits[0]`` (the training fraction) is used; the test count is the
    remainder, so the two counts always sum to ``len(self)``.
    """
    total = len(self.annotations)
    train_split = round(total * splits[0])
    test_split = total - train_split
    return (train_split, test_split)

Hyperparameters, Dataloader & Split

In [14]:
learning_rate = 1e-3
batch_size = 256
epochs = 100
img_crop_size = 85
# Seed for the train/test partition so every run evaluates on the same held-out images.
split_seed = 42
# data
# NOTE(review): ToTensor scales pixels to [0, 1], so a channel mean of 1.27 or a
# negative mean looks suspicious - confirm how these statistics were computed.
ISIC_MEAN = [1.2721, 0.3341, -0.0479]
ISIC_STD = [0.2508, 0.2654, 0.3213]

dataset = FamilyHistoryDataSet(
    metadata='family_history.csv',
    root_dir=ISICDATA,
    transforms=T.Compose([
        T.CenterCrop(img_crop_size),
        T.ToTensor(),
        T.Normalize(ISIC_MEAN, ISIC_STD),
    ]),
    data_col='filename',
    ylabel_col='family_history')

train_split, test_split = dataset.get_splits()
# A seeded generator makes the partition reproducible across kernel restarts.
train_set, test_set = torch.utils.data.random_split(
    dataset, [train_split, test_split],
    generator=torch.Generator().manual_seed(split_seed))
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers=2)


Model

In [None]:
class CNN(nn.Module):
    """Small two-convolution image classifier that returns log-probabilities.

    Args:
        n_classes: Number of output classes.
        in_features: Number of channels of the input images.
        flat_features: Size of the flattened feature map fed into ``fc1``.
            The default 6480 (= 20 channels * 18 * 18) matches 85x85 inputs:
            85 -> conv5 -> 81 -> pool2 -> 40 -> conv5 -> 36 -> pool2 -> 18.
    """

    def __init__(self, n_classes, in_features, flat_features=6480):
        super().__init__()
        self.conv1 = nn.Conv2d(in_features, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(flat_features, 32)
        self.fc2 = nn.Linear(32, n_classes)

    def forward(self, x):
        """Map a batch of images ``(N, in_features, H, W)`` to ``(N, n_classes)`` log-probabilities."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        # Functional dropout must follow the module's mode; a hard-coded
        # training=True would keep dropping activations after model.eval().
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Model configuration: two output classes, three input channels.
n_classes, in_features = 2, 3

# Build the network; the trailing expression moves it onto `device` and shows
# the module summary as the cell output.
model = CNN(n_classes=n_classes, in_features=in_features)
model.to(device)

Training

In [18]:
def basic_training_loop(train_loader, model, loss_func, optimizer, metrics, scaler, device):
    """Run one mixed-precision training pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(data, labels)`` batches.
        model: Network returning per-class scores of shape ``(N, C)``.
        loss_func: Callable ``loss_func(prediction, labels)`` returning a scalar loss.
        optimizer: Optimizer updating ``model``'s parameters.
        metrics: Object with ``update(preds, target)``. It receives class
            probabilities: the positive-class column ``(N,)`` when ``C == 2``,
            otherwise the full ``(N, C)`` matrix.
        scaler: Gradient scaler used to scale the loss and step the optimizer.
        device: Device the batches are moved to.
    """
    model.train()
    for batch_idx, (data, labels) in enumerate(train_loader):
        data = data.to(device)
        labels = labels.to(device)

        # Clear gradients before the forward/backward pass so nothing stale is accumulated.
        optimizer.zero_grad()
        with autocast():
            prediction = model(data)
            loss = loss_func(prediction, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Feed probabilities rather than argmax labels to the metrics: ranking
        # metrics such as AUROC are meaningless on hard 0/1 predictions.
        # softmax is a no-op on log-probabilities and also handles raw logits.
        probs = torch.softmax(prediction.detach().float(), dim=1)
        scores = probs[:, 1] if probs.shape[1] == 2 else probs
        metrics.update(scores, labels)

        if batch_idx % 10 == 0:
            print(f'batch_idx: {batch_idx}')

Evaluation

In [13]:
def metrics_validation(test_loader, model, metrics, device):
    """Accumulate evaluation metrics for ``model`` over ``test_loader``.

    The model is switched to eval mode for the pass and put back into train
    mode afterwards, even if the loop raises.

    Args:
        test_loader: Iterable yielding ``(x, y)`` batches.
        model: Network returning per-class scores of shape ``(N, C)``.
        metrics: Object with ``update(preds, target)``. It receives class
            probabilities: the positive-class column ``(N,)`` when ``C == 2``,
            otherwise the full ``(N, C)`` matrix.
        device: Device the batches are moved to.
    """
    model.eval()
    try:
        with torch.no_grad():
            for batch_idx, (x, y) in enumerate(test_loader):
                x = x.to(device=device)
                y = y.to(device=device)
                pred = model(x)
                # Probabilities instead of argmax labels, so ranking metrics
                # such as AUROC are computed on real scores. softmax is a
                # no-op on log-probabilities and also handles raw logits.
                probs = torch.softmax(pred.float(), dim=1)
                scores = probs[:, 1] if probs.shape[1] == 2 else probs
                metrics.update(scores, y)
                if batch_idx % 10 == 0:
                    print(f'batch_idx: {batch_idx}')
    finally:
        model.train()

Loss + Optimization Algorithm

In [17]:
class OptimizationLoop:
    """Drive training and validation for a fixed number of epochs and log metrics.

    ``params`` is a dict with the keys:
        'n_epochs': number of epochs to run.
        'train_loop': callable ``(train_loader, model, loss, optim, metrics, scaler, device)``.
        'validation_loop': callable ``(test_loader, model, metrics, device)``.
        'model', 'train_loader', 'test_loader', 'loss', 'optim', 'device':
            objects forwarded to the two loops.
        'metrics': dict with 'train' and 'valid' metric collections; both are
            moved to ``device``.
        'log_dir' (optional): TensorBoard log directory; defaults to ``TF_LOGDIR``.
    """

    def __init__(self, params) -> None:
        self.n_epochs = params['n_epochs']
        self.training = params['train_loop']
        self.validation = params['validation_loop']

        self.model = params['model']
        self.train_loader = params['train_loader']
        self.test_loader = params['test_loader']
        self.loss_func = params['loss']
        self.optimizer = params['optim']
        self.device = params['device']
        self.train_metrics = params['metrics']['train'].to(self.device)
        self.valid_metrics = params['metrics']['valid'].to(self.device)
        self.scaler = GradScaler()
        self.writer = SummaryWriter(params.get('log_dir', TF_LOGDIR))

    def optimize(self) -> None:
        """Run all epochs: train, validate, print and log the metrics, then reset them."""
        for epoch in range(self.n_epochs):
            self.training(
                self.train_loader, self.model,
                self.loss_func, self.optimizer,
                self.train_metrics, self.scaler, self.device)
            self.validation(
                self.test_loader, self.model,
                self.valid_metrics, self.device)
            total_train_metrics = self.train_metrics.compute()
            total_valid_metrics = self.valid_metrics.compute()
            print(f"Training metrics for epoch {epoch}: {total_train_metrics}")
            print(f"Validation metrics for epoch {epoch}: {total_valid_metrics}")

            for metric, value in total_train_metrics.items():
                self.writer.add_scalar(f'Train/{metric}', value, epoch)
            for metric, value in total_valid_metrics.items():
                self.writer.add_scalar(f'Test/{metric}', value, epoch)
            # Push buffered events to disk every epoch so TensorBoard stays
            # current and nothing is lost if the runtime is interrupted.
            self.writer.flush()

            # Start the next epoch with empty metric state.
            self.train_metrics.reset()
            self.valid_metrics.reset()

In [None]:
# Everything OptimizationLoop needs, keyed by the names its constructor reads.
# Each split gets its own MetricCollection instance so train and validation
# statistics are accumulated separately.
params = dict(
    n_epochs=epochs,
    train_loop=basic_training_loop,
    validation_loop=metrics_validation,
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    loss=nn.CrossEntropyLoss(),
    optim=optim.SGD(model.parameters(), lr=learning_rate),
    metrics={
        split: MetricCollection([
            Accuracy(task='binary'),
            AUROC(task='binary'),
            Precision(task='binary')])
        for split in ('train', 'valid')
    },
    device=device,
)

In [None]:
# Load the TensorBoard notebook extension, which provides the %tensorboard magic.
%load_ext tensorboard

In [None]:
# Reload the TensorBoard extension.
# NOTE(review): only needed when the extension was already loaded in this kernel;
# it is redundant directly after %load_ext on a fresh kernel.
%reload_ext tensorboard

In [None]:
%tensorboard --logdir=TF_LOGDIR

In [None]:
# Build the loop from the `params` dict and run all epochs; metrics are printed
# per epoch and written to the TensorBoard log directory.
optim_loop = OptimizationLoop(params)
optim_loop.optimize()