In [None]:
from google.colab import drive

# Mount the user's Google Drive at /content/drive (only works inside Google Colab).
drive.mount('/content/drive')

In [3]:
# Root folder of the ISIC data on the mounted Drive.
ISICDATA = '/content/drive/MyDrive/ISIC'
# NOTE(review): not referenced by any code visible in this notebook — confirm it is still needed.
METADATA = 'metadata_combined.csv'

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as T
import torch.nn.functional as F
import numpy as np
import pandas as pd
import os
from skimage import io
from PIL import Image
from tqdm import tqdm

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

DataSet Implementation

In [5]:
class FamilyHistoryDataSet(torch.utils.data.Dataset):
  """Image-classification dataset described by a CSV annotation file.

  The CSV is read from ``root_dir/csv_file``. Column 0 of each row is an image
  path relative to ``root_dir``; column 1 is the label, which is cast to ``int``.

  Args:
    csv_file: Name of the annotation CSV inside ``root_dir``.
    root_dir: Directory holding both the CSV and the image files.
    transforms: Optional callable applied to the loaded PIL image.
  """

  def __init__(self, csv_file, root_dir, transforms=None):
    self.annotations = pd.read_csv(os.path.join(root_dir, csv_file))
    self.root_dir = root_dir
    self.transforms = transforms

  def __len__(self):
    """Number of annotated samples (rows in the CSV)."""
    return len(self.annotations)

  def __getitem__(self, index):
    """Return ``(image, label)`` for row ``index``.

    ``label`` is a 0-dim integer tensor; ``image`` is whatever ``transforms``
    returns, or an RGB PIL image when no transforms are set.
    """
    img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
    # Image.open is lazy and keeps the file handle open. Converting inside the
    # context manager decodes the pixels and lets the file be closed right away,
    # which matters when many samples are read. Forcing RGB also guarantees a
    # 3-channel image however the file was stored (grayscale, RGBA, palette).
    with Image.open(img_path) as raw_image:
      image = raw_image.convert('RGB')
    y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

    if self.transforms:
      image = self.transforms(image)

    return (image, y_label)

  def get_splits(self, splits=(0.8, 0.2)):
    """Return ``(train_size, test_size)`` as sample counts.

    Only ``splits[0]`` (the training fraction) is used; the test size is the
    remainder, so the two counts always add up to ``len(self)``. The default
    is an immutable tuple so it cannot be altered between calls.
    """
    train_split = round(len(self.annotations)*splits[0])
    test_split = len(self.annotations) - train_split
    return (train_split, test_split)

Hyperparameters, Dataloader & Split

In [14]:
learning_rate = 1e-3
batch_size = 64
num_epochs = 100
img_crop_size = 85
# model params
n_classes = 2
in_features = 3
# data
# Seed for the train/test partition, so the held-out set is the same on every
# run and accuracy numbers stay comparable across kernel restarts.
split_seed = 0
# NOTE(review): ToTensor() scales 8-bit images to [0, 1], so channel means of
# 1.27 or -0.05 cannot come from raw pixels — confirm how these were computed.
ISIC_MEAN = [1.2721, 0.3341, -0.0479]
ISIC_STD = [0.2508, 0.2654, 0.3213]

dataset = FamilyHistoryDataSet(csv_file= 'family_history.csv', root_dir = ISICDATA,
                               transforms=T.Compose(
                                  [T.CenterCrop(img_crop_size),
                                   T.ToTensor(),
                                   T.Normalize(ISIC_MEAN, ISIC_STD)]))

# Test on 1/5 of data
train_split, test_split = dataset.get_splits()
train_set, test_set = torch.utils.data.random_split(
    dataset, [train_split, test_split],
    generator=torch.Generator().manual_seed(split_seed))
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, pin_memory=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False, pin_memory=True)


Model

In [None]:
class CNN(nn.Module):
    """Small two-convolution image classifier.

    Architecture: two 5x5 convolutions (10 and 20 channels), each followed by
    2x2 max-pooling and ReLU, then a 32-unit hidden layer and the output layer.

    Args:
        n_classes: Number of output classes.
        in_features: Number of input channels.
        img_size: Side length of the (square) input images. The default of 85
            gives the 6480 flattened features the network was first sized for.

    Raises:
        ValueError: If ``img_size`` is too small to survive both conv/pool stages.
    """

    def __init__(self, n_classes, in_features, img_size=85):
        super().__init__()
        self.conv1 = nn.Conv2d(in_features, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()

        # Each stage is an unpadded 5x5 convolution (side - 4) followed by a
        # 2x2 max-pool (floor division by 2); there are two such stages.
        side = img_size
        for _ in range(2):
            side = (side - 4) // 2
        if side < 1:
            raise ValueError(f"img_size={img_size} is too small for this network")

        self.fc1 = nn.Linear(20 * side * side, 32)
        self.fc2 = nn.Linear(32, n_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, n_classes)``."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        # Tie dropout to the module's mode: the functional form does not know
        # about model.eval(), and a hard-coded training=True would keep
        # dropping units (and make predictions random) during evaluation.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


# Build the network and move its parameters to the selected device.
# Being the last expression of the cell, model.to(device) also makes the
# notebook display the module's layer summary.
model = CNN(n_classes, in_features)
model.to(device)

Training

In [18]:
def basic_training_loop(loader, model, loss_func, optimizer, device):
    """Run one pass over ``loader``, updating ``model`` after every batch.

    Args:
        loader: Iterable yielding ``(data, labels)`` tensor batches.
        model: Module being trained; it is switched to training mode.
        loss_func: Callable ``loss_func(scores, labels)`` returning a scalar loss.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.
    """
    # Do not rely on the caller (or a previous validation pass) having left
    # the model in training mode.
    model.train()

    for data, labels in tqdm(loader, leave=False):
        data = data.to(device)
        labels = labels.to(device)

        # Clear gradients before the backward pass so anything left over from
        # earlier work on these parameters cannot leak into this update.
        optimizer.zero_grad()

        scores = model(data)
        loss = loss_func(scores, labels)

        loss.backward()
        optimizer.step()

Evaluation

In [13]:
def basic_validation(loader, model, loss_func, device):
    """Evaluate ``model`` on ``loader`` and print its accuracy.

    The model is put in eval mode for the pass and returned to whatever mode
    it was in beforehand, even if evaluation raises.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: Module to evaluate.
        loss_func: Callable ``loss_func(pred, labels)`` returning a scalar loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly classified samples in ``[0, 1]``. Both are
        ``0.0`` when ``loader`` yields no batches.
    """
    num_correct, num_samples = 0, 0
    test_loss = 0.0
    num_batches = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in tqdm(loader, leave=False):
                x = x.to(device=device)
                y = y.to(device=device)

                pred = model(x)
                test_loss += loss_func(pred, y).item()
                num_batches += 1
                _, predictions = pred.max(1)
                # .item() keeps the running count a plain Python int rather
                # than a tensor living on the device.
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of forcing training mode.
        model.train(was_training)

    # Guard the divisions so an empty loader reports zeros instead of raising.
    accuracy = num_correct / num_samples if num_samples else 0.0
    mean_loss = test_loss / num_batches if num_batches else 0.0
    print(
        f"Got {num_correct} / {num_samples} with accuracy {accuracy*100:.2f}"
    )

    return mean_loss, accuracy

Loss + Optimization Algorithm

In [17]:
class OptimizationLoop:
    """Alternate one training pass and one validation pass per epoch.

    Everything the loop needs comes from a single ``params`` mapping with the
    keys ``'n_epochs'``, ``'train_loop'``, ``'validation_loop'``, ``'model'``,
    ``'train_loader'``, ``'test_loader'``, ``'loss'``, ``'optim'`` and
    ``'device'``. A missing key raises ``KeyError``.
    """

    def __init__(self, params) -> None:
        # Schedule length and the two per-epoch callables.
        self.n_epochs, self.training, self.validation = (
            params['n_epochs'],
            params['train_loop'],
            params['validation_loop'],
        )

        # Objects forwarded to those callables on every epoch.
        self.model, self.train_loader, self.test_loader = (
            params['model'],
            params['train_loader'],
            params['test_loader'],
        )
        self.loss_func, self.optimizer, self.device = (
            params['loss'],
            params['optim'],
            params['device'],
        )

    def _train_epoch(self) -> None:
        """Call the training callable once with the stored training objects."""
        train_args = (
            self.train_loader, self.model,
            self.loss_func, self.optimizer, self.device,
        )
        self.training(*train_args)

    def _validate_epoch(self) -> None:
        """Call the validation callable once with the stored evaluation objects."""
        eval_args = (
            self.test_loader, self.model,
            self.loss_func, self.device,
        )
        self.validation(*eval_args)

    def optimize(self) -> None:
        """Run ``n_epochs`` rounds of training followed by validation."""
        for _epoch in range(self.n_epochs):
            self._train_epoch()
            self._validate_epoch()

In [None]:
# The model's forward pass already ends in F.log_softmax, i.e. it returns
# log-probabilities. NLLLoss is the criterion that expects that input;
# CrossEntropyLoss expects raw logits and would apply a second, redundant
# log-softmax on top.
params = {
    'n_epochs': num_epochs,
    'train_loop': basic_training_loop,
    'validation_loop': basic_validation,
    'model': model,
    'train_loader': train_loader,
    'test_loader': test_loader,
    'loss': nn.NLLLoss(),
    'optim': optim.SGD(model.parameters(), lr=learning_rate),
    'device': device
}
optim_loop = OptimizationLoop(params)
optim_loop.optimize()