### IMPORT

In [1]:
# Importing the required libraries
import torch # Import the Pytorch library
import torchvision # Import the torchvision library
from torchvision import datasets, transforms # Import the transforms module from torchvision


import numpy as np
from PIL import Image # Import the Image module from the Python Imaging Library (PIL)
import matplotlib.pyplot as plt


import urllib # Import the urllib library for URL handling
import sys
from tqdm import tqdm
from customDataset import ISICDataset

# HELPER FUNCTIONS
from data_exploration_helper import dataset_overview

### CONSTANTS
Paths to the `2018` and `2019` datasets. The `2018` dataset is already split into train and test sets; the `2019` dataset has not been split into train and test sets.

In [2]:
# --- ISIC 2018 -------------------------------------------------------------
# Training split: image folder and its ground-truth CSV.
TRAIN_2018_ROOT_DIR: str = "./data/ISIC2018_Training_Input"
TRAIN_2018_LABELS: str = "./data/ISIC2018_Training_GroundTruth.csv"

# Test split: the notebook uses the ISIC 2018 "Validation" files for testing.
TEST_2018_ROOT_DIR: str = "./data/ISIC2018_Validation_Input"
TEST_2018_LABELS: str = "./data/ISIC2018_Validation_GroundTruth.csv"

# --- ISIC 2019 -------------------------------------------------------------
# Single pool of images; no train/test split has been made yet.
DATASET_2019_ROOT_DIR: str = "./data/ISIC_2019_Training_Input"
DATASET_2019_LABELS: str = "./data/ISIC_2019_Training_GroundTruth.csv"

#### Transforms

In [3]:
# Pre-processing pipeline applied to every image before it is fed to ResNet-18.
preprocess_resnet18 = transforms.Compose(
    [
        # Convert the incoming sample to a PIL image first; this removed
        # potential errors with Inception V3 and may be needed here as well.
        transforms.ToPILImage(),
        # With an int argument the shorter edge is scaled to 256 px and the
        # aspect ratio is kept (the result is not necessarily 256x256).
        transforms.Resize(256),
        # Cut out the central 224x224 region, discarding the border pixels.
        transforms.CenterCrop(224),
        # PIL image -> PyTorch tensor.
        transforms.ToTensor(),
        # Per-channel normalisation with the mean / standard deviation that
        # the pre-trained model expects.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


#### Custom Dataset class & Dataloader

In [4]:
# 2018 training split, wrapped in the project's custom dataset class.
train_dataset_2018 = ISICDataset(
    root_dir=TRAIN_2018_ROOT_DIR,
    csv_file=TRAIN_2018_LABELS,
    image_file_type="jpg",
    transform=preprocess_resnet18,
    # Number of CSV rows to use; kept small here for testing purposes.
    nrows=5000,
)

# Shuffled mini-batch loader over the 2018 training set.
data_loader = torch.utils.data.DataLoader(
    dataset=train_dataset_2018,
    shuffle=True,
    batch_size=32,
)

### PRETRAINED RESNET 18 MODEL IMPLEMENTATION

In [5]:
# Fetch a pre-trained ResNet-18 through torch.hub, pinned to the
# torchvision v0.10.0 tag.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'resnet18',
    pretrained=True,
)

# Other ResNet variants can be loaded the same way by swapping the model
# name above for 'resnet34', 'resnet50', 'resnet101' or 'resnet152'.

Using cache found in /Users/fritt/.cache/torch/hub/pytorch_vision_v0.10.0


In [6]:
# Fine-tune a new classification head on top of the pre-trained ResNet-18.

# Freeze all existing parameters so no gradients are computed for them.
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer with a new layer that matches the number of classes
# in the dataset. The new layer is created after the freeze loop, so it is the
# only part of the model the optimizer below updates.
# One annotation column is excluded from the class count -- presumably the
# image identifier column; TODO confirm against ISICDataset.
num_classes = len(train_dataset_2018.annotations.columns) - 1
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)

num_epochs = 10

# Select training mode explicitly: the evaluation cell calls model.eval(), so
# re-running this cell afterwards would otherwise train with eval-mode layers.
# NOTE(review): in train mode the BatchNorm layers of the frozen backbone still
# update their running statistics -- confirm that this is intended.
model.train()

for epoch in tqdm(range(num_epochs)):
    running_loss = 0.0
    num_batches = 0
    for inputs, labels in data_loader:
        # The loss receives the labels as float targets. as_tensor converts
        # the dtype without the copy-construct UserWarning that
        # torch.tensor(<tensor>) emits.
        labels = torch.as_tensor(labels, dtype=torch.float)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Mean batch loss of this epoch; max() guards against an empty loader.
    print('Epoch {} loss: {:.4f}'.format(epoch + 1, running_loss / max(num_batches, 1)))

print('Finished training')


  labels = torch.tensor(labels, dtype=torch.float)
 10%|█         | 1/10 [03:01<27:09, 181.06s/it]

Epoch 1 loss: 0.9255


 20%|██        | 2/10 [06:00<24:01, 180.24s/it]

Epoch 2 loss: 0.7338


 30%|███       | 3/10 [08:56<20:48, 178.37s/it]

Epoch 3 loss: 0.6743


 40%|████      | 4/10 [11:58<17:57, 179.59s/it]

Epoch 4 loss: 0.6435


 50%|█████     | 5/10 [15:03<15:08, 181.66s/it]

Epoch 5 loss: 0.6158


 60%|██████    | 6/10 [17:58<11:56, 179.20s/it]

Epoch 6 loss: 0.6012


 70%|███████   | 7/10 [20:48<08:48, 176.17s/it]

Epoch 7 loss: 0.5762


 80%|████████  | 8/10 [23:36<05:47, 173.73s/it]

Epoch 8 loss: 0.5644


 90%|█████████ | 9/10 [26:38<02:56, 176.43s/it]

Epoch 9 loss: 0.5631


100%|██████████| 10/10 [29:44<00:00, 178.44s/it]

Epoch 10 loss: 0.5471
Finished training





In [7]:
# 2018 test split, wrapped in the same custom dataset class and using the same
# pre-processing as the training set.
test_dataset_2018 = ISICDataset(
    root_dir=TEST_2018_ROOT_DIR,
    csv_file=TEST_2018_LABELS,
    image_file_type="jpg",
    transform=preprocess_resnet18,
    # nrows=100  # uncomment to restrict the number of rows for quick tests
)

In [10]:
# Evaluate the trained model on the 2018 test set and report its accuracy.

# Load the test set (alias kept so the name `test_dataset` stays available).
test_dataset = test_dataset_2018
# Order is irrelevant for accuracy, so no shuffling. Predictions are compared
# per sample below, so any batch size works; 32 mirrors the training loader.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

# Switch to inference mode and disable gradient tracking for the whole pass.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # assumes labels are one-hot rows of shape (batch, num_classes), as
        # used for the loss during training -- TODO confirm against ISICDataset.
        # as_tensor avoids the copy-construct UserWarning of torch.tensor().
        target_classes = torch.as_tensor(labels, dtype=torch.float).argmax(dim=1)
        # Per-sample argmax over the class dimension (not over the whole
        # batch tensor), so the comparison is correct for every batch size.
        predicted_classes = outputs.argmax(dim=1)
        correct += (predicted_classes == target_classes).sum().item()
        total += target_classes.size(0)

# Guard against an empty test set instead of dividing by zero.
accuracy = 100 * correct / total if total else 0.0
print('Accuracy of the model on the test set: {:.2f}%'.format(accuracy))


  labels = torch.tensor(labels, dtype=torch.float)


Accuracy of the model on the test set: 77.20%
