# **CNN for classification of skin cancer images**

**Goal**: Train a CNN model to classify 7 types of skin cancer based on dermatology images

# 📂 **Data Access & Setup**

This project uses the **HAM10000 ("Human Against Machine with 10000 training images")** dataset.

It consists of 10015 dermatoscopic images which are released as a training set for academic machine learning purposes and are publicly available through the ISIC archive. This benchmark dataset can be used for machine learning and for comparisons with human experts.

It has 7 different classes of skin cancer which are listed below:

**1. Melanocytic nevi <br>**
**2. Melanoma <br>**
**3. Benign keratosis-like lesions <br>**
**4. Basal cell carcinoma <br>**
**5. Actinic keratoses <br>**
**6. Vascular lesions <br>**
**7. Dermatofibroma<br>**

We will follow these steps to classify moles into 7 classes.

**1. Project Configuration**<br>
**2. Making Dictionary of labels** <br>
**3. Reading and Processing Data** <br>
**4. Exploratory data analysis (EDA)** <br>
**5. Loading the images** <br>
**6. Dataset Preparation**<br>
**7. Normalization**<br>
**8. Train validation split** <br>
**9. Model Building (CNN)** <br>
**10. Training Configuration** <br>
**11. Data Augmentation** <br>
**12. Training the model**<br>
**13. Model Evaluation** <br>


## 1. Project Configuration

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import os, sys, gdown, tarfile, io, copy

from PIL import Image
from time import time

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import WeightedRandomSampler

from torchvision import transforms
from torchsummary import summary

# Fix the random seeds. NumPy alone is not enough: weight initialisation,
# the random image transforms and WeightedRandomSampler all draw from
# PyTorch's generator, which np.random.seed() does not touch.
np.random.seed(123)
torch.manual_seed(123)

print('PyTorch version: {}'.format(torch.__version__))

In [None]:
# Create the exercise_functions.py file
#
# The helper module is written to disk and later imported with
# `from exercise_functions import *`. Its functions resolve global names
# (plt, sns, torch, confusion_matrix) in the *module's* namespace, not in the
# notebook's, so the module has to carry its own imports.

exercise_functions_code = '''
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from sklearn.metrics import confusion_matrix


def plot_training_history(history):
    """Plot training and validation loss and accuracy"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Plot loss
    ax1.plot(history['train_loss'], label='Train Loss')
    ax1.plot(history['val_loss'], label='Validation Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.set_title('Training and Validation Loss')
    ax1.legend()
    ax1.grid(True)

    # Plot accuracy
    ax2.plot(history['train_acc'], label='Train Accuracy')
    ax2.plot(history['val_acc'], label='Validation Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.set_title('Training and Validation Accuracy')
    ax2.legend()
    ax2.grid(True)

    plt.tight_layout()
    plt.show()

def plot_confusion_matrix(y_true, y_pred, classes):
    """Plot confusion matrix"""
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=classes, yticklabels=classes)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.show()

def get_device():
    """Get the device (GPU if available, else CPU)"""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'Using device: {device}')
    return device

def train_one_epoch(model, train_loader, criterion, optimizer, device):
    """Train for one epoch"""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    return epoch_loss, epoch_acc

def validate(model, val_loader, criterion, device):
    """Validate the model"""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    return epoch_loss, epoch_acc
'''

# Determine where to save the file. Only a failed import means "not in Colab";
# any other error should surface instead of being silently swallowed.
try:
    from google.colab import drive
    # If in Colab, save to the Colab Notebooks directory
    save_path = ''  # Write your path
except ImportError:
    # If local, save to current directory
    save_path = 'exercise_functions.py'

# An empty path would otherwise fail inside open() with an unhelpful message.
if not save_path:
    raise ValueError(
        "save_path is empty: set it to the location where "
        "exercise_functions.py should be written"
    )

# Write the file
with open(save_path, 'w') as f:
    f.write(exercise_functions_code)

print(f'âœ“ Created exercise_functions.py at: {save_path}')
print('âœ“ You can now run your original code without errors!')

Check if a GPU device is available.

In [None]:
if not torch.cuda.is_available():
    raise SystemError('No GPU device found')

num_device = torch.cuda.current_device()
device = torch.cuda.device(num_device)
print('GPU device found. Current: [{}] {}'.format(num_device, device))

!nvidia-smi

**Your data will be stored in your google drive space. You need to mount your drive to access it from the notebook.**

In [None]:
project_folder_name = 'Skin_Cancer_class'

# Default location when running in Google Colab with Drive mounted.
base_working_dir = '/content/drive/My Drive/Colab Notebooks'

try:
    from google.colab import drive
except ImportError:
    # Not running in Colab: work from the current directory instead.
    base_working_dir = os.getcwd()
else:
    # Mount outside the try body so that a failed mount is reported rather
    # than silently turned into a local run.
    drive.mount('/content/drive')

# Make exercise_functions.py (expected in base_working_dir) importable.
sys.path.append(base_working_dir)
from exercise_functions import *

base_working_dir = os.path.join(base_working_dir, project_folder_name)

# exist_ok avoids the check-then-create race of an explicit os.path.exists test.
os.makedirs(base_working_dir, exist_ok=True)

print('Working directory: {}'.format(base_working_dir))

# 2. Making Dictionary of labels

In [None]:
# Maps the short diagnosis codes of the metadata 'dx' column to readable names.
# Labels carry no surrounding whitespace so that they match when compared or
# displayed elsewhere in the notebook.
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

# 3. Reading & Processing data

We made some new columns which is easily understood for later reference such as the path to the image_id, cell_type which contains the short name of lesion type.

We also create the categorical column cell_type_idx, in which the lesion types are encoded as integer codes from 0 to 6.

In [None]:
# Name of the metadata file, stored inside the project working directory.
csv_name = 'HAM10000_metadata.csv'

file_path = os.path.join(base_working_dir, csv_name)

# Download the metadata only once; later runs reuse the local copy.
if not os.path.isfile(file_path):
    url = 'https://drive.google.com/uc?authuser=0&id=1jdCVOmeJXI6bhWIfFVL7RqwyuOHiRjaE&export=download'
    gdown.download(url, file_path, quiet=False)

skin_df = pd.read_csv(file_path)

# Creating New Columns for better readability
# cell_type: readable lesion name looked up from the short 'dx' code.
skin_df['cell_type'] = skin_df['dx'].map(lesion_type_dict.get)
# cell_type_idx: integer code of each lesion name, as assigned by pd.Categorical.
skin_df['cell_type_idx'] = pd.Categorical(skin_df['cell_type']).codes

In [None]:
# Show the table shape and the first rows, including the newly made columns.
print(skin_df.shape)

# Keep head() as the final expression: a notebook cell only renders the value
# of its last expression, so calling it earlier displays nothing.
skin_df.head()

# 4. Exploratory Data Analysis


In [None]:
# Bar chart of the number of images per lesion type, labelled in the same way
# as the other exploratory plots of this section.
fig, ax1 = plt.subplots(1, 1, figsize=(10, 5))
skin_df['cell_type'].value_counts().plot(kind='bar', ax=ax1)
ax1.set_title('Distribution of Lesion Types')
ax1.set_xlabel('Lesion Type')
ax1.set_ylabel('Count')
# The lesion names are long; rotate them so they do not overlap.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Count the entries of the 'dx_type' column once, then plot and print them.
dx_type_counts = skin_df['dx_type'].value_counts()

fig, ax2 = plt.subplots(figsize=(10, 5))
dx_type_counts.plot(kind='bar', ax=ax2)
ax2.set(
    title='Distribution of Validation Methods',
    xlabel='Validation Method',
    ylabel='Count',
)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

print("\nValidation method counts:")
print(dx_type_counts)

In [None]:
# Count the entries of the 'localization' column once, then plot the full
# distribution and print the five most frequent locations.
localization_counts = skin_df['localization'].value_counts()

fig, ax3 = plt.subplots(figsize=(12, 6))
localization_counts.plot(kind='bar', ax=ax3)
ax3.set(
    title='Distribution of Cancer Location on Body',
    xlabel='Body Location',
    ylabel='Count',
)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

print("\nTop 5 body locations:")
print(localization_counts.head())

In [None]:
# Histogram of the 'age' column followed by its summary statistics.
age_column = skin_df['age']

fig, ax4 = plt.subplots(figsize=(10, 5))
age_column.hist(bins=30, ax=ax4, edgecolor='black')
ax4.set(title='Age Distribution', xlabel='Age', ylabel='Count')
plt.tight_layout()
plt.show()

print("\nAge statistics:")
print(age_column.describe())

In [None]:
# Count the entries of the 'sex' column once, then plot and print them.
sex_counts = skin_df['sex'].value_counts()

fig, ax5 = plt.subplots(figsize=(8, 5))
sex_counts.plot(kind='bar', ax=ax5)
ax5.set(title='Distribution by Sex', xlabel='Sex', ylabel='Count')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

print("\nSex distribution:")
print(sex_counts)

In [None]:
# Scatter plot of age against lesion type, one colour per type.
fig, ax = plt.subplots(figsize=(14, 8))

scatter_options = dict(
    data=skin_df,
    x='age',
    y='cell_type',
    hue='cell_type',
    s=50,
    alpha=0.6,
    ax=ax,
)
sns.scatterplot(**scatter_options)

ax.set_title('Age-wise Distribution of Skin Cancer Types', fontsize=14)
ax.set_xlabel('Age', fontsize=12)
ax.set_ylabel('Cell Type', fontsize=12)
# Place the legend outside the axes so it does not cover the points.
plt.legend(title='Cell Type', loc='upper left', bbox_to_anchor=(1.05, 1))
plt.tight_layout()
plt.show()

# 5. Loading the images

In the three following steps cancer images will be loaded from a shared archive to your personal Google Drive storage space. After this step a new file "HAM10000_images.tar" should be created in your Google Drive.

1) High-resolution images and the relief of the lesion are important features for dermatologists when visually examining skin moles.

2) We do not have access to the 3D relief using dermatoscopic images.

3) We will work on downsampled images.

*Interested in trying later on mid-sized images? Use the file 'HAM10000_images_300x225.tar' (you will need to download it manually to the Skin_Cancer_class directory in your Gdrive first).*



In [None]:
# Select which image archive to work with: set the flag below to True for the
# small images, or to False for the mid-sized ones.
work_with_small_images:bool = True

if work_with_small_images:
    # Use the archive of small images first (file 'HAM10000_images_150x100.tar').
    url = 'https://drive.google.com/uc?authuser=0&id=1--oGquD0y48lW-6WRz5ldM1rGqbJKOez&export=download'
    tgz_name = 'HAM10000_images_150x100.tar'
else:
    # High resolution is important for dermatologists to assess skin moles visually.
    # Interested in training on larger images (file 'HAM10000_images_300x225.tar')?
    # Download this file manually to your Google Drive first!
    url = 'https://drive.google.com/uc?authuser=0&id=1-4fKAGB_rpzp6eFFEzAYOVJgXtkDzLoW&export=download'
    tgz_name = 'HAM10000_images_300x225.tar'

Download (if necessary) and load the .tar file containing compressed images.

In [None]:
# Location of the selected image archive inside the working directory.
file_path = os.path.join(base_working_dir, tgz_name)

# Fetch the archive only when there is no local copy yet.
archive_missing = not os.path.isfile(file_path)
if archive_missing:
    gdown.download(url, file_path, quiet=False)

# Keep the archive open: the following cells read the images out of `t`.
t = tarfile.open(file_path, mode='r')

The images will be decompressed and loaded into the datasheet from the downloaded archive.  Loading 10000 images takes several seconds.

In [None]:
# read each image and assign it as a cell content
def _read_image(image_id):
    """Decode one JPEG of the open archive `t` into a NumPy array."""
    # Tar member names always use '/', whatever the host OS: building them
    # with os.path.join would produce '\\' on Windows and miss every member.
    member = t.extractfile('HAM10000_images/{}.jpg'.format(image_id))
    return np.asarray(Image.open(io.BytesIO(member.read())))


t1 = time()
skin_df['image'] = skin_df['image_id'].map(_read_image)
print('{} images read in {} seconds.'.format(len(skin_df['image']), np.round(time()-t1, decimals=2)))

Now we have 10015 colour images of size 100x75 loaded into the computer memory.

Let's check a few sample images to see each cancer type

In [None]:
n_samples = 5


def _open_sample_image(image_id):
    """Open the archive member for `image_id`, trying the known name variants in order.

    Raises KeyError when none of the candidate names yields a readable member.
    """
    candidate_names = (
        f'{image_id}.jpg',
        f'HAM10000_images/{image_id}.jpg',
        f'{image_id}.jpeg',
        f'HAM10000_images/{image_id}.jpeg',
    )
    for candidate in candidate_names:
        try:
            handle = t.extractfile(candidate)
        except KeyError:
            # This variant is not in the archive; try the next one.
            continue
        if handle is not None:
            return Image.open(handle)
        # The member exists but cannot be read as a file: stop searching.
        break
    raise KeyError(f"Image {image_id} not found")


# One row of axes per lesion type, n_samples randomly drawn images per row.
fig, m_axs = plt.subplots(7, n_samples, figsize=(4*n_samples, 3*7))
rows_by_type = skin_df.sort_values(['cell_type']).groupby('cell_type')
for row_axes, (type_name, type_rows) in zip(m_axs, rows_by_type):
    row_axes[0].set_title(type_name, fontsize=14)
    drawn_rows = type_rows.sample(n_samples, random_state=1234)
    for c_ax, (_, c_row) in zip(row_axes, drawn_rows.iterrows()):
        image_id = c_row['image_id']
        try:
            c_ax.imshow(_open_sample_image(image_id))
        except Exception:
            # Show a placeholder text instead of aborting the whole figure.
            c_ax.text(0.5, 0.5, f'Error loading\n{image_id}',
                     ha='center', va='center', fontsize=8)
        c_ax.axis('off')

plt.tight_layout()
fig.savefig(os.path.join(base_working_dir,'category_samples.png'), dpi=300)
plt.show()

# 6. Dataset preparation
Convert the targets (the **cell_type_idx** column of the datasheet) into the one-hot encoding format. The final data need to be PyTorch tensors with same data-type.

In [None]:
# Stack the per-row image arrays and the integer class codes into NumPy arrays.
features = np.asarray(skin_df['image'].tolist())
targets = np.asarray(skin_df['cell_type_idx'].tolist())

# OneHotEncoder expects a 2-D input: one column holding the class codes.
targets_reshaped = targets.reshape(-1, 1)

# sparse_output=False makes the encoder return a dense array.
encoder = OneHotEncoder(sparse_output=False)
targets_onehot = encoder.fit_transform(targets_reshaped)

# Features and targets are both converted to float32 tensors.
features_tensor = torch.tensor(features, dtype=torch.float32)
targets_tensor = torch.tensor(targets_onehot, dtype=torch.float32)

print("Features shape:", features_tensor.shape)
print("Targets shape (one-hot encoded):", targets_tensor.shape)
print("Number of classes:", targets_onehot.shape[1])

In [None]:
print(f"Original data shape: {tuple(features_tensor.shape)}")

# Move the last axis to position 1 in a single step; presumably this turns
# (N, H, W, C) image stacks into the (N, C, H, W) layout used by nn.Conv2d.
features_tensor = features_tensor.permute(0, 3, 1, 2)

print(f"New data shape: {tuple(features_tensor.shape)}")

In [None]:
# Hold out 20% of the samples as the test set; the fixed random_state makes
# the split repeatable. The "_o" suffix marks the features before normalization.
x_train_o, x_test_o, y_train, y_test = train_test_split(features_tensor, targets_tensor,
                                                          test_size=0.2, random_state=123)

# 7. Normalization

In [None]:
# Scalar statistics taken over the whole training tensor only, so that no
# information from the test set enters the normalization.
mean, std = x_train_o.mean(), x_train_o.std()

# Apply the same training statistics to both splits.
x_train, x_test = ((split - mean) / std for split in (x_train_o, x_test_o))

report = (
    ("Mean:", mean),
    ("Std:", std),
    ("Train data - Mean after normalization:", x_train.mean()),
    ("Train data - Std after normalization:", x_train.std()),
)
for caption, statistic in report:
    print(caption, statistic.item())

# 8. Splitting training and validation

In [None]:
# Set aside 10% of the (already normalized) training data for validation;
# the fixed random_state makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train,
                                                    test_size=0.1, random_state=123)

print("Training set size:", x_train.shape[0])
print("Validation set size:", x_val.shape[0])
print("Test set size:", x_test.shape[0])

# 9. Model Building - CNN

In [None]:
from typing import Tuple, Literal, Union


def _as_pair(value):
    """Return `value` unchanged when it is iterable, else repeat it as a 2-tuple."""
    return value if np.iterable(value) else (value,) * 2


# Function to compute the new size of input images after being filtered (Conv2D or MaxPooling)
def get_output_size(
        input_size:torch.Size,
        kernel_size:Union[Tuple, int],
        kernel_depth:int=1,
        padding:Union[Tuple, int, Literal['valid','same']]='valid',
        stride:Union[Tuple, int]=(1,1),
        dilation:Union[Tuple, int]=(1,1)
) -> torch.Size:
    """Compute the (depth, height, width) of a tensor after a 2-D sliding-window filter.

    Args:
        input_size: size of the incoming tensor; only its last two entries
            (height, width) are used.
        kernel_size: filter size, a single int or a (height, width) pair.
        kernel_depth: number of output channels, returned as the first entry.
        padding: per-side padding as an int or a pair, or one of the strings
            'valid' (no padding) and 'same' (padding that keeps the spatial
            size for a stride of 1, taking the dilation into account).
        stride: filter stride, a single int or a pair.
        dilation: filter dilation, a single int or a pair.

    Returns:
        torch.Size((kernel_depth, new_height, new_width)).

    Raises:
        ValueError: if `padding` is a string other than 'valid' or 'same'.
    """
    # Expand scalars first: the 'same' branch below indexes kernel_size and
    # dilation, which fails on a plain int.
    kernel_size = _as_pair(kernel_size)
    stride = _as_pair(stride)
    dilation = _as_pair(dilation)

    if isinstance(padding, str):
        if padding == 'valid':
            padding = (0, 0)
        elif padding == 'same':
            # Half of the effective kernel extent on each side; may be a
            # half-integer for even extents, the final int() absorbs it.
            padding = (
                dilation[-2] * (kernel_size[-2] - 1) / 2,
                dilation[-1] * (kernel_size[-1] - 1) / 2,
            )
        else:
            raise ValueError("Argument 'padding', if string, must be either 'valid' or 'same'")
    else:
        padding = _as_pair(padding)

    # Standard output-size formula, applied to the height (-2) and width (-1) axes.
    new_height, new_width = (
        int(
            (input_size[axis] + 2 * padding[axis] - dilation[axis] * (kernel_size[axis] - 1) - 1)
            / stride[axis]
            + 1
        )
        for axis in (-2, -1)
    )
    return torch.Size((kernel_depth, new_height, new_width))

#### Hyperparameter set-up

1. depths of convolutional kernels (conv2D_depth_.);
2. filter kernel sizes (conv2D and maxPool);
3. filter paddings (conv2D and maxPool);
4. filter strides (conv2D and maxPool);
5. size of the first dense layer (size_dense_1);
6. dropout probabilities (2D and Dense).

In [None]:
# HYPERPARAMETERS

# Input geometry, read from the training tensor: axis 1 gives the number of
# channels, the remaining axes give the 2-D image size.
input_channels = x_train.size(1)
input_size2D = x_train.shape[2:]

# Number of output channels of the convolutions in blocks 1 to 4.
conv2D_depth_1 = 32
conv2D_depth_2 = 64
conv2D_depth_3 = 128
conv2D_depth_4 = 256

# Convolution settings shared by every Conv2d layer.
conv2D__kernel_size = (3,) * 2
conv2D__padding = 'same'
conv2D__stride = 1

# Pooling settings shared by every MaxPool2d layer.
maxPool_kernel_size = (3,) * 2
maxPool_padding = 1
maxPool_stride = 2

# Width of the hidden dense layer and number of output classes.
size_dense_1 = 512
num_classes  = 7

# Dropout probabilities after each convolutional block and after the dense layer.
dropout_probability_2D = 0.25
dropout_probability_Dense = 0.5

print("Input channels:", input_channels)
print("Input size:", input_size2D)
print("Conv depths:", conv2D_depth_1, conv2D_depth_2, conv2D_depth_3, conv2D_depth_4)
print("Dense layer size:", size_dense_1)
print("Number of classes:", num_classes)

#### Tensor size computation

In [None]:
# Computation of the image sizes through the successive layers

def _block_output_size(in_size, depth):
    """Size after one [Conv2D x 2 -> MaxPool2D] block producing `depth` channels."""
    size = in_size
    for _ in range(2):
        size = get_output_size(size, kernel_size=conv2D__kernel_size, kernel_depth=depth,
                               padding=conv2D__padding, stride=conv2D__stride)
    return get_output_size(size, kernel_size=maxPool_kernel_size, kernel_depth=depth,
                           padding=maxPool_padding, stride=maxPool_stride)


# Each size below is the input of the named stage, i.e. the output of the block before it.
block_1_in_size = (input_channels, *input_size2D)
block_2_in_size = _block_output_size(block_1_in_size, conv2D_depth_1)
block_3_in_size = _block_output_size(block_2_in_size, conv2D_depth_2)
block_4_in_size = _block_output_size(block_3_in_size, conv2D_depth_3)
fully_connected_in_size = _block_output_size(block_4_in_size, conv2D_depth_4)

# Number of features once the (depth, height, width) volume is flattened.
fc_depth, fc_height, fc_width = fully_connected_in_size
flatten_size = fc_depth * fc_height * fc_width

print("Flatten size:", flatten_size)
print("Fully connected input size:", fully_connected_in_size)

#### Building of the model

This CNN architecture is

$(In) \rightarrow [[\text{Conv2D} \rightarrow \text{ReLU}] \times 2 \rightarrow \text{BatchNorm2D} \rightarrow \text{MaxPool2D} \rightarrow \text{Dropout}] \times 4 \rightarrow \text{Flatten} \rightarrow \text{Dense} + \text{Activation} \rightarrow \text{Dropout} \rightarrow \text{Dense} + \text{Activation} ~(Out)$

In [None]:
# Model construction
#
# Four feature-extraction blocks of [Conv2d -> ReLU] x 2 -> BatchNorm2d ->
# MaxPool2d -> Dropout, followed by a two-layer dense classifier.
#
# The network returns raw class scores (logits) and has no final Softmax:
# the nn.CrossEntropyLoss criterion used for training applies log-softmax
# itself, so a Softmax layer here would be applied twice and flatten the
# gradients. Class predictions are unaffected because argmax is the same on
# logits and on probabilities; apply torch.softmax to the output only when
# probabilities are needed.

model = nn.Sequential(
    # Block 1
    nn.Conv2d(in_channels=input_channels, out_channels=conv2D_depth_1, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.Conv2d(in_channels=conv2D_depth_1, out_channels=conv2D_depth_1, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.BatchNorm2d(conv2D_depth_1),
    nn.MaxPool2d(kernel_size=maxPool_kernel_size, padding=maxPool_padding, stride=maxPool_stride),
    nn.Dropout(dropout_probability_2D),

    # Block 2
    nn.Conv2d(in_channels=conv2D_depth_1, out_channels=conv2D_depth_2, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.Conv2d(in_channels=conv2D_depth_2, out_channels=conv2D_depth_2, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.BatchNorm2d(conv2D_depth_2),
    nn.MaxPool2d(kernel_size=maxPool_kernel_size, padding=maxPool_padding, stride=maxPool_stride),
    nn.Dropout(dropout_probability_2D),

    # Block 3
    nn.Conv2d(in_channels=conv2D_depth_2, out_channels=conv2D_depth_3, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.Conv2d(in_channels=conv2D_depth_3, out_channels=conv2D_depth_3, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.BatchNorm2d(conv2D_depth_3),
    nn.MaxPool2d(kernel_size=maxPool_kernel_size, padding=maxPool_padding, stride=maxPool_stride),
    nn.Dropout(dropout_probability_2D),

    # Block 4
    nn.Conv2d(in_channels=conv2D_depth_3, out_channels=conv2D_depth_4, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.Conv2d(in_channels=conv2D_depth_4, out_channels=conv2D_depth_4, kernel_size=conv2D__kernel_size, padding=conv2D__padding, stride=conv2D__stride),
    nn.ReLU(),
    nn.BatchNorm2d(conv2D_depth_4),
    nn.MaxPool2d(kernel_size=maxPool_kernel_size, padding=maxPool_padding, stride=maxPool_stride),
    nn.Dropout(dropout_probability_2D),

    # Classifier head: outputs one logit per class
    nn.Flatten(),
    nn.Linear(flatten_size, size_dense_1),
    nn.ReLU(),
    nn.Dropout(dropout_probability_Dense),
    nn.Linear(size_dense_1, num_classes),
)

print(model)

#### Send the model to GPU

Send the model to your Cuda GPU to accelerate training, by simply using the model.cuda() function.

In [None]:
# Send the model to Cuda GPU
# model.cuda() moves the parameters in place; the guard skips the call when no
# CUDA device is available.
if torch.cuda.is_available():
    model.cuda()

#### Model summary

Print the model architecture and weights using the summary() function provided in the torchsummary library.

In [None]:
# Install torchsummary into the environment of the running kernel, then import it.
%pip install torchsummary
from torchsummary import summary

# Print the layer-by-layer summary for one input of size (channels, height, width).
summary(model, input_size=(input_channels, *input_size2D))

# 10. Training configuration

In [None]:
# Define the criterion (loss function)
# Cross-entropy for multi-class classification; in this notebook it is called
# with the model outputs and the one-hot encoded targets.

criterion = nn.CrossEntropyLoss()

In [None]:
# Define the optimizer (take a relevant learning rate, e.g., lr=1e-4)
# Adam updates every parameter of the model with a learning rate of 1e-4.

optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Define the evaluation metric

def accuracy(y_hat:torch.Tensor, y:torch.Tensor) -> float:
    """Return the fraction of rows whose predicted class matches the true class.

    Both arguments hold one row per sample and one column per class: `y_hat`
    contains the model scores and `y` the one-hot encoded labels. The class of
    a row is the column index of its largest value.
    """
    predicted_classes = y_hat.argmax(dim=1)
    true_classes = y.argmax(dim=1)
    n_correct = predicted_classes.eq(true_classes).sum().item()
    return n_correct / y.size(0)

# 11. Data Augmentation

Define the list of random transform functions in a transforms.Compose() object.

In [None]:
# Define the object for random image transforms
# The transforms are applied in the listed order each time the object is called.

my_transforms = transforms.Compose([
    # random affine transform: rotation + translation + zoom
    # (rotation up to +/-180 degrees, shift up to 30% of the size along each
    # axis, scale factor between 0.8 and 1.2)
    transforms.RandomAffine(degrees=180, translate=(0.3,)*2, scale=(0.8, 1.2)),
    # random horizontal flip
    transforms.RandomHorizontalFlip(),
    # random vertical flip
    transforms.RandomVerticalFlip(),
])

Visualize the effect of your random image transformer.



In [None]:
# Index of the training image used for the preview.
num_img = 4

def normalized(img:torch.Tensor) -> torch.Tensor:
    """Rescale `img` linearly to the range [0, 1] for display.

    An image whose values span less than 1e-6 is only shifted so that its
    minimum becomes zero, which avoids dividing by a vanishing range.
    """
    lowest, highest = img.min(), img.max()
    value_range = highest - lowest
    if value_range.abs() > 1e-6:
        return (img - lowest) / value_range
    return img - lowest

img_original_show = normalized(x_train[num_img])
img_transfor_show = my_transforms(img_original_show)

# imshow expects the channel axis last, so move axis 0 to the end.
_, axs = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (axs[0], img_original_show, 'Original'),
    (axs[1], img_transfor_show, 'Transformed'),
)
for panel_ax, panel_img, _ in panels:
    panel_ax.imshow(panel_img.permute(1, 2, 0))
for panel_ax, _, panel_title in panels:
    panel_ax.set_title(panel_title)
for panel_ax, _, _ in panels:
    panel_ax.axis('off')
plt.show()

Build a custom TensorDataset class to automatically apply image transforms when calling the images.

In [None]:
# Custom TensorDataset class that applies data augmentation on the fly

class TensorDatasetWithTransform(TensorDataset):
    """TensorDataset that optionally transforms each image when it is accessed.

    Args:
        x: tensor of images, indexed along its first dimension.
        y: optional tensor of labels with the same first dimension as `x`.
        transforms: optional callable applied to every image returned by
            `__getitem__`; the labels are never transformed.

    Indexing returns `(image, label)` when `y` was given, else only `image`.
    """

    def __init__(self, x, y=None, transforms=None):
        # Initialise the parent class so that inherited state (`self.tensors`)
        # exists and the first dimensions of x and y are checked for equality.
        tensors = (x,) if y is None else (x, y)
        super().__init__(*tensors)
        self.x = x
        self.y = y
        self.transforms = transforms

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        image = self.x[idx]
        # The transform runs on every access, so random transforms give a
        # different result each time the same index is requested.
        if self.transforms is not None:
            image = self.transforms(image)
        if self.y is not None:
            return image, self.y[idx]
        return image

Define the occurrence probabilities of the classes in the train and validation datasets, to make the classes well balanced when split into batches, using WeightedRandomSampler.

In [None]:
# Inverse-frequency class weights from the training labels, scaled to sum to one.
class_counts = y_train.sum(dim=0)
class_weights = class_counts.reciprocal()
class_weights = class_weights / class_weights.sum()

# Every sample receives the weight of its class (argmax of its one-hot row).
train_sample_weights = class_weights[y_train.argmax(dim=-1)]
val_sample_weights = class_weights[y_val.argmax(dim=-1)]

# Draw with replacement as many samples per epoch as each split contains.
train_sampler = WeightedRandomSampler(weights=train_sample_weights, num_samples=len(y_train), replacement=True)
val_sampler = WeightedRandomSampler(weights=val_sample_weights, num_samples=len(y_val), replacement=True)

Put train and validation datasets in two custom TensorDataset with our transformer object, and split them into batches using DataLoader and weighted samplers as argument.

In [None]:
batch_size = 64

# Train and validation batches are drawn by the weighted samplers and pass
# through the random transforms; the test set is iterated once, in order and
# without transforms.
# NOTE(review): the validation loader is also augmented and resampled, so
# validation metrics vary from epoch to epoch — confirm this is intended.
train_loader = DataLoader(TensorDatasetWithTransform(x_train, y_train, my_transforms), batch_size=batch_size, sampler=train_sampler)
val_loader   = DataLoader(TensorDatasetWithTransform(x_val,   y_val,   my_transforms), batch_size=batch_size, sampler=val_sampler)
test_loader  = DataLoader(TensorDatasetWithTransform(x_test,  y_test), batch_size=batch_size)

print(f"Train batches: {len(train_loader)}")
print(f"Validation batches: {len(val_loader)}")
print(f"Test batches: {len(test_loader)}")

# 12. Training the model

A 50-epoch training lasts around 16 minutes.

You can also allow training just for 20 epochs. In 20 epochs (~7minutes) your model should almost converge.

In [None]:
# Maximum number of epochs, and number of epochs without validation-accuracy
# improvement tolerated before training stops early.
epochs = 50
patience = 10

# File that receives the weights of the best model seen so far.
save_path = os.path.join(base_working_dir, 'model_best.pth')

best_val_acc = 0.0
epochs_no_improve = 0
early_stop = False

# Per-epoch history, plotted after training.
train_losses, val_losses = [], []
train_accs, val_accs = [], []

# In-memory copy of the best weights, restored once training ends.
best_model_wts = copy.deepcopy(model.state_dict())

# Verbosity switches: per-epoch metrics, and early-stopping messages.
train_loop_verbose = True
early_stop_verbose = False

for epoch in range(epochs):

    if train_loop_verbose:
        print(f"\nEpoch {epoch+1}/{epochs}")

    # ---- Training phase ----
    model.train()
    train_loss, train_acc = 0.0, 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.cuda(), labels.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch metrics by the batch size so that the division
        # below yields per-sample averages.
        train_loss += loss.item() * inputs.size(0)
        train_acc  += accuracy(outputs, labels) * inputs.size(0)

    train_loss /= len(train_loader.dataset)
    train_acc  /= len(train_loader.dataset)

    # ---- Validation phase (no gradient tracking) ----
    model.eval()
    val_loss, val_acc = 0.0, 0.0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.cuda(), labels.cuda()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * inputs.size(0)
            val_acc  += accuracy(outputs, labels) * inputs.size(0)

    val_loss /= len(val_loader.dataset)
    val_acc  /= len(val_loader.dataset)

    train_losses.append(train_loss)
    train_accs.append(train_acc)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    if train_loop_verbose:
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # ---- Checkpointing and early stopping, driven by validation accuracy ----
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        torch.save(model.state_dict(), save_path)
        if early_stop_verbose:
            print(f"OK: Model improved and saved to {save_path}")
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if early_stop_verbose:
            print(f"No improvement for {epochs_no_improve} epochs.")

    if epochs_no_improve >= patience:
        if early_stop_verbose:
            print(f"ES: Early stopping at epoch {epoch+1}")
        early_stop = True
        break

if early_stop_verbose and not early_stop:
    print("Training completed without early stopping.")

# Restore the weights that achieved the best validation accuracy.
model.load_state_dict(best_model_wts)
print(f"Best validation accuracy: {best_val_acc:.4f}")

# 13. Model Evaluation

In [None]:
# Gather the per-epoch curves under the keys read by plot_training_history,
# then draw the loss and accuracy plots.
history = dict(
    train_loss=train_losses,
    val_loss=val_losses,
    train_acc=train_accs,
    val_acc=val_accs,
)

plot_training_history(history)

### **Evaluate the model on the test dataset**

In [None]:
# Run the model over the whole test set and collect outputs and labels on the CPU.
model.eval()
y_pred_list, y_true_list = [], []

with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        batch_outputs = model(batch_inputs.cuda())
        y_pred_list.append(batch_outputs.cpu())
        y_true_list.append(batch_labels)

# Join the per-batch results along the sample axis.
y_pred = torch.cat(y_pred_list, dim=0)
y_true = torch.cat(y_true_list, dim=0)

test_acc = accuracy(y_pred, y_true)
print(f"Test Accuracy: {test_acc:.4f}")

Compute the loss and accuracy of the model on the test dataset.

In [None]:
# Loss and accuracy computed in one call each on the concatenated test outputs.
test_loss = criterion(y_pred, y_true).item()
test_acc = accuracy(y_pred, y_true)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

Plot the confusion matrix and check the misclassified count of each type.



In [None]:
# Convert the model outputs and the one-hot labels to class indices.
y_pred_classes = y_pred.argmax(dim=1)
y_true_classes = y_true.argmax(dim=1)

# Class names listed in the order of the integer codes in `cell_type_idx`.
# Those codes come from pd.Categorical, which numbers the labels in sorted
# order and not in the order of lesion_type_dict, so a hand-written list in
# dictionary order would put the wrong name on every row and column.
cell_types = [name.strip() for name in pd.Categorical(skin_df['cell_type']).categories]

# Passing the full label range keeps one row and column per class even if a
# class happens to be absent from the test predictions.
confusion_mtx = confusion_matrix(y_true_classes, y_pred_classes,
                                 labels=np.arange(len(cell_types)))

plot_confusion_matrix(y_true_classes, y_pred_classes, cell_types)

print("\nConfusion Matrix:")
print(confusion_mtx)
print("\n" + "="*70)
print("MISCLASSIFICATION ANALYSIS")
print("="*70)

for i, cell_type in enumerate(cell_types):
    total = confusion_mtx[i].sum()
    correct = confusion_mtx[i, i]
    misclassified = total - correct
    # Named class_accuracy so that the accuracy() metric function is not
    # overwritten, which would break any cell that calls it afterwards.
    class_accuracy = correct / total if total > 0 else 0

    print(f"\n{cell_type}:")
    print(f"  Total samples: {total}")
    print(f"  Correctly classified: {correct}")
    print(f"  Misclassified: {misclassified}")
    print(f"  Accuracy: {class_accuracy*100:.2f}%")

total_samples = confusion_mtx.sum()
total_correct = confusion_mtx.diagonal().sum()
total_misclassified = total_samples - total_correct

print("\n" + "="*70)
print(f"OVERALL: {total_correct}/{total_samples} correct, {total_misclassified} misclassified")
print("="*70)

Now, let's see which category has the most incorrect predictions.

In [None]:
# Per-class error rate: one minus the share of each true class (matrix row)
# that lies on the diagonal.
samples_per_class = confusion_mtx.sum(axis=1)
correct_per_class = np.diag(confusion_mtx)
label_frac_error = 1 - correct_per_class / samples_per_class

# One bar per class index, taken from the width of the one-hot test labels.
class_positions = np.arange(y_test.shape[-1])
plt.bar(class_positions, label_frac_error)
plt.xlabel('True Label')
plt.ylabel('Fraction classified incorrectly')
plt.show()