In [24]:
import numpy as np
import torch
from torchvision import datasets, transforms, models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.optim as optim
import os
import torchvision
import torchvision.transforms as transforms

<H2> Data prep

In [25]:
#transform to 224, 224 since VGG expect that size
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),           # Convert images to PyTorch tensors
])

# Define paths to your train and validation data
train_path = "train"
val_path = "val"

#define train and valdataset again but with transoformers this time
train_dataset = ImageFolder(train_path, transform=transform)
val_dataset = ImageFolder(val_path, transform=transform)

#8 batch size to reduce training time
batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

#20% of training since otherwise it would have taken a day to train
subset_size = int(0.2 * len(train_dataset))

# Randomly select a subset of indices from the training dataset
subset_indices = np.random.choice(len(train_dataset), subset_size, replace=False)

# Create a subset of the training dataset using the selected indices
train_subset = Subset(train_dataset, subset_indices)

# Use DataLoader to load the subset of data in batches
train_loader_subset = DataLoader(train_subset, batch_size=batch_size, shuffle=True)

# Print dataset details
print("Number of classes:", len(train_dataset.classes))
print("Classes:", train_dataset.classes)
print("Number of training images:", len(train_dataset))
print("Number of training subset images:", len(train_subset))
print("Number of validation images:", len(val_dataset))

Number of classes: 2
Classes: ['MEL', 'NV']
Number of training images: 6426
Number of training subset images: 1285
Number of validation images: 1252


<h2> Some baseline model

In [26]:
import torch.nn as nn
import torch.nn.functional as F

In [27]:
class SimpleCNN(nn.Module):
    
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 28 * 28, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [28]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    # Training phase
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    with tqdm(train_loader_subset, desc=f"Epoch {epoch+1}/{num_epochs} (Training)", unit="batch") as t:
        for inputs, labels in t:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            t.set_postfix(loss=running_loss / total, accuracy=correct / total)

    # Print training statistics
    epoch_loss = running_loss / len(train_subset)
    epoch_acc = correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}, Training Accuracy: {epoch_acc:.2%}")

    # Validation phase
    model.eval()
    val_correct = 0
    val_total = 0
    val_loss = 0.0
    with torch.no_grad():
        for val_inputs, val_labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Validation)", unit="batch"):
            val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
            val_outputs = model(val_inputs)
            val_loss += criterion(val_outputs, val_labels).item() * val_inputs.size(0)
            _, val_predicted = torch.max(val_outputs, 1)
            val_total += val_labels.size(0)
            val_correct += (val_predicted == val_labels).sum().item()

    # Print validation statistics
    val_epoch_loss = val_loss / len(val_dataset)
    val_epoch_acc = val_correct / val_total
    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_acc:.2%}")


Epoch 1/5 (Training): 100%|██████████| 161/161 [00:39<00:00,  4.09batch/s, accuracy=0.735, loss=0.538]


Epoch [1/5], Training Loss: 0.5380, Training Accuracy: 73.54%


Epoch 1/5 (Validation): 100%|██████████| 157/157 [00:14<00:00, 10.47batch/s]


Epoch [1/5], Validation Loss: 0.4880, Validation Accuracy: 73.96%


Epoch 2/5 (Training): 100%|██████████| 161/161 [00:38<00:00,  4.15batch/s, accuracy=0.742, loss=0.539]


Epoch [2/5], Training Loss: 0.5386, Training Accuracy: 74.16%


Epoch 2/5 (Validation): 100%|██████████| 157/157 [00:13<00:00, 11.59batch/s]


Epoch [2/5], Validation Loss: 0.4880, Validation Accuracy: 73.96%


Epoch 3/5 (Training): 100%|██████████| 161/161 [00:39<00:00,  4.11batch/s, accuracy=0.742, loss=0.532]


Epoch [3/5], Training Loss: 0.5324, Training Accuracy: 74.16%


Epoch 3/5 (Validation): 100%|██████████| 157/157 [00:14<00:00, 10.88batch/s]


Epoch [3/5], Validation Loss: 0.4880, Validation Accuracy: 73.96%


Epoch 4/5 (Training): 100%|██████████| 161/161 [00:40<00:00,  3.97batch/s, accuracy=0.733, loss=0.54] 


Epoch [4/5], Training Loss: 0.5396, Training Accuracy: 73.31%


Epoch 4/5 (Validation): 100%|██████████| 157/157 [00:13<00:00, 11.31batch/s]


Epoch [4/5], Validation Loss: 0.4880, Validation Accuracy: 73.96%


Epoch 5/5 (Training): 100%|██████████| 161/161 [00:40<00:00,  3.98batch/s, accuracy=0.738, loss=0.533]


Epoch [5/5], Training Loss: 0.5331, Training Accuracy: 73.77%


Epoch 5/5 (Validation): 100%|██████████| 157/157 [00:13<00:00, 11.27batch/s]

Epoch [5/5], Validation Loss: 0.4880, Validation Accuracy: 73.96%





<h2> VGG16 Model

In [20]:
num_cpu_cores = os.cpu_count()
print(num_cpu_cores)

8


In [21]:
random_transforms = transforms.RandomApply([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.15, saturation=0.1, hue=0.1)
], p=0.2)  # Apply each transformation with 20% probability

# Use the provided vggtransforms for preprocessing
transform = transforms.Compose([
    random_transforms,               # Apply random transformations for data augmentation
    transforms.Resize((224, 224)),   # Resize images to 224x224
    transforms.ToTensor(),           # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  #preprocess images according to
    #imagenet numbers
])

#define train and valdataset again but with complex transformers
#need to redefine everything so the new transformers are applied correctly
train_dataset = ImageFolder(train_path, transform=transform)
val_dataset = ImageFolder(val_path, transform=transform)

batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_cpu_cores)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_cpu_cores)

subset_size = int(0.2 * len(train_dataset))

subset_indices = np.random.choice(len(train_dataset), subset_size, replace=False)

train_subset = Subset(train_dataset, subset_indices)

train_loader_subset = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=num_cpu_cores)

In [22]:
#load in weights from the model
weights_id = torchvision.models.VGG16_Weights.IMAGENET1K_V1

vggmodel = models.vgg16(weights=weights_id)
vggmodel.eval()  # Set the model to evaluation mode

for param in resnet_model.fc.parameters():
    param.requires_grad = True

# Modify the classifier layer
num_features = vggmodel.classifier[6].in_features
vggmodel.classifier[6] = nn.Linear(num_features, len(train_dataset.classes))

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vgg_model = vggmodel.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vggmodel.parameters(), lr=0.001)

In [23]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    vgg_model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    
    # Initialize tqdm progress bar
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as t:
        for inputs, labels in t:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = vgg_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

            # Update tqdm progress bar
            t.set_postfix(loss=running_loss / total_train, accuracy=correct_train / total_train)

    # Calculate training accuracy
    train_accuracy = correct_train / total_train

    # Validation phase
    vgg_model.eval()
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = vgg_model(inputs)
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    # Calculate validation accuracy
    val_accuracy = correct_val / total_val

    # Print epoch statistics
    epoch_loss = running_loss / len(train_subset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {train_accuracy:.2%}, Val Accuracy: {val_accuracy:.2%}")

Epoch 1/5: 100%|██████████| 804/804 [24:25<00:00,  1.82s/batch, accuracy=0.796, loss=0.462]


Epoch [1/5], Loss: 2.3113, Train Accuracy: 79.55%, Val Accuracy: 84.58%


Epoch 2/5: 100%|██████████| 804/804 [23:28<00:00,  1.75s/batch, accuracy=0.807, loss=0.457]


Epoch [2/5], Loss: 2.2853, Train Accuracy: 80.66%, Val Accuracy: 83.95%


Epoch 3/5: 100%|██████████| 804/804 [23:53<00:00,  1.78s/batch, accuracy=0.804, loss=0.474]


Epoch [3/5], Loss: 2.3708, Train Accuracy: 80.42%, Val Accuracy: 82.35%


Epoch 4/5: 100%|██████████| 804/804 [22:53<00:00,  1.71s/batch, accuracy=0.807, loss=0.471]


Epoch [4/5], Loss: 2.3575, Train Accuracy: 80.72%, Val Accuracy: 84.50%


Epoch 5/5: 100%|██████████| 804/804 [24:06<00:00,  1.80s/batch, accuracy=0.809, loss=0.477]


Epoch [5/5], Loss: 2.3846, Train Accuracy: 80.91%, Val Accuracy: 80.43%


Epoch [5/5], Loss: 2.3846, Train Accuracy: 80.91%, Val Accuracy: 80.43% all transformations

In [11]:
# Set the model to evaluation mode
vggmodel.eval()

# Variables to keep track of accuracy and loss
val_loss = 0.0
val_correct = 0
val_total = 0

# Use tqdm for progress bar
with tqdm(val_loader, desc="Validation", unit="batch") as t:
    # Disable gradient computation for validation
    with torch.no_grad():
        # Iterate over the validation set
        for inputs, labels in t:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = vggmodel(inputs)
            loss = criterion(outputs, labels)

            # Compute loss
            val_loss += loss.item() * inputs.size(0)

            # Compute accuracy
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

            # Update tqdm progress bar
            t.set_postfix(loss=val_loss / val_total, accuracy=val_correct / val_total)

# Calculate average loss and accuracy
avg_val_loss = val_loss / len(val_dataset)
val_accuracy = val_correct / val_total

# Print validation results
print(f"Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.2%}")


Validation: 100%|██████████| 157/157 [04:57<00:00,  1.90s/batch, accuracy=0.811, loss=0.395]

Validation Loss: 0.3952, Validation Accuracy: 81.07%





<H2> ResNet Try

In [60]:
import torchvision.models as models
import torchvision.transforms.functional as TF

In [61]:
# Define settings
use_data_augmentation = True
use_batch_norm = True
use_weight_decay = True
use_subset_training = True
batch_size = 8
lr = 0.0001
num_epochs = 5

In [62]:
# Define transforms
random_transforms = transforms.RandomApply([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.15, saturation=0.1, hue=0.1)
], p=0.2) if use_data_augmentation else None

transform = transforms.Compose([
    random_transforms,
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [63]:
train_dataset = ImageFolder(train_path, transform=transform)
val_dataset = ImageFolder(val_path, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_cpu_cores)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_cpu_cores)

# Subset training data if enabled
if use_subset_training:
    subset_size = int(0.2 * len(train_dataset))
    subset_indices = np.random.choice(len(train_dataset), subset_size, replace=False)
    train_subset = Subset(train_dataset, subset_indices)
    train_loader_subset = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=num_cpu_cores)
else:
    train_loader_subset = train_loader

In [65]:
# Load pre-trained ResNet model
resnet_model = models.resnet18(weights=True)

# Freeze the parameters of the model
if not use_batch_norm:
   # Unfreeze last layer
    for param in resnet_model.fc.parameters():
        param.requires_grad = True

# Add batch normalization if enabled
if use_batch_norm:
    for param in resnet_model.fc.parameters():
        param.requires_grad = True
    for module in resnet_model.modules():
        if isinstance(module, nn.Conv2d):
            module.add_module('batch_norm', nn.BatchNorm2d(module.out_channels))

In [66]:
# Modify the classifier layer
num_features = resnet_model.fc.in_features
resnet_model.fc = nn.Linear(num_features, len(train_dataset.classes))

# Move the model to the appropriate device
resnet_model = resnet_model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_params = {'lr': lr}
if use_weight_decay:
    optimizer_params['weight_decay'] = 0.0001
optimizer = optim.Adam(resnet_model.parameters(), **optimizer_params)

In [None]:
# Training loop
for epoch in range(num_epochs):
    resnet_model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    # Initialize tqdm progress bar
    with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as t:
        for inputs, labels in t:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = resnet_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

            # Update tqdm progress bar
            t.set_postfix(loss=running_loss / total_train, accuracy=correct_train / total_train)

    # Calculate training accuracy
    train_accuracy = correct_train / total_train

    # Validation phase
    resnet_model.eval()
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = resnet_model(inputs)
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    # Calculate validation accuracy
    val_accuracy = correct_val / total_val

    # Print epoch statistics
    epoch_loss = running_loss / len(train_subset) if use_subset_training else running_loss / len(train_dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Train Accuracy: {train_accuracy:.2%}, Val Accuracy: {val_accuracy:.2%}")


Epoch 1/5: 100%|██████████| 804/804 [12:35<00:00,  1.06batch/s, accuracy=0.811, loss=0.414]


Epoch [1/5], Loss: 2.0690, Train Accuracy: 81.08%, Val Accuracy: 85.30%


Epoch 2/5: 100%|██████████| 804/804 [12:03<00:00,  1.11batch/s, accuracy=0.852, loss=0.337]


Epoch [2/5], Loss: 1.6858, Train Accuracy: 85.20%, Val Accuracy: 84.90%


Epoch 3/5: 100%|██████████| 804/804 [17:40<00:00,  1.32s/batch, accuracy=0.874, loss=0.292]


Epoch [3/5], Loss: 1.4610, Train Accuracy: 87.41%, Val Accuracy: 88.82%


Epoch 4/5: 100%|██████████| 804/804 [18:32<00:00,  1.38s/batch, accuracy=0.899, loss=0.252]


Epoch [4/5], Loss: 1.2588, Train Accuracy: 89.90%, Val Accuracy: 89.30%


Epoch 5/5:  27%|██▋       | 215/804 [05:18<14:00,  1.43s/batch, accuracy=0.922, loss=0.21] 

Epoch [5/5], Loss: 0.4807, Train Accuracy: 77.04%, Val Accuracy: 78.51% No transformers or pre-processing TRAIN SUBSET

Epoch [5/5], Loss: 0.4041, Train Accuracy: 79.77%, Val Accuracy: 83.15% transformers but no mean/std transform TRAIN SUBSET

Epoch [5/5], Loss: 0.3981, Train Accuracy: 81.32%, Val Accuracy: 81.39% Transformers with Mean/std transform TRAIN SUBSET

Epoch [5/5], Loss: 0.4406, Train Accuracy: 79.38%, Val Accuracy: 84.03% Transformers with Mean/std transform AND weight_decay in optimizer TRAIN SUBSET

Epoch [5/5], Loss: 2.1635, Train Accuracy: 79.46%, Val Accuracy: 82.99% All transformers and whole training set "higher" values on image augmentation

Epoch [5/5], Loss: 2.0784, Train Accuracy: 81.03%, Val Accuracy: 82.83%
Lowered image tranformations, lots of the misclassified images were very altered. Also lower % of pictures tranformed to 20%. WHOLE TRAINING SET 

Epoch [5/5], Loss: 2.0262, Train Accuracy: 81.61%, Val Accuracy: 82.11% Resized to 128, 128, batch_size 16. All transformers left as they were. Lower values on the image alteration. WHOLE TRAINING SET

Epoch [5/5], Loss: 1.9423, Train Accuracy: 82.56%, Val Accuracy: 84.19% Resize 224, 224, batch size 16, all transformers, added Batch_normalization after resnet_model is defined. WHOLE TRAINING SET

Epoch [5/5], Loss: 1.9692, Train Accuracy: 81.23%, Val Accuracy: 83.07%
Same as above but added a fully connected layer at the end. WHOLE TRAINING SET.

In [None]:
# Create a directory to save misclassified images
output_dir = 'misclassified_images'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Lists to store misclassified image paths, labels, and predictions
misclassified_image_paths = []
misclassified_labels = []
misclassified_predictions = []

# Validation phase
resnet_model.eval()
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = resnet_model(inputs)
        _, predicted = torch.max(outputs, 1)
        total_val += labels.size(0)
        correct_val += (predicted == labels).sum().item()
        
        # Find misclassified images
        misclassified_mask = predicted != labels
        misclassified_images = inputs[misclassified_mask]
        misclassified_labels.extend(labels[misclassified_mask].tolist())
        misclassified_predictions.extend(predicted[misclassified_mask].tolist())
        
        # Save misclassified images
        for i, image in enumerate(misclassified_images):
            image_path = os.path.join(output_dir, f'misclassified_{len(misclassified_image_paths) + 1}.png')
            misclassified_image_paths.append(image_path)
            
            # Normalize image pixel values to the range [0, 1]
            image = image.cpu().numpy().transpose((1, 2, 0))
            image = (image - image.min()) / (image.max() - image.min())
            
            plt.imsave(image_path, image)