In [1]:
!pip install opencv-python


Collecting opencv-python
  Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (19 kB)
Downloading opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opencv-python
Successfully installed opencv-python-4.12.0.88


In [2]:
# Standard library imports
import os
import sys
import json
import pickle
import random
import re
from glob import glob
from pathlib import Path

# Third-party library imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import cv2
from tqdm import tqdm
import scipy

# PyTorch imports
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

# torchvision imports
import torchvision
from torchvision import transforms
from torchvision.transforms import v2
from torchvision.datasets import VisionDataset
import torchvision.transforms as T

# PIL imports
from PIL import Image

import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import Subset, DataLoader
from sklearn.metrics import roc_auc_score, accuracy_score, recall_score, precision_score
import scipy.stats as stats

In [17]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Data Download and Preprocessing

In [4]:
os.chdir('/content/drive/MyDrive/dermoscopic_artifacts')
sys.path.append('/content/drive/MyDrive/dermoscopic_artifacts')
import importlib
import datasets
importlib.reload(datasets)
from datasets import ISICDataset, HAM10000Dataset, PH2Dataset, BCN20000Dataset

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32

In [14]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [9]:
import os
import shutil

src1 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1"
src2 = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2"
dst  = "/kaggle/working/HAM10000_images"

os.makedirs(dst, exist_ok=True)

def copy_all(src, dst):
    for filename in os.listdir(src):
        shutil.copy2(os.path.join(src, filename),
                     os.path.join(dst, filename))

copy_all(src1, dst)
copy_all(src2, dst)

print("Merged successfully into", dst)


Merged successfully into /kaggle/working/HAM10000_images


In [8]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("kmader/skin-cancer-mnist-ham10000")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'skin-cancer-mnist-ham10000' dataset.
Path to dataset files: /kaggle/input/skin-cancer-mnist-ham10000


# Eval on HAM10000 - Mode "whole"


In [30]:
class HAM10000Dataset(Dataset):
    def __init__(self, df, image_dir, mask_dir, transform=None, mode="whole", return_pil=False):
        """
        Args:
            df (pd.DataFrame): DataFrame containing image names and labels.
            image_dir (str): Directory containing original images.
            mask_dir (str): Directory containing ground truth segmentations.
            transform (callable, optional): Optional transform to apply to images.
            mode (str): One of "whole", "lesion", "background", "bbox", "bbox70",
                        "bbox90", "high_whole", "low_whole", "high_lesion",
                        "low_lesion", "high_background", "low_background".
        """
        self.df = df
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mode = mode
        self.return_pil = return_pil

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Load image
        img_name = self.df.iloc[idx]['image_id']

        # le = LabelEncoder()
        # df['label'] = le.fit_transform(df['dx'])
        # # print(le.classes_)  # saves mapping for inference
        # label = self.df.iloc[idx]['label']
        # print(label)

        # Binary label: 1 = melanoma, 0 = all other classes
        df['label'] = (df['dx'] == 'mel').astype(int)

        label = self.df.iloc[idx]['label']


        img_path = os.path.join(self.image_dir, f"{img_name}.jpg")
        mask_path = os.path.join(self.mask_dir, f"{img_name}_segmentation.png")

        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # Load segmentation mask

        # Ensure images and masks are the same size
        if image.shape[:2] != mask.shape:
            mask = cv2.resize(mask, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)

        # Binarize mask
        mask = (mask > 0).astype(np.uint8)

        if self.mode == "whole":
            processed_image = image

        elif self.mode == "lesion":
            processed_image = image * mask[:, :, np.newaxis]

        elif self.mode == "background":
            processed_image = image * (1 - mask[:, :, np.newaxis])

        elif self.mode in ["bbox", "bbox70", "bbox90"]:
            # Compute bounding box around lesion
            y_idxs, x_idxs = np.where(mask > 0)
            if len(y_idxs) == 0 or len(x_idxs) == 0:  # If no lesion
                processed_image = image * 0  # Blackout image
            else:
                y_min, y_max = y_idxs.min(), y_idxs.max()
                x_min, x_max = x_idxs.min(), x_idxs.max()

                # Compute the original bbox (for `bbox`)
                if self.mode == "bbox":
                    processed_image = image.copy()
                    cv2.rectangle(processed_image, (x_min, y_min), (x_max, y_max), (0, 0, 0), thickness=-1)

                # Expand bbox for bbox70 and bbox90
                else:
                    expand_ratio = 0.7 if self.mode == "bbox70" else 0.9

                    img_h, img_w = image.shape[:2]
                    bbox_h = y_max - y_min
                    bbox_w = x_max - x_min

                    # Calculate expansion to reach desired percentage of total image
                    target_area = expand_ratio * img_h * img_w
                    bbox_center_y, bbox_center_x = (y_min + y_max) // 2, (x_min + x_max) // 2

                    # Compute new bbox size
                    new_bbox_h = int(np.sqrt(target_area * (bbox_h / bbox_w)))  # Keep aspect ratio
                    new_bbox_w = int(np.sqrt(target_area * (bbox_w / bbox_h)))

                    # Ensure it fits within image boundaries
                    y_min = max(0, bbox_center_y - new_bbox_h // 2)
                    y_max = min(img_h, bbox_center_y + new_bbox_h // 2)
                    x_min = max(0, bbox_center_x - new_bbox_w // 2)
                    x_max = min(img_w, bbox_center_x + new_bbox_w // 2)

                    processed_image = image.copy()
                    cv2.rectangle(processed_image, (x_min, y_min), (x_max, y_max), (0, 0, 0), thickness=-1)

        elif self.mode.startswith("high_") or self.mode.startswith("low_"):
            base_image = None

            if "whole" in self.mode:
                base_image = image
            elif "lesion" in self.mode:
                base_image = image * mask[:, :, np.newaxis]
            elif "background" in self.mode:
                base_image = image * (1 - mask[:, :, np.newaxis])

            if base_image is not None:
                if "high_" in self.mode:
                    # processed_image = high_pass_filter(base_image)
                    processed_image = high_pass_filter(base_image, sigma=3, grayscale=True)
                else:
                    processed_image = low_pass_filter(base_image, sigma=3)

        if self.return_pil:
            processed_image = Image.fromarray(processed_image.astype(np.uint8))
        else:
            if self.transform:
                processed_image = Image.fromarray(processed_image)
                processed_image = self.transform(processed_image)
        label = torch.tensor(label, dtype=torch.long)

        return processed_image, label

# Define paths
image_dir = "/kaggle/working/HAM10000_images"
mask_dir = "/content/drive/MyDrive/dermoscopic_artifacts/HAM10000_segmentations_lesion_tschandl"

# Create dataset instances for each mode
dataset_modes = ["whole"]
#  ["whole", "lesion", "background", "bbox", "bbox70", "bbox90",
                #  "high_whole", "low_whole", "high_lesion", "low_lesion", "high_background", "low_background"]

In [31]:
df = pd.read_csv("/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv", index_col=0)

In [32]:
print(len(df))

10015


In [33]:
dataset_mode = "whole"
full_dataset = HAM10000Dataset(df, image_dir, mask_dir, transform=transform, mode=dataset_mode, return_pil=True)

In [34]:
dataset_mode = "whole"  # Change this as needed

all_metrics = {
    "AUROC": [],
    "Accuracy": [],
    "Recall": [],
    "Precision": []
}

# Directory containing saved models (ISIC models)
save_dir = f"/content/drive/MyDrive/dermoscopic_artifacts/classifiers/{dataset_mode}"

# Load dataset
full_ham_dataset = HAM10000Dataset(df, image_dir, mask_dir, transform=transform, mode=dataset_mode, return_pil=False)

ham_loader = DataLoader(full_ham_dataset, batch_size=32, shuffle=False)

store_preds = {}
store_labels = {}

# Loop through each split
for split in range(1, 6):
    print(f"\nEvaluating HAM dataset {dataset_mode} with ISIC Split {split} model")
    # print(df)

    # Get test indices
    # print(df)
    # test_indices = df[df[f"split_{split}"] == "test"].index.tolist()

    # # Create test dataset and DataLoader
    # test_dataset = full_dataset
    # # Subset(full_dataset, test_indices)
    # test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Load model
    model = models.resnet50(pretrained=False)  # Load model architecture
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_features, 1),
        nn.Sigmoid()
    )

    model.load_state_dict(torch.load(f"{save_dir}/resnet50_split_{split}.pth"))
    model = model.to(device)
    model.eval()

    # Lists to store predictions and labels
    all_preds = []
    all_labels = []

    # Evaluation loop
    with torch.no_grad():
        for images, labels in tqdm(ham_loader, desc=f"Evaluating Split {split}"):
            images = images.to(device)
            labels = labels.cpu().numpy()  # Convert labels to NumPy array
            # print(labels)

            outputs = model(images).cpu().numpy()  # Get model predictions
            preds = outputs.flatten()  # Flatten predictions

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Convert lists to NumPy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Compute metrics
    auroc = roc_auc_score(all_labels, all_preds)
    acc = accuracy_score(all_labels, all_preds >= 0.5)
    recall = recall_score(all_labels, all_preds >= 0.5)
    precision = precision_score(all_labels, all_preds >= 0.5)

    # Store metrics
    all_metrics["AUROC"].append(auroc)
    all_metrics["Accuracy"].append(acc)
    all_metrics["Recall"].append(recall)
    all_metrics["Precision"].append(precision)

    store_preds[split] = all_preds
    store_labels[split] = all_labels

    print(f"Split {split} - AUROC: {auroc:.4f}, Accuracy: {acc:.4f}, Recall: {recall:.4f}, Precision: {precision:.4f}")

# Compute mean metrics across splits
mean_metrics = {metric: np.mean(values) for metric, values in all_metrics.items()}

# Print final results
print("\n===== Final Evaluation Results =====")
for metric, mean_value in mean_metrics.items():
    print(f"Mean {metric}: {mean_value:.4f}")


Evaluating HAM dataset whole with ISIC Split 1 model


Evaluating Split 1: 100%|██████████| 313/313 [1:02:22<00:00, 11.96s/it]


Split 1 - AUROC: 0.7272, Accuracy: 0.7896, Recall: 0.4924, Precision: 0.2622

Evaluating HAM dataset whole with ISIC Split 2 model


Evaluating Split 2: 100%|██████████| 313/313 [05:24<00:00,  1.04s/it]


Split 2 - AUROC: 0.7662, Accuracy: 0.8247, Recall: 0.4780, Precision: 0.3117

Evaluating HAM dataset whole with ISIC Split 3 model


Evaluating Split 3: 100%|██████████| 313/313 [05:31<00:00,  1.06s/it]


Split 3 - AUROC: 0.6934, Accuracy: 0.8356, Recall: 0.3684, Precision: 0.3030

Evaluating HAM dataset whole with ISIC Split 4 model


Evaluating Split 4: 100%|██████████| 313/313 [05:27<00:00,  1.05s/it]


Split 4 - AUROC: 0.7053, Accuracy: 0.8807, Recall: 0.1896, Precision: 0.4187

Evaluating HAM dataset whole with ISIC Split 5 model


Evaluating Split 5: 100%|██████████| 313/313 [05:10<00:00,  1.01it/s]

Split 5 - AUROC: 0.6737, Accuracy: 0.8531, Recall: 0.2767, Precision: 0.3162

===== Final Evaluation Results =====
Mean AUROC: 0.7132
Mean Accuracy: 0.8367
Mean Recall: 0.3610
Mean Precision: 0.3224





In [None]:
all_metrics


{'AUROC': [np.float64(0.7271907864471332),
  np.float64(0.7661955186181244),
  np.float64(0.6934470443158336),
  np.float64(0.7052605156719983),
  np.float64(0.673744939152755)],
 'Accuracy': [0.7896155766350474,
  0.8246630054917623,
  0.8356465302046929,
  0.8806789815277084,
  0.8531203195207189],
 'Recall': [0.49236298292902064,
  0.4779874213836478,
  0.3683737646001797,
  0.18957771787960467,
  0.27672955974842767],
 'Precision': [0.26220095693779905,
  0.31165787932044525,
  0.30303030303030304,
  0.41865079365079366,
  0.3162217659137577]}

In [None]:
with open (f"{save_dir}/all_metrics_ham10000.pkl", "wb") as f:
    pickle.dump(all_metrics, f)

In [None]:
with open (f"{save_dir}/store_preds_ham10000.pkl", "wb") as f:
    pickle.dump(store_preds, f)

In [None]:
with open (f"{save_dir}/store_labels_ham10000.pkl", "wb") as f:
    pickle.dump(store_labels, f)

In [None]:
dataset_modes = ["whole"]
# , "lesion", "background", "bbox", "bbox70", "bbox90",
#                  "high_whole", "low_whole", "high_lesion", "low_lesion", "high_background", "low_background"]

mean_results = {}

# Iterate through each artifact and compute mean metrics
for mode in dataset_modes:
    save_dir = f"/content/drive/MyDrive/dermoscopic_artifacts/classifiers/{mode}"
    with open(f"{save_dir}/all_metrics_ham10000.pkl", "rb") as f:
        results_dict = pickle.load(f)

    # Compute mean for each metric
    mean_results[mode] = {metric: np.mean(values) for metric, values in results_dict.items()}

# Convert to DataFrame for easy viewing
df_mean_results = pd.DataFrame.from_dict(mean_results, orient="index")

In [None]:
df_mean_results

Unnamed: 0,AUROC,Accuracy,Recall,Precision
whole,0.713168,0.836745,0.361006,0.322352


# Eval on HAM10000 - Mode "low_background"

In [35]:
class HAM10000Dataset(Dataset):
    def __init__(self, df, image_dir, mask_dir, transform=None, mode="low_background", return_pil=False):
        """
        Args:
            df (pd.DataFrame): DataFrame containing image names and labels.
            image_dir (str): Directory containing original images.
            mask_dir (str): Directory containing ground truth segmentations.
            transform (callable, optional): Optional transform to apply to images.
            mode (str): One of "whole", "lesion", "background", "bbox", "bbox70",
                        "bbox90", "high_whole", "low_whole", "high_lesion",
                        "low_lesion", "high_background", "low_background".
        """
        self.df = df
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mode = mode
        self.return_pil = return_pil

    def __len__(self):
        return len(self.df)

    def low_pass_filter(self, image, sigma=1):
      return scipy.ndimage.gaussian_filter(image, sigma=sigma)

    def __getitem__(self, idx):
        # Load image
        img_name = self.df.iloc[idx]['image_id']

        # le = LabelEncoder()
        # df['label'] = le.fit_transform(df['dx'])
        # # print(le.classes_)  # saves mapping for inference
        # label = self.df.iloc[idx]['label']
        # print(label)

        # Binary label: 1 = melanoma, 0 = all other classes
        df['label'] = (df['dx'] == 'mel').astype(int)

        label = self.df.iloc[idx]['label']


        img_path = os.path.join(self.image_dir, f"{img_name}.jpg")
        mask_path = os.path.join(self.mask_dir, f"{img_name}_segmentation.png")

        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # Load segmentation mask

        # Ensure images and masks are the same size
        if image.shape[:2] != mask.shape:
            mask = cv2.resize(mask, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)

        # Binarize mask
        mask = (mask > 0).astype(np.uint8)

        if self.mode == "whole":
            processed_image = image

        elif self.mode == "lesion":
            processed_image = image * mask[:, :, np.newaxis]

        elif self.mode == "background":
            processed_image = image * (1 - mask[:, :, np.newaxis])

        elif self.mode in ["bbox", "bbox70", "bbox90"]:
            # Compute bounding box around lesion
            y_idxs, x_idxs = np.where(mask > 0)
            if len(y_idxs) == 0 or len(x_idxs) == 0:  # If no lesion
                processed_image = image * 0  # Blackout image
            else:
                y_min, y_max = y_idxs.min(), y_idxs.max()
                x_min, x_max = x_idxs.min(), x_idxs.max()

                # Compute the original bbox (for `bbox`)
                if self.mode == "bbox":
                    processed_image = image.copy()
                    cv2.rectangle(processed_image, (x_min, y_min), (x_max, y_max), (0, 0, 0), thickness=-1)

                # Expand bbox for bbox70 and bbox90
                else:
                    expand_ratio = 0.7 if self.mode == "bbox70" else 0.9

                    img_h, img_w = image.shape[:2]
                    bbox_h = y_max - y_min
                    bbox_w = x_max - x_min

                    # Calculate expansion to reach desired percentage of total image
                    target_area = expand_ratio * img_h * img_w
                    bbox_center_y, bbox_center_x = (y_min + y_max) // 2, (x_min + x_max) // 2

                    # Compute new bbox size
                    new_bbox_h = int(np.sqrt(target_area * (bbox_h / bbox_w)))  # Keep aspect ratio
                    new_bbox_w = int(np.sqrt(target_area * (bbox_w / bbox_h)))

                    # Ensure it fits within image boundaries
                    y_min = max(0, bbox_center_y - new_bbox_h // 2)
                    y_max = min(img_h, bbox_center_y + new_bbox_h // 2)
                    x_min = max(0, bbox_center_x - new_bbox_w // 2)
                    x_max = min(img_w, bbox_center_x + new_bbox_w // 2)

                    processed_image = image.copy()
                    cv2.rectangle(processed_image, (x_min, y_min), (x_max, y_max), (0, 0, 0), thickness=-1)

        elif self.mode.startswith("high_") or self.mode.startswith("low_"):
            base_image = None

            if "whole" in self.mode:
                base_image = image
            elif "lesion" in self.mode:
                base_image = image * mask[:, :, np.newaxis]
            elif "background" in self.mode:
                base_image = image * (1 - mask[:, :, np.newaxis])

            if base_image is not None:
                if "high_" in self.mode:
                    # processed_image = high_pass_filter(base_image)
                    processed_image = high_pass_filter(base_image, sigma=3, grayscale=True)
                else:
                    processed_image = self.low_pass_filter(base_image, sigma=3)

        if self.return_pil:
            processed_image = Image.fromarray(processed_image.astype(np.uint8))
        else:
            if self.transform:
                processed_image = Image.fromarray(processed_image)
                processed_image = self.transform(processed_image)
        label = torch.tensor(label, dtype=torch.long)

        return processed_image, label

# Define paths
image_dir = "/kaggle/working/HAM10000_images"
mask_dir = "/content/drive/MyDrive/dermoscopic_artifacts/HAM10000_segmentations_lesion_tschandl"

# Create dataset instances for each mode
dataset_modes = ["low_background"]
#  ["whole", "lesion", "background", "bbox", "bbox70", "bbox90",
                #  "high_whole", "low_whole", "high_lesion", "low_lesion", "high_background", "low_background"]

In [36]:
df = pd.read_csv("/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv", index_col=0)

In [37]:
dataset_mode = "low_background"
full_dataset = HAM10000Dataset(df, image_dir, mask_dir, transform=transform, mode=dataset_mode, return_pil=True)

In [38]:


all_metrics = {
    "AUROC": [],
    "Accuracy": [],
    "Recall": [],
    "Precision": []
}

# Directory containing saved models (ISIC models)
save_dir = f"/content/drive/MyDrive/classifiers/{dataset_mode}"

# Load dataset
full_ham_dataset = HAM10000Dataset(df, image_dir, mask_dir, transform=transform, mode=dataset_mode, return_pil=False)

ham_loader = DataLoader(full_ham_dataset, batch_size=32, shuffle=False)

store_preds = {}
store_labels = {}

# Loop through each split
for split in range(1, 6):
    print(f"\nEvaluating HAM dataset {dataset_mode} with ISIC Split {split} model")
    # print(df)

    # Get test indices
    # print(df)
    # test_indices = df[df[f"split_{split}"] == "test"].index.tolist()

    # # Create test dataset and DataLoader
    # test_dataset = full_dataset
    # # Subset(full_dataset, test_indices)
    # test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Load model
    model = models.resnet50(pretrained=False)  # Load model architecture
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_features, 1),
        nn.Sigmoid()
    )

    model.load_state_dict(torch.load(f"{save_dir}/resnet50_split_{split}.pth"))
    model = model.to(device)
    model.eval()

    # Lists to store predictions and labels
    all_preds = []
    all_labels = []

    # Evaluation loop
    with torch.no_grad():
        for images, labels in tqdm(ham_loader, desc=f"Evaluating Split {split}"):
            images = images.to(device)
            labels = labels.cpu().numpy()  # Convert labels to NumPy array
            # print(labels)

            outputs = model(images).cpu().numpy()  # Get model predictions
            preds = outputs.flatten()  # Flatten predictions

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Convert lists to NumPy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Compute metrics
    auroc = roc_auc_score(all_labels, all_preds)
    acc = accuracy_score(all_labels, all_preds >= 0.5)
    recall = recall_score(all_labels, all_preds >= 0.5)
    precision = precision_score(all_labels, all_preds >= 0.5)

    # Store metrics
    all_metrics["AUROC"].append(auroc)
    all_metrics["Accuracy"].append(acc)
    all_metrics["Recall"].append(recall)
    all_metrics["Precision"].append(precision)

    store_preds[split] = all_preds
    store_labels[split] = all_labels

    print(f"Split {split} - AUROC: {auroc:.4f}, Accuracy: {acc:.4f}, Recall: {recall:.4f}, Precision: {precision:.4f}")

# Compute mean metrics across splits
mean_metrics = {metric: np.mean(values) for metric, values in all_metrics.items()}

# Print final results
print("\n===== Final Evaluation Results =====")
for metric, mean_value in mean_metrics.items():
    print(f"Mean {metric}: {mean_value:.4f}")


Evaluating HAM dataset low_background with ISIC Split 1 model


Evaluating Split 1: 100%|██████████| 313/313 [10:24<00:00,  1.99s/it]


Split 1 - AUROC: 0.6638, Accuracy: 0.7779, Recall: 0.3801, Precision: 0.2161

Evaluating HAM dataset low_background with ISIC Split 2 model


Evaluating Split 2: 100%|██████████| 313/313 [10:27<00:00,  2.00s/it]


Split 2 - AUROC: 0.5912, Accuracy: 0.7556, Recall: 0.2740, Precision: 0.1568

Evaluating HAM dataset low_background with ISIC Split 3 model


Evaluating Split 3: 100%|██████████| 313/313 [10:25<00:00,  2.00s/it]


Split 3 - AUROC: 0.6046, Accuracy: 0.8756, Recall: 0.0665, Precision: 0.2633

Evaluating HAM dataset low_background with ISIC Split 4 model


Evaluating Split 4: 100%|██████████| 313/313 [10:25<00:00,  2.00s/it]


Split 4 - AUROC: 0.6402, Accuracy: 0.6930, Recall: 0.5031, Precision: 0.1817

Evaluating HAM dataset low_background with ISIC Split 5 model


Evaluating Split 5: 100%|██████████| 313/313 [10:23<00:00,  1.99s/it]

Split 5 - AUROC: 0.6710, Accuracy: 0.8037, Recall: 0.3100, Precision: 0.2236

===== Final Evaluation Results =====
Mean AUROC: 0.6342
Mean Accuracy: 0.7811
Mean Recall: 0.3067
Mean Precision: 0.2083





In [39]:
all_metrics

{'AUROC': [np.float64(0.6638349438621161),
  np.float64(0.5911587854006983),
  np.float64(0.6046240656218063),
  np.float64(0.6401880171490987),
  np.float64(0.6710381668171522)],
 'Accuracy': [0.7779331003494758,
  0.7555666500249626,
  0.8755866200698952,
  0.6929605591612581,
  0.8036944583125312],
 'Recall': [0.38005390835579517,
  0.27403414195867026,
  0.0664869721473495,
  0.5031446540880503,
  0.30997304582210244],
 'Precision': [0.21614716402657128,
  0.15681233933161953,
  0.26334519572953735,
  0.18170019467878,
  0.2235904082955282]}

In [43]:
with open (f"{save_dir}/all_metrics_ham10000_low_bg.pkl", "wb") as f:
    pickle.dump(all_metrics, f)

In [44]:
with open (f"{save_dir}/store_preds_ham10000_low_bg.pkl", "wb") as f:
    pickle.dump(store_preds, f)

In [45]:
with open (f"{save_dir}/store_labels_ham10000_low_bg.pkl", "wb") as f:
    pickle.dump(store_labels, f)

# Eval on HAM10000 - Mode "high_lesion"

In [11]:
class HAM10000Dataset(Dataset):
    def __init__(self, df, image_dir, mask_dir, transform=None, mode="high_lesion", return_pil=False):
        """
        Args:
            df (pd.DataFrame): DataFrame containing image names and labels.
            image_dir (str): Directory containing original images.
            mask_dir (str): Directory containing ground truth segmentations.
            transform (callable, optional): Optional transform to apply to images.
            mode (str): One of "whole", "lesion", "background", "bbox", "bbox70",
                        "bbox90", "high_whole", "low_whole", "high_lesion",
                        "low_lesion", "high_background", "low_background".
        """
        self.df = df
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mode = mode
        self.return_pil = return_pil

    def __len__(self):
        return len(self.df)

    def low_pass_filter(self, image, sigma=1):
      return scipy.ndimage.gaussian_filter(image, sigma=sigma)

    def high_pass_filter(self, image, sigma=1, grayscale=False):
      """
      Apply a high-pass filter to an image.

      Args:
          image (numpy.ndarray): Input image in RGB format.
          sigma (float): Standard deviation for Gaussian blur.
          grayscale (bool): If True, converts image to grayscale before filtering.

      Returns:
          numpy.ndarray: High-pass filtered image.
      """
      if grayscale:
          # Convert image to grayscale before filtering (avoids color artifacts)
          image_gray = np.dot(image[..., :3], [0.2989, 0.587, 0.114])  # Convert to grayscale
          low_frequencies = scipy.ndimage.gaussian_filter(image_gray, sigma=sigma)
          high_frequencies = image_gray - low_frequencies
          return np.stack([high_frequencies] * 3, axis=-1)  # Expand back to 3 channels for visualization

      else:
          # Apply filter to each RGB channel separately
          high_frequencies = np.zeros_like(image, dtype=np.float32)
          for c in range(3):  # Iterate over RGB channels
              low_frequencies = scipy.ndimage.gaussian_filter(image[:, :, c], sigma=sigma)
              high_frequencies[:, :, c] = image[:, :, c] - low_frequencies

          return high_frequencies

    def __getitem__(self, idx):
        # Load image
        img_name = self.df.iloc[idx]['image_id']

        # le = LabelEncoder()
        # df['label'] = le.fit_transform(df['dx'])
        # # print(le.classes_)  # saves mapping for inference
        # label = self.df.iloc[idx]['label']
        # print(label)

        # Binary label: 1 = melanoma, 0 = all other classes
        df['label'] = (df['dx'] == 'mel').astype(int)

        label = self.df.iloc[idx]['label']


        img_path = os.path.join(self.image_dir, f"{img_name}.jpg")
        mask_path = os.path.join(self.mask_dir, f"{img_name}_segmentation.png")

        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # Load segmentation mask

        # Ensure images and masks are the same size
        if image.shape[:2] != mask.shape:
            mask = cv2.resize(mask, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)

        # Binarize mask
        mask = (mask > 0).astype(np.uint8)

        if self.mode == "whole":
            processed_image = image

        elif self.mode == "lesion":

          #  processed_image = image * mask[:, :, np.newaxis]
          mask = mask.astype(np.float32)
          processed_image = (image.astype(np.float32) * mask[:, :, None])
          processed_image = processed_image.clip(0, 255).astype(np.uint8)


        elif self.mode == "background":
            processed_image = image * (1 - mask[:, :, np.newaxis])

        elif self.mode in ["bbox", "bbox70", "bbox90"]:
            # Compute bounding box around lesion
            y_idxs, x_idxs = np.where(mask > 0)
            if len(y_idxs) == 0 or len(x_idxs) == 0:  # If no lesion
                processed_image = image * 0  # Blackout image
            else:
                y_min, y_max = y_idxs.min(), y_idxs.max()
                x_min, x_max = x_idxs.min(), x_idxs.max()

                # Compute the original bbox (for `bbox`)
                if self.mode == "bbox":
                    processed_image = image.copy()
                    cv2.rectangle(processed_image, (x_min, y_min), (x_max, y_max), (0, 0, 0), thickness=-1)

                # Expand bbox for bbox70 and bbox90
                else:
                    expand_ratio = 0.7 if self.mode == "bbox70" else 0.9

                    img_h, img_w = image.shape[:2]
                    bbox_h = y_max - y_min
                    bbox_w = x_max - x_min

                    # Calculate expansion to reach desired percentage of total image
                    target_area = expand_ratio * img_h * img_w
                    bbox_center_y, bbox_center_x = (y_min + y_max) // 2, (x_min + x_max) // 2

                    # Compute new bbox size
                    new_bbox_h = int(np.sqrt(target_area * (bbox_h / bbox_w)))  # Keep aspect ratio
                    new_bbox_w = int(np.sqrt(target_area * (bbox_w / bbox_h)))

                    # Ensure it fits within image boundaries
                    y_min = max(0, bbox_center_y - new_bbox_h // 2)
                    y_max = min(img_h, bbox_center_y + new_bbox_h // 2)
                    x_min = max(0, bbox_center_x - new_bbox_w // 2)
                    x_max = min(img_w, bbox_center_x + new_bbox_w // 2)

                    processed_image = image.copy()
                    cv2.rectangle(processed_image, (x_min, y_min), (x_max, y_max), (0, 0, 0), thickness=-1)

        elif self.mode.startswith("high_") or self.mode.startswith("low_"):
            base_image = None

            if "whole" in self.mode:
                base_image = image
            elif "lesion" in self.mode:
                base_image = (image.astype(np.float32) * mask[:, :, None])
                # image * mask[:, :, np.newaxis]
            elif "background" in self.mode:
                base_image = image * (1 - mask[:, :, np.newaxis])

            if base_image is not None:
                if "high_" in self.mode:
                    # processed_image = self.high_pass_filter(base_image)
                    processed_image = self.high_pass_filter(base_image, sigma=3, grayscale=True)
                else:
                    processed_image = self.low_pass_filter(base_image, sigma=3)

        processed_image = np.clip(processed_image, 0, 255).astype(np.uint8)

        if self.return_pil:
            processed_image = Image.fromarray(processed_image.astype(np.uint8))
        else:
            if self.transform:
                processed_image = Image.fromarray(processed_image)
                processed_image = self.transform(processed_image)
        label = torch.tensor(label, dtype=torch.long)

        return processed_image, label

# Define paths
image_dir = "/kaggle/working/HAM10000_images"
mask_dir = "/content/drive/MyDrive/dermoscopic_artifacts/HAM10000_segmentations_lesion_tschandl"

# Create dataset instances for each mode
dataset_modes = ["high_lesion"]
#  ["whole", "lesion", "background", "bbox", "bbox70", "bbox90",
                #  "high_whole", "low_whole", "high_lesion", "low_lesion", "high_background", "low_background"]

In [12]:
df = pd.read_csv("/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv", index_col=0)

In [15]:
dataset_mode = "high_lesion"
full_dataset = HAM10000Dataset(df, image_dir, mask_dir, transform=transform, mode=dataset_mode, return_pil=True)

In [18]:


all_metrics = {
    "AUROC": [],
    "Accuracy": [],
    "Recall": [],
    "Precision": []
}

# Directory containing saved models (ISIC models)
save_dir = f"/content/drive/MyDrive/dermoscopic_artifacts/classifiers/{dataset_mode}"

# Load dataset
full_ham_dataset = HAM10000Dataset(df, image_dir, mask_dir, transform=transform, mode=dataset_mode, return_pil=False)

ham_loader = DataLoader(full_ham_dataset, batch_size=32, shuffle=False)

store_preds = {}
store_labels = {}

# Loop through each split
for split in range(1, 6):
    print(f"\nEvaluating HAM dataset {dataset_mode} with ISIC Split {split} model")
    # print(df)

    # Get test indices
    # print(df)
    # test_indices = df[df[f"split_{split}"] == "test"].index.tolist()

    # # Create test dataset and DataLoader
    # test_dataset = full_dataset
    # # Subset(full_dataset, test_indices)
    # test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Load model
    model = models.resnet50(pretrained=False)  # Load model architecture
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_features, 1),
        nn.Sigmoid()
    )

    model.load_state_dict(torch.load(f"{save_dir}/resnet50_split_{split}.pth"))
    model = model.to(device)
    model.eval()

    # Lists to store predictions and labels
    all_preds = []
    all_labels = []

    # Evaluation loop
    with torch.no_grad():
        for images, labels in tqdm(ham_loader, desc=f"Evaluating Split {split}"):
            images = images.to(device)
            labels = labels.cpu().numpy()  # Convert labels to NumPy array
            # print(labels)

            outputs = model(images).cpu().numpy()  # Get model predictions
            preds = outputs.flatten()  # Flatten predictions

            all_preds.extend(preds)
            all_labels.extend(labels)

    # Convert lists to NumPy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Compute metrics
    auroc = roc_auc_score(all_labels, all_preds)
    acc = accuracy_score(all_labels, all_preds >= 0.5)
    recall = recall_score(all_labels, all_preds >= 0.5)
    precision = precision_score(all_labels, all_preds >= 0.5)

    # Store metrics
    all_metrics["AUROC"].append(auroc)
    all_metrics["Accuracy"].append(acc)
    all_metrics["Recall"].append(recall)
    all_metrics["Precision"].append(precision)

    store_preds[split] = all_preds
    store_labels[split] = all_labels

    print(f"Split {split} - AUROC: {auroc:.4f}, Accuracy: {acc:.4f}, Recall: {recall:.4f}, Precision: {precision:.4f}")

# Compute mean metrics across splits
mean_metrics = {metric: np.mean(values) for metric, values in all_metrics.items()}

# Print final results
print("\n===== Final Evaluation Results =====")
for metric, mean_value in mean_metrics.items():
    print(f"Mean {metric}: {mean_value:.4f}")


Evaluating HAM dataset high_lesion with ISIC Split 1 model


Evaluating Split 1: 100%|██████████| 313/313 [07:39<00:00,  1.47s/it]


Split 1 - AUROC: 0.5837, Accuracy: 0.5977, Recall: 0.5085, Precision: 0.1398

Evaluating HAM dataset high_lesion with ISIC Split 2 model


Evaluating Split 2: 100%|██████████| 313/313 [07:41<00:00,  1.47s/it]


Split 2 - AUROC: 0.6207, Accuracy: 0.8476, Recall: 0.1456, Precision: 0.2198

Evaluating HAM dataset high_lesion with ISIC Split 3 model


Evaluating Split 3: 100%|██████████| 313/313 [07:45<00:00,  1.49s/it]


Split 3 - AUROC: 0.6177, Accuracy: 0.5852, Recall: 0.5822, Precision: 0.1494

Evaluating HAM dataset high_lesion with ISIC Split 4 model


Evaluating Split 4: 100%|██████████| 313/313 [07:42<00:00,  1.48s/it]


Split 4 - AUROC: 0.6253, Accuracy: 0.7663, Recall: 0.3252, Precision: 0.1855

Evaluating HAM dataset high_lesion with ISIC Split 5 model


Evaluating Split 5: 100%|██████████| 313/313 [07:47<00:00,  1.49s/it]

Split 5 - AUROC: 0.5983, Accuracy: 0.8606, Recall: 0.0889, Precision: 0.2058

===== Final Evaluation Results =====
Mean AUROC: 0.6091
Mean Accuracy: 0.7315
Mean Recall: 0.3301
Mean Precision: 0.1801





In [19]:
all_metrics

{'AUROC': [np.float64(0.5836651383952606),
  np.float64(0.6206949365588722),
  np.float64(0.6176939553242525),
  np.float64(0.6253030654447762),
  np.float64(0.598330871667794)],
 'Accuracy': [0.5977034448327508,
  0.8476285571642537,
  0.5852221667498752,
  0.7662506240639041,
  0.8606090863704443],
 'Recall': [0.5085354896675651,
  0.14555256064690028,
  0.5822102425876011,
  0.3252470799640611,
  0.0889487870619946],
 'Precision': [0.13982213438735178,
  0.2198100407055631,
  0.1494120359695642,
  0.18545081967213115,
  0.20582120582120583]}

In [20]:
with open (f"{save_dir}/all_metrics_ham10000_high_lesion.pkl", "wb") as f:
    pickle.dump(all_metrics, f)

In [21]:
with open (f"{save_dir}/store_preds_ham10000_high_lesion.pkl", "wb") as f:
    pickle.dump(store_preds, f)

In [22]:
with open (f"{save_dir}/store_labels_ham10000_high_lesion.pkl", "wb") as f:
    pickle.dump(store_labels, f)

In [29]:
dataset_modes = ["whole", "high_lesion", "low_background"]
# , "lesion", "background", "bbox", "bbox70", "bbox90",
#                  "high_whole", "low_whole", "high_lesion", "low_lesion", "high_background", "low_background"]

mean_results = {}

# Iterate through each artifact and compute mean metrics
for mode in dataset_modes:
    save_dir = f"/content/drive/MyDrive/dermoscopic_artifacts/classifiers/{mode}"
    with open(f"{save_dir}/all_metrics_ham10000_{mode}.pkl", "rb") as f:
        results_dict = pickle.load(f)

    # Compute mean for each metric
    mean_results[mode] = {metric: np.mean(values) for metric, values in results_dict.items()}

# Convert to DataFrame for easy viewing
df_mean_results_ham10000 = pd.DataFrame.from_dict(mean_results, orient="index")

In [30]:
df_mean_results_ham10000

Unnamed: 0,AUROC,Accuracy,Recall,Precision
whole,0.713168,0.836745,0.361006,0.322352
high_lesion,0.609138,0.731483,0.330099,0.180063
low_background,0.634169,0.781148,0.306739,0.208319
