<a href="https://colab.research.google.com/github/GuldinN/Masterthesis1/blob/main/Kopie_von_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installs and Imports

In [None]:
!nvidia-smi

In [None]:
!pip install -q datasets transformers evaluate
!pip install -q albumentations

# Download Datasets

## Function: Download with progress bar

In [None]:
import requests

# Function to download a file with a progress bar
def download_with_progress(url, output_path):
    """Stream ``url`` into ``output_path`` while showing a tqdm progress bar.

    The names ``os`` and ``tqdm`` are looked up when the function is called,
    so they must have been imported by the time of the first call.

    Args:
        url: Address of the file to download.
        output_path: Local path that is created/overwritten with the payload.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    chunk_size = 1024  # Size of the chunks in bytes (1 KB)

    # Using the response as a context manager releases the connection even
    # when the download is interrupted.
    with requests.get(url, stream=True) as response:
        # Without this check an error page would be written to disk and later
        # be mistaken for the dataset archive.
        response.raise_for_status()
        # Falls back to 0 when the server sends no Content-Length header.
        total_size = int(response.headers.get('content-length', 0))

        with open(output_path, 'wb') as file, tqdm(
            desc=f'Downloading {os.path.basename(output_path)}',
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                bar.update(len(data))

## ISIC2019

In [None]:
import os
import zipfile
from tqdm.notebook import tqdm

# URLs for the 2019 dataset
images_url = 'https://isic-challenge-data.s3.amazonaws.com/2019/ISIC_2019_Training_Input.zip'
metadata_url = 'https://isic-challenge-data.s3.amazonaws.com/2019/ISIC_2019_Training_GroundTruth.csv'

# Define the folder structure
output_folder = 'Datasources/ISIC2019'
images_folder = os.path.join(output_folder, 'images')
metadata_csv_path = os.path.join(output_folder, 'ISIC_2019_Training_GroundTruth.csv')

# Both the ZIP and the metadata CSV are written directly into output_folder.
os.makedirs(output_folder, exist_ok=True)

# Check if images need to be downloaded
if not os.path.exists(images_folder):
    # Download images
    images_zip_path = os.path.join(output_folder, 'images.zip')
    download_with_progress(images_url, images_zip_path)
    print("Images successfully downloaded.")

    # Extract into a staging folder and rename it only after extraction has
    # finished. `images_folder` is the "already downloaded" marker checked
    # above, so it must never exist in a half-populated state.
    staging_folder = images_folder + '.partial'
    with zipfile.ZipFile(images_zip_path, 'r') as z:
        z.extractall(staging_folder)
    os.rename(staging_folder, images_folder)
    print("Images successfully extracted to:", images_folder)
    os.remove(images_zip_path)  # Delete ZIP file after extraction
else:
    print(f"The folder '{images_folder}' already exists. Image download skipped.")

# Check if metadata needs to be downloaded
if not os.path.exists(metadata_csv_path):
    # Download metadata
    download_with_progress(metadata_url, metadata_csv_path)
    print("Metadata successfully downloaded and saved under:", metadata_csv_path)
else:
    print("The metadata file already exists. Metadata download skipped.")

## PAD-UFES-20

In [None]:
import shutil

output_folder = 'Datasources/PADUFES20'
data_url = 'https://prod-dcd-datasets-cache-zipfiles.s3.eu-west-1.amazonaws.com/zr7vgbcyr2-1.zip'

if not os.path.exists(output_folder):
    os.makedirs(output_folder)
    try:
        # Download data
        images_zip_path = os.path.join(output_folder, 'images.zip')
        download_with_progress(data_url, images_zip_path)
        print("Images successfully downloaded.")

        # Extract data
        with zipfile.ZipFile(images_zip_path, 'r') as z:
            z.extractall(output_folder)
        print("Data successfully extracted to:", output_folder)
        os.remove(images_zip_path)  # Delete ZIP file after extraction

        # The outer archive ships the images as three nested ZIP files inside
        # an `images/` folder; all paths are derived from output_folder so the
        # cell does not depend on the working directory being /content.
        padufes_images_dir = os.path.join(output_folder, 'images')
        image_parts = (1, 2, 3)

        for part in image_parts:
            part_zip_path = os.path.join(padufes_images_dir, f'imgs_part_{part}.zip')
            # `with` closes the archive handle before the file is deleted.
            with zipfile.ZipFile(part_zip_path, 'r') as zf:
                zf.extractall(padufes_images_dir)
            print(f"Images {part} successfully extracted to:", padufes_images_dir)
            os.remove(part_zip_path)  # Delete ZIP file after extraction

        # Move files: flatten imgs_part_N/ into the common images folder
        for part in image_parts:
            source = os.path.join(padufes_images_dir, f'imgs_part_{part}')
            for file in os.listdir(source):
                shutil.move(os.path.join(source, file), padufes_images_dir)
            print(f"Files {part} Moved")
            os.rmdir(source)
    except BaseException:
        # output_folder doubles as the "already downloaded" marker. Remove a
        # half-populated folder so the next run starts over instead of
        # silently skipping the download, then re-raise the original error.
        shutil.rmtree(output_folder, ignore_errors=True)
        raise

## Analyze Datasources

In [None]:
import pandas as pd

# Locations of the ISIC2019 ground truth and the extracted images
csv_path = '/content/Datasources/ISIC2019/ISIC_2019_Training_GroundTruth.csv'
images_folder = '/content/Datasources/ISIC2019/images/ISIC_2019_Training_Input'

# One-hot column name in the CSV -> label name used throughout this notebook
label_mapping = {
    'MEL': 'MEL',
    'NV': 'NEV',
    'BCC': 'BCC',
    'AK': 'ACK',
    'BKL': 'BKL',
    'DF': 'DF',
    'VASC': 'VASC',
    'SCC': 'SCC',
    'UNK': 'UNK'
}

_isic_raw = pd.read_csv(csv_path)

# The mapping keys are exactly the one-hot columns, in CSV order. The column
# holding the row maximum becomes the single label; the image name becomes a
# full .jpg path. Only these two columns are kept.
ISIC2019_df = _isic_raw.assign(
    label=_isic_raw[list(label_mapping)].idxmax(axis=1).map(label_mapping),
    image_path=_isic_raw['image'].map(lambda name: os.path.join(images_folder, f"{name}.jpg")),
)[['image_path', 'label']]

# Class distribution (absolute counts and percentages)
class_distribution = ISIC2019_df['label'].value_counts()
class_distribution_relative = ISIC2019_df['label'].value_counts(normalize=True) * 100

print(f"{len(ISIC2019_df)} images")
print("\nClass Distribution (absolute):")
print(class_distribution)
print("\nClass Distribution (relative %):")
print(class_distribution_relative)

ISIC2019_df

In [None]:
# Locations of the PAD-UFES-20 metadata and the flattened image folder
csv_path = '/content/Datasources/PADUFES20/metadata.csv'
images_folder = '/content/Datasources/PADUFES20/images'

_padufes_raw = pd.read_csv(csv_path)

# `diagnostic` already holds a single label per row; `img_id` is joined onto
# the image folder as-is. Only these two derived columns are kept.
PADUFES20_df = _padufes_raw.assign(
    label=_padufes_raw['diagnostic'],
    image_path=_padufes_raw['img_id'].map(lambda img_id: os.path.join(images_folder, f"{img_id}")),
)[['image_path', 'label']]

# Class distribution (absolute counts and percentages)
class_distribution = PADUFES20_df['label'].value_counts()
class_distribution_relative = PADUFES20_df['label'].value_counts(normalize=True) * 100

print(f"{len(PADUFES20_df)} images")
print("\nClass Distribution (absolute):")
print(class_distribution)
print("\nClass Distribution (relative %):")
print(class_distribution_relative)

PADUFES20_df

# Create Custom Dataframes

In [None]:
import numpy as np

# Every random step in this cell is seeded so that Restart & Run All yields
# the same subsets. The generator is re-created on each run of the cell.
SUBSAMPLE_SEED = 42
_subsample_rng = np.random.default_rng(SUBSAMPLE_SEED)


def _drop_random_rows(frame, remove_n):
    """Return ``frame`` without ``remove_n`` rows picked by the seeded generator."""
    drop_indices = _subsample_rng.choice(frame.index.to_numpy(), remove_n, replace=False)
    return frame.drop(drop_indices)


def _split_80_20(frame):
    """Return ``(train, test)``: a seeded 80 % sample and the remaining rows."""
    train_part = frame.sample(frac=0.8, random_state=200)
    return train_part, frame.drop(train_part.index)


def _concat_shuffled(frames):
    """Concatenate ``frames``, shuffle with a fixed seed and renumber the index."""
    merged = pd.concat(frames, ignore_index=True)
    return merged.sample(frac=1, random_state=42).reset_index(drop=True)


# NOTE(review): the hard-coded row counts below presumably shrink each source
# to a common class size across ISIC2019 and PAD-UFES-20 - confirm against
# the printed class distributions.

#Dermoscopic
#BCC Dermoscopic
bcc_df_dermoscopic = ISIC2019_df[ISIC2019_df['label'] == 'BCC']
bcc_df = _drop_random_rows(bcc_df_dermoscopic, 2478)
df_train_dermoscopic_bcc, df_test_dermoscopic_bcc = _split_80_20(bcc_df)

#SCC Dermoscopic
scc_df_dermoscopic = ISIC2019_df[ISIC2019_df['label'] == 'SCC']
scc_df = _drop_random_rows(scc_df_dermoscopic, 436)
df_train_dermoscopic_scc, df_test_dermoscopic_scc = _split_80_20(scc_df)

df_train_dermoscopic = _concat_shuffled([df_train_dermoscopic_bcc, df_train_dermoscopic_scc])
df_test_dermoscopic = _concat_shuffled([df_test_dermoscopic_bcc, df_test_dermoscopic_scc])

#Clinical
#BCC Clinical
bcc_df_clinical = PADUFES20_df[PADUFES20_df['label'] == 'BCC']
df_train_clinical_bcc, df_test_clinical_bcc = _split_80_20(bcc_df_clinical)

#SCC Clinical
scc_df_clinical = PADUFES20_df[PADUFES20_df['label'] == 'SCC']
df_train_clinical_scc, df_test_clinical_scc = _split_80_20(scc_df_clinical)

df_train_clinical = _concat_shuffled([df_train_clinical_bcc, df_train_clinical_scc])
df_test_clinical = _concat_shuffled([df_test_clinical_bcc, df_test_clinical_scc])

#Mixed - Train
# Each modality contributes its train split minus a fixed number of rows.
#BCC Mixed
bcc_df_mixed_1 = _drop_random_rows(df_train_dermoscopic_bcc, 338)
bcc_df_mixed_2 = _drop_random_rows(df_train_clinical_bcc, 338)
df_train_mixed_bcc = _concat_shuffled([bcc_df_mixed_1, bcc_df_mixed_2])

#SCC Mixed
scc_df_mixed_1 = _drop_random_rows(df_train_dermoscopic_scc, 77)
scc_df_mixed_2 = _drop_random_rows(df_train_clinical_scc, 77)
df_train_mixed_scc = _concat_shuffled([scc_df_mixed_1, scc_df_mixed_2])

df_train_mixed = _concat_shuffled([df_train_mixed_bcc, df_train_mixed_scc])

#Mixed - Test
#BCC Mixed (84 rows removed from the dermoscopic part, 85 from the clinical part)
bcc_df_mixed_1_test = _drop_random_rows(df_test_dermoscopic_bcc, 84)
bcc_df_mixed_2_test = _drop_random_rows(df_test_clinical_bcc, 85)
df_test_mixed_bcc = _concat_shuffled([bcc_df_mixed_1_test, bcc_df_mixed_2_test])

#SCC Mixed
scc_df_mixed_1_test = _drop_random_rows(df_test_dermoscopic_scc, 19)
scc_df_mixed_2_test = _drop_random_rows(df_test_clinical_scc, 19)
df_test_mixed_scc = _concat_shuffled([scc_df_mixed_1_test, scc_df_mixed_2_test])

df_test_mixed = _concat_shuffled([df_test_mixed_bcc, df_test_mixed_scc])

In [None]:
# Print size and label counts for each of the six train/test frames
_split_overview = (
    ("Dermoscopic Train Set ", df_train_dermoscopic),
    ("Dermoscopic Test Set ", df_test_dermoscopic),
    ("Clinical Train Set ", df_train_clinical),
    ("Clinical Test Set ", df_test_clinical),
    ("Mixed Train Set ", df_train_mixed),
    ("Mixed Test Set ", df_test_mixed),
)

for _title, _frame in _split_overview:
    print(_title)
    print(f"Dataset Größe: {len(_frame)}")
    print(_frame['label'].value_counts())

In [None]:
from datasets import Dataset
from PIL import Image
from datasets import Features, ClassLabel, Image as ImageFeature

def _as_binary_frame(frame):
    """Copy ``frame``, rename ``image_path`` to ``image`` and encode BCC as 1, everything else as 0."""
    binary = frame.copy().rename(columns={'image_path': 'image'})
    binary['label'] = binary['label'].map(lambda name: 1 if name == 'BCC' else 0)
    return binary


def _as_image_dataset(frame):
    """Convert ``frame`` to a Dataset with a ClassLabel ``label`` and an Image ``image`` column."""
    dataset = Dataset.from_pandas(frame)
    # Index 0 is named 'SCC' and index 1 'BCC', matching the 0/1 encoding above.
    dataset = dataset.cast_column("label", ClassLabel(num_classes=2, names=['SCC', 'BCC']))
    return dataset.cast_column("image", ImageFeature())


# Training sets - split into train and validation parts later on
df_dermoscopic = _as_binary_frame(df_train_dermoscopic)
df_clinical = _as_binary_frame(df_train_clinical)
df_mixed = _as_binary_frame(df_train_mixed)

# Test sets
df_dermoscopic_test = _as_binary_frame(df_test_dermoscopic)
df_clinical_test = _as_binary_frame(df_test_clinical)
df_mixed_test = _as_binary_frame(df_test_mixed)

# Datasets with typed label/image columns
ds_dermoscopic_train = _as_image_dataset(df_dermoscopic)
ds_clinical_train = _as_image_dataset(df_clinical)
ds_mixed_train = _as_image_dataset(df_mixed)

ds_dermoscopic_test = _as_image_dataset(df_dermoscopic_test)
ds_clinical_test = _as_image_dataset(df_clinical_test)
ds_mixed_test = _as_image_dataset(df_mixed_test)

# Reused Functions

In [None]:
# Class names as stored in the dataset's ClassLabel feature (index == class id)
labels = ds_dermoscopic_train.features['label'].names

# Two-way lookup between class ids and class names
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}

num_labels = len(id2label)

# Despite its name this is the plain `label` column of the dermoscopic training data
labeldict = ds_dermoscopic_train['label']

In [None]:
# Plot training loss, validation loss and the evaluation metrics

import matplotlib.pyplot as plt

def plot_training_metrics(history):
    """Draw two figures: loss curves and evaluation-metric curves.

    Args:
        history: Mapping/DataFrame indexable by the keys ``'loss'``,
            ``'eval_loss'``, ``'eval_accuracy'``, ``'eval_precision'``,
            ``'eval_recall'`` and ``'eval_f1'``; each entry is passed to
            ``plt.plot`` as one curve.
    """
    # Plot Training and Validation Loss
    plt.figure(figsize=(10, 6))
    plt.plot(history['loss'], label='Training Loss')
    plt.plot(history['eval_loss'], label='Validation Loss')
    # Fully transparent helper line; presumably only there to stretch the
    # auto-scaled axes so that they cover 0..1.
    plt.plot([0, 1], [0, 1], alpha=0)
    # Keep every second tick of the automatically chosen x ticks.
    plt.xticks(plt.xticks()[0][1::2]);
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()

    # Plot the evaluation metrics
    plt.figure(figsize=(10, 6))
    plt.plot(history['eval_accuracy'], label='Accuracy')
    plt.plot(history['eval_precision'], label='Precision')
    plt.plot(history['eval_recall'], label='Recall')
    plt.plot(history['eval_f1'], label='F1 Score')
    # Transparent helper line, see above (forces y = 1 into view).
    plt.plot([0.6, 1], [1, 1], alpha=0)
    plt.xticks(plt.xticks()[0][1::2]);
    plt.xlabel('Epochs')
    plt.ylabel('Score')
    plt.title('Accuracy, Precision, Recall, F1 Progress')
    plt.legend()
    plt.show()

In [None]:
#Plot ConfusionMatrix

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

def plot_confusion_matrix(outputs, display_labels=None):
    """Plot the confusion matrix for a prediction result.

    Args:
        outputs: Object with ``label_ids`` (true class ids) and
            ``predictions`` (per-class scores, argmax taken over axis 1).
        display_labels: Class names ordered by class id. When omitted, the
            names are read from the notebook-global ``train_dataset``, which
            therefore has to exist at call time.
    """
    y_true = outputs.label_ids
    y_pred = np.argmax(outputs.predictions, axis=1)

    if display_labels is None:
        display_labels = train_dataset.features['label'].names

    # Pin the class ids explicitly: otherwise a class that occurs neither in
    # y_true nor in y_pred shrinks the matrix and no longer matches the names.
    class_ids = list(range(len(display_labels)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
    disp.plot(xticks_rotation=45)

In [None]:
#Plot Roc curve

from scipy.special import softmax
from torch import nn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

def plot_roc_curve(outputs):
    """Plot the ROC curve of class index 1 for a prediction result.

    Args:
        outputs: Object with ``predictions`` (per-class scores, softmaxed over
            axis 1 here) and ``label_ids`` (true class ids).
    """
    # Score of class index 1 for every sample
    positive_scores = softmax(outputs.predictions, axis=1)[:, 1]

    # ROC points and the area under them
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(outputs.label_ids, positive_scores)
    area = auc(false_pos_rate, true_pos_rate)

    plt.figure()
    plt.plot(false_pos_rate, true_pos_rate, label='ROC curve (area = %0.2f)' % area)
    # Diagonal reference line
    plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()

# Training CNN (Dermoscopic)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/resnet50.tv_in1k"

# From here on `image_processor` is NOT the processor object but its
# `data_config`; later cells index it with 'input_size', 'mean' and 'std'.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

def _resize_and_normalize():
    """Return fresh Resize + Normalize steps built from the checkpoint's data_config."""
    input_size = image_processor['input_size']
    channel_mean = image_processor['mean']
    channel_std = image_processor['std']
    return [
        # input_size is indexed as [1] = height, [2] = width
        A.Resize(height=input_size[1], width=input_size[2]),
        A.Normalize(
            mean=(channel_mean[0], channel_mean[1], channel_mean[2]),
            std=(channel_std[0], channel_std[1], channel_std[2]),
            p=1.0
        ),
    ]


# Geometric transforms
_geometric_steps = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05, scale_limit=0.05, rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101, p=0.4
    ),
]

# Photometric adjustments
_photometric_steps = [
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),
]

# Slight blur for optical variance (one of the two, applied with p=0.3)
_blur_steps = [
    A.OneOf([
        A.GaussianBlur(blur_limit=3, p=0.5),
        A.MedianBlur(blur_limit=3, p=0.5),
    ], p=0.3),
]

# Minor elastic distortion to simulate skin curvature
_distortion_steps = [
    A.OneOf([
        A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
        A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
    ], p=0.3),
]

# Slight shadow/illumination simulation
_lighting_steps = [
    A.RandomShadow(shadow_roi=(0, 0.5, 1, 1), num_shadows_limit=(1, 3), shadow_dimension=3, shadow_intensity_range=(0.05, 0.1), p=0.2),
    A.RandomSunFlare(src_radius=2, flare_roi=(0, 0, 0.2, 0.2), num_flare_circles_range=(1, 2), p=0.2),
]

# Training: augmentations first, then the resize + normalisation the model expects
train_transforms = A.Compose(
    _geometric_steps
    + _photometric_steps
    + _blur_steps
    + _distortion_steps
    + _lighting_steps
    + _resize_and_normalize()
)

# Validation/test: resize + normalisation only
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Add a ``pixel_values`` entry holding the train-augmented version of every image.

    Args:
        examples: Batch mapping whose ``"image"`` entry is an iterable of
            images offering ``convert("RGB")``.

    Returns:
        The same ``examples`` object, extended in place.
    """
    augmented = []
    for img in examples["image"]:
        # Force three channels before handing the array to the pipeline
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Add a ``pixel_values`` entry holding the ``val_transforms`` output of every image.

    Args:
        examples: Batch mapping whose ``"image"`` entry is an iterable of
            images offering ``convert("RGB")``.

    Returns:
        The same ``examples`` object, extended in place.
    """
    processed = []
    for img in examples["image"]:
        # Force three channels before handing the array to the pipeline
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = processed
    return examples

# Split each training set into training + validation (90 % / 10 %).
# The seed makes the split reproducible across kernel restarts and across the
# cells of this notebook that repeat this step.
VAL_SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=VAL_SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=VAL_SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=VAL_SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# These are aliases, not copies: the set_transform calls below therefore also
# affect ds_dermoscopic_test / ds_clinical_test / ds_mixed_test.
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Augment the training data only; validation and test data are just resized
# and normalised.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
import matplotlib.pyplot as plt

# Fetch one training sample; indexing triggers preprocess_train, so
# `pixel_values` is a freshly augmented, resized and normalised array.
example = train_ds_clinical[29]
original_image = example['image']
augmented_image = example['pixel_values']

# pixel_values went through A.Normalize and are no longer in the 0..1 range
# that imshow expects for float RGB data (it would clip them). Undo the
# mean/std normalisation for display purposes only.
_display_mean = np.array(image_processor['mean'][:3], dtype=np.float32)
_display_std = np.array(image_processor['std'][:3], dtype=np.float32)
augmented_display = np.clip(augmented_image * _display_std + _display_mean, 0.0, 1.0)

fig, axes = plt.subplots(1, 2, figsize=(10, 5))

axes[0].imshow(original_image)
axes[0].set_title("Original Image")
axes[0].axis('off')

axes[1].imshow(augmented_display)
axes[1].set_title("Augmented Image")
axes[1].axis('off')

plt.tight_layout()
plt.show()

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Classification-head settings; ignore_mismatched_sizes is passed so loading
# does not fail when the checkpoint's head size differs from num_labels.
_model_kwargs = dict(
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
    ignore_mismatched_sizes=True,
)

model = AutoModelForImageClassification.from_pretrained(model_checkpoint, **_model_kwargs)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# metric1..metric4 are consumed by compute_metrics; metric5 (roc_auc) is
# loaded but not referenced by compute_metrics in this cell.
metric1, metric2, metric3, metric4, metric5 = (
    evaluate.load(_metric_name)
    for _metric_name in ("accuracy", "precision", "recall", "f1", "roc_auc")
)

def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 from an evaluation result.

    Args:
        eval_pred: Object with ``predictions`` (per-class scores; the argmax
            over axis 1 is the predicted class) and ``label_ids``.

    Returns:
        Dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predicted_ids = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    # Result key -> metric object loaded at module level
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted_ids, references=references)[key]
        for key, scorer in scorers
    }

# Hyper-parameters (the values 1e-5 and 32 were noted as alternatives for the
# learning rate and the batch size)
learning_rate, batch_size, num_epochs = 1e-4, 64, 50

# Checkpoint folder named after the part of the checkpoint id behind the last "/"
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = f"{model_name}-finetuned"

def collate_fn(examples):
    """Stack per-sample arrays and labels into one batch.

    Args:
        examples: Sequence of dicts with a ``"pixel_values"`` numpy array and
            a ``"label"`` value.

    Returns:
        Dict with ``"pixel_values"`` (stacked tensor, axis 2 of every sample
        moved to the front) and ``"labels"`` (tensor of the labels).
    """
    # Move axis 2 to position 0 for every sample before stacking
    image_tensors = [
        torch.from_numpy(np.moveaxis(sample["pixel_values"], source=2, destination=0))
        for sample in examples
    ]
    label_values = [sample["label"] for sample in examples]

    return {
        "pixel_values": torch.stack(image_tensors),
        "labels": torch.tensor(label_values),
    }

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" class weights derived from the label column stored in `labeldict`
_class_ids = np.unique(labeldict)
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=_class_ids, y=labeldict),
    dtype=torch.float,
)

# Evaluation, checkpointing and logging all happen once per epoch; the
# checkpoint with the best eval_loss is restored when training ends.
_training_config = dict(
    output_dir=output_dir,
    remove_unused_columns=False,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    warmup_ratio=0.1,
    logging_steps=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    push_to_hub=False,
    report_to="none",
)

training_args = TrainingArguments(**_training_config)

class CustomTrainer(Trainer):
    """Trainer that optimises a class-weighted cross-entropy loss.

    The weights come from the notebook-global ``classweight`` tensor. The hook
    has to be called ``compute_loss``: that is the method ``Trainer`` invokes
    for every batch, whereas a method named ``compute_loss_func`` is never
    called by it and would silently leave the default loss in place.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the weighted cross-entropy loss for one batch.

        Args:
            model: Model to run the forward pass with.
            inputs: Batch mapping containing ``"labels"`` plus the model inputs.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: Accepted for compatibility with the Trainer
                call signature; not used here.
        """
        labels = inputs["labels"]
        # Forward pass without the labels (and without mutating `inputs`), so
        # the model does not compute its own unweighted loss as well.
        model_inputs = {key: value for key, value in inputs.items() if key != "labels"}
        outputs = model(**model_inputs)
        logits = outputs.get("logits")

        # CrossEntropyLoss takes raw logits of shape (N, num_labels) together
        # with integer class ids of shape (N,). The weights have to live on
        # the same device as the logits.
        loss_fct = nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Dermoscopic

In [None]:
# Data used for this run (train_dataset is also read by plot_confusion_matrix)
train_dataset = train_ds_dermoscopic
eval_dataset = val_ds_dermoscopic

# Early stopping with a patience of 10
_early_stopping = EarlyStoppingCallback(early_stopping_patience=10)

trainer_cnn_dermoscopic = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
    callbacks=[_early_stopping],
)

In [None]:
# Fine-tune the model on the dermoscopic training split using the arguments
# and callbacks the trainer was constructed with.
trainer_cnn_dermoscopic.train()

In [None]:
from pandas import DataFrame

def _history_frame(log_history):
    """Turn a Trainer ``log_history`` into one row per logged training/eval pair.

    Training and evaluation entries are separate records in ``log_history``,
    so every column contains NaN gaps. Each column is compacted on its own by
    dropping exactly those gaps. Filtering with ``> 0`` instead would also
    drop legitimate 0.0 values (e.g. a precision of 0) and shift that column
    against the others.

    Args:
        log_history: List of log dicts, e.g. ``trainer.state.log_history``.

    Returns:
        DataFrame with the columns ``loss``, ``eval_loss``, ``eval_accuracy``,
        ``eval_precision``, ``eval_recall`` and ``eval_f1`` and an index
        starting at 1.
    """
    log_frame = pd.DataFrame(log_history)
    metric_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
    history = pd.concat(
        [log_frame[column].dropna().reset_index(drop=True) for column in metric_columns],
        axis=1,
    )
    history.index += 1  # index starts at 1 instead of 0
    return history


df = pd.DataFrame(trainer_cnn_dermoscopic.state.log_history)
df_history = _history_frame(trainer_cnn_dermoscopic.state.log_history)

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Run the dermoscopic-trained model on the dermoscopic test set, then print
# the metrics and plot the confusion matrix and the ROC curve.
# Naming is presumably outputs_cnn_<train set>_<test set>, d = dermoscopic.
outputs_cnn_d_d = trainer_cnn_dermoscopic.predict(test_ds_dermoscopic)
print(outputs_cnn_d_d.metrics)
plot_confusion_matrix(outputs_cnn_d_d)
plot_roc_curve(outputs_cnn_d_d)

### Testset: Clinical

In [None]:
# Run the dermoscopic-trained model on the clinical test set, then print the
# metrics and plot the confusion matrix and the ROC curve.
outputs_cnn_d_c = trainer_cnn_dermoscopic.predict(test_ds_clinical)
print(outputs_cnn_d_c.metrics)
plot_confusion_matrix(outputs_cnn_d_c)
plot_roc_curve(outputs_cnn_d_c)

### Testset: Mixed

In [None]:
# Run the dermoscopic-trained model on the mixed test set, then print the
# metrics and plot the confusion matrix and the ROC curve.
outputs_cnn_d_m = trainer_cnn_dermoscopic.predict(test_ds_mixed)
print(outputs_cnn_d_m.metrics)
plot_confusion_matrix(outputs_cnn_d_m)
plot_roc_curve(outputs_cnn_d_m)

In [None]:
# Remove the checkpoints of the finished run. `output_dir` is the folder the
# TrainingArguments were given, so the path cannot drift apart from it, and
# the guard keeps the cell re-runnable when the folder is already gone.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)

# Training CNN (Clinical)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/resnet50.tv_in1k"

# From here on `image_processor` is NOT the processor object but its
# `data_config`; later cells index it with 'input_size', 'mean' and 'std'.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

def _resize_and_normalize():
    """Return fresh Resize + Normalize steps built from the checkpoint's data_config."""
    input_size = image_processor['input_size']
    channel_mean = image_processor['mean']
    channel_std = image_processor['std']
    return [
        # input_size is indexed as [1] = height, [2] = width
        A.Resize(height=input_size[1], width=input_size[2]),
        A.Normalize(
            mean=(channel_mean[0], channel_mean[1], channel_mean[2]),
            std=(channel_std[0], channel_std[1], channel_std[2]),
            p=1.0
        ),
    ]


# Geometric transforms
_geometric_steps = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05, scale_limit=0.05, rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101, p=0.4
    ),
]

# Photometric adjustments
_photometric_steps = [
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),
]

# Slight blur for optical variance (one of the two, applied with p=0.3)
_blur_steps = [
    A.OneOf([
        A.GaussianBlur(blur_limit=3, p=0.5),
        A.MedianBlur(blur_limit=3, p=0.5),
    ], p=0.3),
]

# Minor elastic distortion to simulate skin curvature
_distortion_steps = [
    A.OneOf([
        A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
        A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
    ], p=0.3),
]

# Slight shadow/illumination simulation
_lighting_steps = [
    A.RandomShadow(shadow_roi=(0, 0.5, 1, 1), num_shadows_limit=(1, 3), shadow_dimension=3, shadow_intensity_range=(0.05, 0.1), p=0.2),
    A.RandomSunFlare(src_radius=2, flare_roi=(0, 0, 0.2, 0.2), num_flare_circles_range=(1, 2), p=0.2),
]

# Training: augmentations first, then the resize + normalisation the model expects
train_transforms = A.Compose(
    _geometric_steps
    + _photometric_steps
    + _blur_steps
    + _distortion_steps
    + _lighting_steps
    + _resize_and_normalize()
)

# Validation/test: resize + normalisation only
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Add a ``pixel_values`` entry holding the train-augmented version of every image.

    Args:
        examples: Batch mapping whose ``"image"`` entry is an iterable of
            images offering ``convert("RGB")``.

    Returns:
        The same ``examples`` object, extended in place.
    """
    augmented = []
    for img in examples["image"]:
        # Force three channels before handing the array to the pipeline
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Add a ``pixel_values`` entry holding the ``val_transforms`` output of every image.

    Args:
        examples: Batch mapping whose ``"image"`` entry is an iterable of
            images offering ``convert("RGB")``.

    Returns:
        The same ``examples`` object, extended in place.
    """
    processed = []
    for img in examples["image"]:
        # Force three channels before handing the array to the pipeline
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = processed
    return examples

# Split each training set into training + validation (90 % / 10 %).
# The seed makes the split reproducible across kernel restarts and across the
# cells of this notebook that repeat this step.
VAL_SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=VAL_SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=VAL_SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=VAL_SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# These are aliases, not copies: the set_transform calls below therefore also
# affect ds_dermoscopic_test / ds_clinical_test / ds_mixed_test.
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Augment the training data only; validation and test data are just resized
# and normalised.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Classification-head settings; ignore_mismatched_sizes is passed so loading
# does not fail when the checkpoint's head size differs from num_labels.
_model_kwargs = dict(
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
    ignore_mismatched_sizes=True,
)

model = AutoModelForImageClassification.from_pretrained(model_checkpoint, **_model_kwargs)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# metric1..metric4 are consumed by compute_metrics; metric5 (roc_auc) is
# loaded but not referenced by compute_metrics in this cell.
metric1, metric2, metric3, metric4, metric5 = (
    evaluate.load(_metric_name)
    for _metric_name in ("accuracy", "precision", "recall", "f1", "roc_auc")
)

def compute_metrics(eval_pred):
    predictions = np.argmax(eval_pred.predictions, axis=1)

    accuracy = metric1.compute(predictions=predictions, references=eval_pred.label_ids)["accuracy"]
    precision = metric2.compute(predictions=predictions, references=eval_pred.label_ids)["precision"]
    recall = metric3.compute(predictions=predictions, references=eval_pred.label_ids)["recall"]
    f1 = metric4.compute(predictions=predictions, references=eval_pred.label_ids)["f1"]

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# Optimisation hyperparameters consumed by TrainingArguments.
num_epochs = 50
batch_size = 64
learning_rate = 1e-4

# Output directory name: the checkpoint id without its hub namespace, plus "-finetuned".
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = f"{model_name}-finetuned"

def collate_fn(examples):
    """Assemble a list of dataset examples into one batch of tensors.

    Args:
        examples: Sequence of dicts, each with a "pixel_values" NumPy array and
            a "label" value. Axis 2 of every array is moved to the front
            (channels-last to channels-first, assuming H x W x C input — TODO confirm).

    Returns:
        dict with "pixel_values" (the stacked image tensors) and "labels"
        (a tensor built from the per-example labels).
    """
    pixel_values = torch.stack(
        [torch.from_numpy(np.moveaxis(item["pixel_values"], 2, 0)) for item in examples]
    )
    labels = torch.tensor([item["label"] for item in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Per-class weights from scikit-learn's "balanced" heuristic over `labeldict`,
# stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # output location and reporting
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    # keep every dataset column; the preprocessing callbacks read "image"
    remove_unused_columns=False,
    # evaluate, checkpoint and log once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # optimisation schedule
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    # batching
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # reload the checkpoint with the best eval_loss when training ends
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that scores batches with class-weighted cross-entropy.

    The hook is named ``compute_loss`` because that is the method ``Trainer``
    calls. A method named ``compute_loss_func`` is never invoked: ``Trainer``
    stores its ``compute_loss_func`` constructor argument under that attribute
    name, which hides a method of the same name.
    """

    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: Model being trained or evaluated.
            inputs: Batch dict from the data collator; must contain "labels"
                holding class indices.
            num_items_in_batch: Accepted for compatibility with the Trainer
                hook signature; not used.
            return_outputs: When True, also return the model outputs.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Cross-entropy consumes raw logits and class-index targets. BCELoss
        # would need probabilities and targets with the same shape as its input.
        # The weights are moved to the logits' device so GPU training works.
        loss_fct = torch.nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Clinical

In [None]:
# Data for this run: the clinical training split and its held-out validation split.
train_dataset = train_ds_clinical
eval_dataset = val_ds_clinical

trainer_cnn_clinical = CustomTrainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # early stopping with a patience of 10
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

In [None]:
# Run fine-tuning as configured in `training_args`; the early-stopping callback
# registered on the trainer can end the run before `num_epochs` is reached.
trainer_cnn_clinical.train()

In [None]:
from pandas import DataFrame

df = pd.DataFrame(trainer_cnn_clinical.state.log_history)

# Training-loss rows and evaluation rows are separate log entries, so every
# column has NaN gaps. Drop only the NaNs: filtering with `> 0` would also
# discard genuine 0.0 scores (e.g. precision/recall/F1 when no positive is
# predicted) and shift all later epochs up by one row.
history_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df_history = pd.concat(
    [df[column].dropna().reset_index(drop=True) for column in history_columns],
    axis=1,
)
df_history.index += 1  # number rows from 1 instead of 0

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Evaluate the clinical-trained CNN trainer on the dermoscopic test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_cnn_c_d = trainer_cnn_clinical.predict(test_ds_dermoscopic)
print(outputs_cnn_c_d.metrics)
plot_confusion_matrix(outputs_cnn_c_d)
plot_roc_curve(outputs_cnn_c_d)

### Testset: Clinical

In [None]:
# Evaluate the clinical-trained CNN trainer on the clinical test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_cnn_c_c = trainer_cnn_clinical.predict(test_ds_clinical)
print(outputs_cnn_c_c.metrics)
plot_confusion_matrix(outputs_cnn_c_c)
plot_roc_curve(outputs_cnn_c_c)

### Testset: Mixed

In [None]:
# Evaluate the clinical-trained CNN trainer on the mixed test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_cnn_c_m = trainer_cnn_clinical.predict(test_ds_mixed)
print(outputs_cnn_c_m.metrics)
plot_confusion_matrix(outputs_cnn_c_m)
plot_roc_curve(outputs_cnn_c_m)

In [None]:
# Delete this run's checkpoint directory (presumably to free disk space).
# `output_dir` is the directory handed to the Trainer, so no absolute Colab
# path is needed; ignore_errors=True keeps a re-run from failing when the
# directory is already gone.
shutil.rmtree(output_dir, ignore_errors=True)

# Training CNN (Mixed)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/resnet50.tv_in1k"

# Keep only the processor's `data_config`; the transform definitions index it
# for 'input_size', 'mean' and 'std'.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

# Augmentation pipeline for training images; each step fires with its own probability `p`.
train_transforms = A.Compose([
    # -- geometry: flips plus a small shift / scale / rotation --
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.4,
    ),

    # -- photometry: brightness / contrast and hue / saturation / value jitter --
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),

    # -- mild blur: one of Gaussian / median --
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
        ],
        p=0.3,
    ),

    # -- mild warping: one of elastic / grid distortion (meant to mimic skin curvature) --
    A.OneOf(
        [
            A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
        ],
        p=0.3,
    ),

    # -- lighting artefacts: faint shadows and a small sun flare --
    A.RandomShadow(
        shadow_roi=(0, 0.5, 1, 1),
        num_shadows_limit=(1, 3),
        shadow_dimension=3,
        shadow_intensity_range=(0.05, 0.1),
        p=0.2,
    ),
    A.RandomSunFlare(
        src_radius=2,
        flare_roi=(0, 0, 0.2, 0.2),
        num_flare_circles_range=(1, 2),
        p=0.2,
    ),

    # -- model input format: resize to the checkpoint's input size, normalise with its mean / std --
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

# Validation / test pipeline: no augmentation, only the resize + normalise steps.
val_transforms = A.Compose([
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

def preprocess_train(examples):
    """Add augmented model inputs to a batch of training examples.

    Args:
        examples: Batch dict whose "image" entry is a list of images exposing
            ``convert`` (PIL-style — TODO confirm against the dataset features).

    Returns:
        The same dict with a "pixel_values" list holding one ``train_transforms``
        output per image.
    """
    augmented = []
    for img in examples["image"]:
        # Convert to RGB first so every array handed to the pipeline has three channels.
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Add non-augmented (resize + normalise) model inputs to a batch of examples.

    Args:
        examples: Batch dict whose "image" entry is a list of images exposing
            ``convert`` (PIL-style — TODO confirm against the dataset features).

    Returns:
        The same dict with a "pixel_values" list holding one ``val_transforms``
        output per image.
    """
    transformed = []
    for img in examples["image"]:
        # Convert to RGB first so every array handed to the pipeline has three channels.
        rgb_array = np.array(img.convert("RGB"))
        transformed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = transformed
    return examples

# split up training into training + validation
# A fixed seed makes the 90/10 split reproducible across kernel restarts and
# re-runs of this cell; without it the validation images change on every run.
splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=42)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=42)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=42)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# The test sets are used as provided (no further splitting).
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Register the preprocessing callbacks: augmentation for training data only,
# plain resize + normalise for validation and test data.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint as an image classifier configured with this
# project's label mapping; mismatched weight shapes are tolerated rather than
# raising (ignore_mismatched_sizes=True).
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
    ignore_mismatched_sizes=True,
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# Metric objects from the `evaluate` library. compute_metrics below uses
# metric1-metric4; metric5 (roc_auc) is loaded here but not referenced by it.
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("precision")
metric3 = evaluate.load("recall")
metric4 = evaluate.load("f1")
metric5 = evaluate.load("roc_auc")

def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy, precision, recall and F1.

    Args:
        eval_pred: Object exposing ``predictions`` (per-class scores; the argmax
            over axis 1 is taken as the predicted class) and ``label_ids``
            (reference labels).

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    scorers = (("accuracy", metric1), ("precision", metric2), ("recall", metric3), ("f1", metric4))
    return {
        key: scorer.compute(predictions=predicted, references=references)[key]
        for key, scorer in scorers
    }

# Optimisation hyperparameters consumed by TrainingArguments.
num_epochs = 50
batch_size = 64  # alternative noted by the author: 32
learning_rate = 1e-4  # alternative noted by the author: 1e-5

# Output directory name: the checkpoint id without its hub namespace, plus "-finetuned".
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = f"{model_name}-finetuned"

def collate_fn(examples):
    """Assemble a list of dataset examples into one batch of tensors.

    Args:
        examples: Sequence of dicts, each with a "pixel_values" NumPy array and
            a "label" value. Axis 2 of every array is moved to the front
            (channels-last to channels-first, assuming H x W x C input — TODO confirm).

    Returns:
        dict with "pixel_values" (the stacked image tensors) and "labels"
        (a tensor built from the per-example labels).
    """
    pixel_values = torch.stack(
        [torch.from_numpy(np.moveaxis(item["pixel_values"], 2, 0)) for item in examples]
    )
    labels = torch.tensor([item["label"] for item in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Per-class weights from scikit-learn's "balanced" heuristic over `labeldict`,
# stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # output location and reporting
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    # keep every dataset column; the preprocessing callbacks read "image"
    remove_unused_columns=False,
    # evaluate, checkpoint and log once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # optimisation schedule
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    # batching
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # reload the checkpoint with the best eval_loss when training ends
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that scores batches with class-weighted cross-entropy.

    The hook is named ``compute_loss`` because that is the method ``Trainer``
    calls. A method named ``compute_loss_func`` is never invoked: ``Trainer``
    stores its ``compute_loss_func`` constructor argument under that attribute
    name, which hides a method of the same name.
    """

    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: Model being trained or evaluated.
            inputs: Batch dict from the data collator; must contain "labels"
                holding class indices.
            num_items_in_batch: Accepted for compatibility with the Trainer
                hook signature; not used.
            return_outputs: When True, also return the model outputs.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Cross-entropy consumes raw logits and class-index targets. BCELoss
        # would need probabilities and targets with the same shape as its input.
        # The weights are moved to the logits' device so GPU training works.
        loss_fct = torch.nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Mixed

In [None]:
# Data for this run: the mixed training split and its held-out validation split.
train_dataset = train_ds_mixed
eval_dataset = val_ds_mixed

trainer_cnn_mixed = CustomTrainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # early stopping with a patience of 10
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

In [None]:
# Run fine-tuning as configured in `training_args`; the early-stopping callback
# registered on the trainer can end the run before `num_epochs` is reached.
trainer_cnn_mixed.train()

In [None]:
from pandas import DataFrame

df = pd.DataFrame(trainer_cnn_mixed.state.log_history)

# Training-loss rows and evaluation rows are separate log entries, so every
# column has NaN gaps. Drop only the NaNs: filtering with `> 0` would also
# discard genuine 0.0 scores (e.g. precision/recall/F1 when no positive is
# predicted) and shift all later epochs up by one row.
history_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df_history = pd.concat(
    [df[column].dropna().reset_index(drop=True) for column in history_columns],
    axis=1,
)
df_history.index += 1  # number rows from 1 instead of 0

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Evaluate the mixed-trained CNN trainer on the dermoscopic test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_cnn_m_d = trainer_cnn_mixed.predict(test_ds_dermoscopic)
print(outputs_cnn_m_d.metrics)
plot_confusion_matrix(outputs_cnn_m_d)
plot_roc_curve(outputs_cnn_m_d)

### Testset: Clinical

In [None]:
# Evaluate the mixed-trained CNN trainer on the clinical test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_cnn_m_c = trainer_cnn_mixed.predict(test_ds_clinical)
print(outputs_cnn_m_c.metrics)
plot_confusion_matrix(outputs_cnn_m_c)
plot_roc_curve(outputs_cnn_m_c)

### Testset: Mixed

In [None]:
# Evaluate the mixed-trained CNN trainer on the mixed test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_cnn_m_m = trainer_cnn_mixed.predict(test_ds_mixed)
print(outputs_cnn_m_m.metrics)
plot_confusion_matrix(outputs_cnn_m_m)
plot_roc_curve(outputs_cnn_m_m)

In [None]:
# Delete this run's checkpoint directory (presumably to free disk space).
# `output_dir` is the directory handed to the Trainer, so no absolute Colab
# path is needed; ignore_errors=True keeps a re-run from failing when the
# directory is already gone.
shutil.rmtree(output_dir, ignore_errors=True)

# Training ViT (Dermoscopic)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/vit_base_patch16_224.augreg_in1k"

# Keep only the processor's `data_config`; the transform definitions index it
# for 'input_size', 'mean' and 'std'.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

# Augmentation pipeline for training images; each step fires with its own probability `p`.
train_transforms = A.Compose([
    # -- geometry: flips plus a small shift / scale / rotation --
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.4,
    ),

    # -- photometry: brightness / contrast and hue / saturation / value jitter --
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),

    # -- mild blur: one of Gaussian / median --
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
        ],
        p=0.3,
    ),

    # -- mild warping: one of elastic / grid distortion (meant to mimic skin curvature) --
    A.OneOf(
        [
            A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
        ],
        p=0.3,
    ),

    # -- lighting artefacts: faint shadows and a small sun flare --
    A.RandomShadow(
        shadow_roi=(0, 0.5, 1, 1),
        num_shadows_limit=(1, 3),
        shadow_dimension=3,
        shadow_intensity_range=(0.05, 0.1),
        p=0.2,
    ),
    A.RandomSunFlare(
        src_radius=2,
        flare_roi=(0, 0, 0.2, 0.2),
        num_flare_circles_range=(1, 2),
        p=0.2,
    ),

    # -- model input format: resize to the checkpoint's input size, normalise with its mean / std --
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

# Validation / test pipeline: no augmentation, only the resize + normalise steps.
val_transforms = A.Compose([
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

def preprocess_train(examples):
    """Add augmented model inputs to a batch of training examples.

    Args:
        examples: Batch dict whose "image" entry is a list of images exposing
            ``convert`` (PIL-style — TODO confirm against the dataset features).

    Returns:
        The same dict with a "pixel_values" list holding one ``train_transforms``
        output per image.
    """
    augmented = []
    for img in examples["image"]:
        # Convert to RGB first so every array handed to the pipeline has three channels.
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Add non-augmented (resize + normalise) model inputs to a batch of examples.

    Args:
        examples: Batch dict whose "image" entry is a list of images exposing
            ``convert`` (PIL-style — TODO confirm against the dataset features).

    Returns:
        The same dict with a "pixel_values" list holding one ``val_transforms``
        output per image.
    """
    transformed = []
    for img in examples["image"]:
        # Convert to RGB first so every array handed to the pipeline has three channels.
        rgb_array = np.array(img.convert("RGB"))
        transformed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = transformed
    return examples

# split up training into training + validation
# A fixed seed makes the 90/10 split reproducible across kernel restarts and
# re-runs of this cell; without it the validation images change on every run.
splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=42)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=42)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=42)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# The test sets are used as provided (no further splitting).
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Register the preprocessing callbacks: augmentation for training data only,
# plain resize + normalise for validation and test data.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint as an image classifier configured with this
# project's label mapping; mismatched weight shapes are tolerated rather than
# raising (ignore_mismatched_sizes=True).
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
    ignore_mismatched_sizes=True,
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# Metric objects from the `evaluate` library. compute_metrics below uses
# metric1-metric4; metric5 (roc_auc) is loaded here but not referenced by it.
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("precision")
metric3 = evaluate.load("recall")
metric4 = evaluate.load("f1")
metric5 = evaluate.load("roc_auc")

def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy, precision, recall and F1.

    Args:
        eval_pred: Object exposing ``predictions`` (per-class scores; the argmax
            over axis 1 is taken as the predicted class) and ``label_ids``
            (reference labels).

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    scorers = (("accuracy", metric1), ("precision", metric2), ("recall", metric3), ("f1", metric4))
    return {
        key: scorer.compute(predictions=predicted, references=references)[key]
        for key, scorer in scorers
    }

# Optimisation hyperparameters consumed by TrainingArguments.
num_epochs = 50
batch_size = 64
learning_rate = 1e-4

# Output directory name: the checkpoint id without its hub namespace, plus "-finetuned".
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = f"{model_name}-finetuned"

def collate_fn(examples):
    """Assemble a list of dataset examples into one batch of tensors.

    Args:
        examples: Sequence of dicts, each with a "pixel_values" NumPy array and
            a "label" value. Axis 2 of every array is moved to the front
            (channels-last to channels-first, assuming H x W x C input — TODO confirm).

    Returns:
        dict with "pixel_values" (the stacked image tensors) and "labels"
        (a tensor built from the per-example labels).
    """
    pixel_values = torch.stack(
        [torch.from_numpy(np.moveaxis(item["pixel_values"], 2, 0)) for item in examples]
    )
    labels = torch.tensor([item["label"] for item in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Per-class weights from scikit-learn's "balanced" heuristic over `labeldict`,
# stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # output location and reporting
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    # keep every dataset column; the preprocessing callbacks read "image"
    remove_unused_columns=False,
    # evaluate, checkpoint and log once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # optimisation schedule (the author also noted plain "cosine" as an option)
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    # batching
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # reload the checkpoint with the best eval_loss when training ends
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that scores batches with class-weighted cross-entropy.

    The hook is named ``compute_loss`` because that is the method ``Trainer``
    calls. A method named ``compute_loss_func`` is never invoked: ``Trainer``
    stores its ``compute_loss_func`` constructor argument under that attribute
    name, which hides a method of the same name.
    """

    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: Model being trained or evaluated.
            inputs: Batch dict from the data collator; must contain "labels"
                holding class indices.
            num_items_in_batch: Accepted for compatibility with the Trainer
                hook signature; not used.
            return_outputs: When True, also return the model outputs.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Cross-entropy consumes raw logits and class-index targets. BCELoss
        # would need probabilities and targets with the same shape as its input.
        # The weights are moved to the logits' device so GPU training works.
        loss_fct = torch.nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Dermoscopic

In [None]:
# Data for this run: the dermoscopic training split and its held-out validation split.
train_dataset = train_ds_dermoscopic
eval_dataset = val_ds_dermoscopic

trainer_vit_dermoscopic = CustomTrainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # early stopping with a patience of 10
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

# Run fine-tuning; early stopping can end it before `num_epochs` is reached.
trainer_vit_dermoscopic.train()

In [None]:
from pandas import DataFrame

df = pd.DataFrame(trainer_vit_dermoscopic.state.log_history)

# Training-loss rows and evaluation rows are separate log entries, so every
# column has NaN gaps. Drop only the NaNs: filtering with `> 0` would also
# discard genuine 0.0 scores (e.g. precision/recall/F1 when no positive is
# predicted) and shift all later epochs up by one row.
history_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df_history = pd.concat(
    [df[column].dropna().reset_index(drop=True) for column in history_columns],
    axis=1,
)
df_history.index += 1  # number rows from 1 instead of 0

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Evaluate the dermoscopic-trained ViT trainer on the dermoscopic test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_vit_d_d = trainer_vit_dermoscopic.predict(test_ds_dermoscopic)
print(outputs_vit_d_d.metrics)
plot_confusion_matrix(outputs_vit_d_d)
plot_roc_curve(outputs_vit_d_d)

### Testset: Clinical

In [None]:
# Evaluate the dermoscopic-trained ViT trainer on the clinical test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_vit_d_c = trainer_vit_dermoscopic.predict(test_ds_clinical)
print(outputs_vit_d_c.metrics)
plot_confusion_matrix(outputs_vit_d_c)
plot_roc_curve(outputs_vit_d_c)

### Testset: Mixed

In [None]:
# Evaluate the dermoscopic-trained ViT trainer on the mixed test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_vit_d_m = trainer_vit_dermoscopic.predict(test_ds_mixed)
print(outputs_vit_d_m.metrics)
plot_confusion_matrix(outputs_vit_d_m)
plot_roc_curve(outputs_vit_d_m)

In [None]:
# Delete this run's checkpoint directory (presumably to free disk space).
# `output_dir` is the directory handed to the Trainer, so no absolute Colab
# path is needed; ignore_errors=True keeps a re-run from failing when the
# directory is already gone.
shutil.rmtree(output_dir, ignore_errors=True)

# Training ViT (Clinical)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/vit_base_patch16_224.augreg_in1k"

# Keep only the processor's `data_config`; the transform definitions index it
# for 'input_size', 'mean' and 'std'.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

# Augmentation pipeline for training images; each step fires with its own probability `p`.
train_transforms = A.Compose([
    # -- geometry: flips plus a small shift / scale / rotation --
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.4,
    ),

    # -- photometry: brightness / contrast and hue / saturation / value jitter --
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),

    # -- mild blur: one of Gaussian / median --
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
        ],
        p=0.3,
    ),

    # -- mild warping: one of elastic / grid distortion (meant to mimic skin curvature) --
    A.OneOf(
        [
            A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
        ],
        p=0.3,
    ),

    # -- lighting artefacts: faint shadows and a small sun flare --
    A.RandomShadow(
        shadow_roi=(0, 0.5, 1, 1),
        num_shadows_limit=(1, 3),
        shadow_dimension=3,
        shadow_intensity_range=(0.05, 0.1),
        p=0.2,
    ),
    A.RandomSunFlare(
        src_radius=2,
        flare_roi=(0, 0, 0.2, 0.2),
        num_flare_circles_range=(1, 2),
        p=0.2,
    ),

    # -- model input format: resize to the checkpoint's input size, normalise with its mean / std --
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

# Validation / test pipeline: no augmentation, only the resize + normalise steps.
val_transforms = A.Compose([
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

def preprocess_train(examples):
    """Add augmented model inputs to a batch of training examples.

    Args:
        examples: Batch dict whose "image" entry is a list of images exposing
            ``convert`` (PIL-style — TODO confirm against the dataset features).

    Returns:
        The same dict with a "pixel_values" list holding one ``train_transforms``
        output per image.
    """
    augmented = []
    for img in examples["image"]:
        # Convert to RGB first so every array handed to the pipeline has three channels.
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Add non-augmented (resize + normalise) model inputs to a batch of examples.

    Args:
        examples: Batch dict whose "image" entry is a list of images exposing
            ``convert`` (PIL-style — TODO confirm against the dataset features).

    Returns:
        The same dict with a "pixel_values" list holding one ``val_transforms``
        output per image.
    """
    transformed = []
    for img in examples["image"]:
        # Convert to RGB first so every array handed to the pipeline has three channels.
        rgb_array = np.array(img.convert("RGB"))
        transformed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = transformed
    return examples

# split up training into training + validation
# A fixed seed makes the 90/10 split reproducible across kernel restarts and
# re-runs of this cell; without it the validation images change on every run.
splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=42)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=42)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=42)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# The test sets are used as provided (no further splitting).
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Register the preprocessing callbacks: augmentation for training data only,
# plain resize + normalise for validation and test data.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint as an image classifier configured with this
# project's label mapping; mismatched weight shapes are tolerated rather than
# raising (ignore_mismatched_sizes=True).
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
    ignore_mismatched_sizes=True,
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# Metric objects from the `evaluate` library. compute_metrics below uses
# metric1-metric4; metric5 (roc_auc) is loaded here but not referenced by it.
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("precision")
metric3 = evaluate.load("recall")
metric4 = evaluate.load("f1")
metric5 = evaluate.load("roc_auc")

def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy, precision, recall and F1.

    Args:
        eval_pred: Object exposing ``predictions`` (per-class scores; the argmax
            over axis 1 is taken as the predicted class) and ``label_ids``
            (reference labels).

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    scorers = (("accuracy", metric1), ("precision", metric2), ("recall", metric3), ("f1", metric4))
    return {
        key: scorer.compute(predictions=predicted, references=references)[key]
        for key, scorer in scorers
    }

# Optimisation hyperparameters consumed by TrainingArguments.
num_epochs = 50
batch_size = 64
learning_rate = 1e-4

# Output directory name: the checkpoint id without its hub namespace, plus "-finetuned".
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = f"{model_name}-finetuned"

def collate_fn(examples):
    """Assemble a list of dataset examples into one batch of tensors.

    Args:
        examples: Sequence of dicts, each with a "pixel_values" NumPy array and
            a "label" value. Axis 2 of every array is moved to the front
            (channels-last to channels-first, assuming H x W x C input — TODO confirm).

    Returns:
        dict with "pixel_values" (the stacked image tensors) and "labels"
        (a tensor built from the per-example labels).
    """
    pixel_values = torch.stack(
        [torch.from_numpy(np.moveaxis(item["pixel_values"], 2, 0)) for item in examples]
    )
    labels = torch.tensor([item["label"] for item in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Per-class weights from scikit-learn's "balanced" heuristic over `labeldict`,
# stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # output location and reporting
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    # keep every dataset column; the preprocessing callbacks read "image"
    remove_unused_columns=False,
    # evaluate, checkpoint and log once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # optimisation schedule (the author also noted plain "cosine" as an option)
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    # batching
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # reload the checkpoint with the best eval_loss when training ends
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that scores batches with class-weighted cross-entropy.

    The hook is named ``compute_loss`` because that is the method ``Trainer``
    calls. A method named ``compute_loss_func`` is never invoked: ``Trainer``
    stores its ``compute_loss_func`` constructor argument under that attribute
    name, which hides a method of the same name.
    """

    def compute_loss(self, model, inputs, num_items_in_batch=None, return_outputs=False):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: Model being trained or evaluated.
            inputs: Batch dict from the data collator; must contain "labels"
                holding class indices.
            num_items_in_batch: Accepted for compatibility with the Trainer
                hook signature; not used.
            return_outputs: When True, also return the model outputs.

        Returns:
            The loss tensor, or ``(loss, outputs)`` when ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Cross-entropy consumes raw logits and class-index targets. BCELoss
        # would need probabilities and targets with the same shape as its input.
        # The weights are moved to the logits' device so GPU training works.
        loss_fct = torch.nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Clinical

In [None]:
# Data for this run: the clinical training split and its held-out validation split.
train_dataset = train_ds_clinical
eval_dataset = val_ds_clinical

trainer_vit_clinical = CustomTrainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # early stopping with a patience of 10
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

# Run fine-tuning; early stopping can end it before `num_epochs` is reached.
trainer_vit_clinical.train()

In [None]:
from pandas import DataFrame

df = pd.DataFrame(trainer_vit_clinical.state.log_history)

# Training-loss rows and evaluation rows are separate log entries, so every
# column has NaN gaps. Drop only the NaNs: filtering with `> 0` would also
# discard genuine 0.0 scores (e.g. precision/recall/F1 when no positive is
# predicted) and shift all later epochs up by one row.
history_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df_history = pd.concat(
    [df[column].dropna().reset_index(drop=True) for column in history_columns],
    axis=1,
)
df_history.index += 1  # number rows from 1 instead of 0

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Evaluate the clinical-trained ViT trainer on the dermoscopic test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_vit_c_d = trainer_vit_clinical.predict(test_ds_dermoscopic)
print(outputs_vit_c_d.metrics)
plot_confusion_matrix(outputs_vit_c_d)
plot_roc_curve(outputs_vit_c_d)

### Testset: Clinical

In [None]:
# Evaluate the clinical-trained ViT trainer on the clinical test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_vit_c_c = trainer_vit_clinical.predict(test_ds_clinical)
print(outputs_vit_c_c.metrics)
plot_confusion_matrix(outputs_vit_c_c)
plot_roc_curve(outputs_vit_c_c)

### Testset: Mixed

In [None]:
# Evaluate the clinical-trained ViT trainer on the mixed test set: print
# the metrics, then pass the predictions to the notebook's plotting helpers.
outputs_vit_c_m = trainer_vit_clinical.predict(test_ds_mixed)
print(outputs_vit_c_m.metrics)
plot_confusion_matrix(outputs_vit_c_m)
plot_roc_curve(outputs_vit_c_m)

In [None]:
# Delete this run's checkpoint directory (presumably to free disk space).
# `output_dir` is the directory handed to the Trainer, so no absolute Colab
# path is needed; ignore_errors=True keeps a re-run from failing when the
# directory is already gone.
shutil.rmtree(output_dir, ignore_errors=True)

# Training ViT (Mixed)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/vit_base_patch16_224.augreg_in1k"

# Keep only the processor's `data_config`; the transform definitions index it
# for 'input_size', 'mean' and 'std'.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

# Augmentation pipeline for training images; each step fires with its own probability `p`.
train_transforms = A.Compose([
    # -- geometry: flips plus a small shift / scale / rotation --
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.4,
    ),

    # -- photometry: brightness / contrast and hue / saturation / value jitter --
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),

    # -- mild blur: one of Gaussian / median --
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
        ],
        p=0.3,
    ),

    # -- mild warping: one of elastic / grid distortion (meant to mimic skin curvature) --
    A.OneOf(
        [
            A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
        ],
        p=0.3,
    ),

    # -- lighting artefacts: faint shadows and a small sun flare --
    A.RandomShadow(
        shadow_roi=(0, 0.5, 1, 1),
        num_shadows_limit=(1, 3),
        shadow_dimension=3,
        shadow_intensity_range=(0.05, 0.1),
        p=0.2,
    ),
    A.RandomSunFlare(
        src_radius=2,
        flare_roi=(0, 0, 0.2, 0.2),
        num_flare_circles_range=(1, 2),
        p=0.2,
    ),

    # -- model input format: resize to the checkpoint's input size, normalise with its mean / std --
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

# Validation / test pipeline: no augmentation, only the resize + normalise steps.
val_transforms = A.Compose([
    A.Resize(
        height=image_processor['input_size'][1],
        width=image_processor['input_size'][2],
    ),
    A.Normalize(
        mean=tuple(image_processor['mean'][channel] for channel in range(3)),
        std=tuple(image_processor['std'][channel] for channel in range(3)),
        p=1.0,
    ),
])

def preprocess_train(examples):
    """Apply the training augmentation pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``train_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    augmented = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Apply the validation/test pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``val_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    processed = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = processed
    return examples

# split up training into training + validation
# Fixed seed: without it every kernel run (and every model section, which
# repeats this cell) draws a different 90/10 split, so validation curves and
# early-stopping decisions are not comparable between runs or models.
SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# Test sets are used as provided (no further splitting).
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Transforms run on access: augmentation for the training splits only,
# plain resize + normalise for validation and test.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint with a classification head for this task's labels.
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
    # presumably the checkpoint's head has a different class count; allow it to be re-initialised
    ignore_mismatched_sizes=True,
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# `evaluate` metric objects. metric5 (roc_auc) is loaded but not used by compute_metrics.
metric1, metric2, metric3, metric4, metric5 = (
    evaluate.load(metric_id)
    for metric_id in ("accuracy", "precision", "recall", "f1", "roc_auc")
)

def compute_metrics(eval_pred):
    """Score one evaluation pass for the Trainer.

    The predicted class is the argmax over axis 1 of ``eval_pred.predictions``;
    it is compared against ``eval_pred.label_ids``.

    Returns a dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted, references=eval_pred.label_ids)[key]
        for key, scorer in scorers
    }

# The checkpoint folder is named after the last path component of the checkpoint id.
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = model_name + "-finetuned"

# Optimisation hyperparameters consumed by TrainingArguments.
learning_rate, batch_size, num_epochs = 1e-4, 64, 50

def collate_fn(examples):
    """Batch transformed examples into the tensors handed to the model.

    Each example's ``pixel_values`` array has its axis 2 moved to the front
    before being converted to a tensor; the tensors are stacked along a new
    leading dimension. The ``label`` values are gathered into one tensor.

    Returns a dict with the keys ``"pixel_values"`` and ``"labels"``.
    """
    pixel_values = torch.stack([
        torch.from_numpy(np.moveaxis(example["pixel_values"], 2, 0))
        for example in examples
    ])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" class weights computed from labeldict, stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # --- bookkeeping ---
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    remove_unused_columns=False,  # the on-the-fly transforms read the "image" column
    # --- evaluate / save / log once per epoch ---
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # --- optimisation ---
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",  # alternative noted by the author: "cosine"
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # --- model selection ---
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that optimises a class-weighted cross-entropy loss.

    The loss must live in ``compute_loss``: that is the hook ``Trainer`` calls
    during training and evaluation. A method named ``compute_loss_func`` is
    never invoked (recent ``Trainer`` versions even overwrite that attribute
    in ``__init__``), so the class weights would silently be ignored.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the weighted cross-entropy between the logits and ``inputs["labels"]``.

        Args:
            model: the model being trained or evaluated.
            inputs: batch dict produced by the data collator (includes "labels").
            return_outputs: when True, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: accepted for compatibility with newer Trainer
                versions; not used because the loss is a per-batch mean.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # classweight is built on the CPU; it must sit on the logits' device.
        weights = classweight.to(device=logits.device, dtype=logits.dtype)
        # Cross-entropy takes raw (N, C) logits and (N,) integer class ids.
        # BCELoss would need probabilities with the same shape as the targets.
        loss = torch.nn.functional.cross_entropy(
            logits.view(-1, logits.size(-1)),
            labels.view(-1),
            weight=weights,
        )
        return (loss, outputs) if return_outputs else loss

# Trainset: Mixed

In [None]:
# This run trains and validates on the mixed split.
train_dataset, eval_dataset = train_ds_mixed, val_ds_mixed

trainer_vit_mixed = CustomTrainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # early stopping with a patience of 10
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

trainer_vit_mixed.train()

In [None]:
from pandas import DataFrame

# One row per logged record. A metric column is NaN on every record that did
# not report that metric.
df = pd.DataFrame(trainer_vit_mixed.state.log_history)

history_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
# Drop only the NaN gaps. A `> 0` filter would also discard legitimate 0.0
# scores (e.g. precision/recall/F1 when no positive is predicted) and shift
# every later entry of that column up by one row.
df_history = pd.concat(
    [df[column].dropna().reset_index(drop=True) for column in history_columns],
    axis=1,
)
df_history.index += 1  # 1-based row labels

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Evaluate the mixed-trained ViT on the dermoscopic test set.
outputs_vit_m_d = trainer_vit_mixed.predict(test_ds_dermoscopic)
# Metrics are printed once; a trailing `outputs_vit_m_d.metrics` expression
# would only repeat the same dict below the plots.
print(outputs_vit_m_d.metrics)
plot_confusion_matrix(outputs_vit_m_d)
plot_roc_curve(outputs_vit_m_d)

### Testset: Clinical

In [None]:
# Evaluate the mixed-trained ViT on the clinical test set.
outputs_vit_m_c = trainer_vit_mixed.predict(test_ds_clinical)
# Metrics are printed once; a trailing `outputs_vit_m_c.metrics` expression
# would only repeat the same dict below the plots.
print(outputs_vit_m_c.metrics)
plot_confusion_matrix(outputs_vit_m_c)
plot_roc_curve(outputs_vit_m_c)

### Testset: Mixed

In [None]:
# Evaluate the mixed-trained ViT on the mixed test set.
outputs_vit_m_m = trainer_vit_mixed.predict(test_ds_mixed)
# Metrics are printed once; a trailing `outputs_vit_m_m.metrics` expression
# would only repeat the same dict below the plots.
print(outputs_vit_m_m.metrics)
plot_confusion_matrix(outputs_vit_m_m)
plot_roc_curve(outputs_vit_m_m)

In [None]:
# Remove this run's checkpoints. `output_dir` is the folder passed to
# TrainingArguments, so this works outside Colab's /content as well. The guard
# keeps the cell re-runnable once the folder is already gone.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)

# Training: Swin (Dermoscopic)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/swinv2_base_window8_256.ms_in1k"

# Only the processor's data_config is kept under this name; the transform cell
# reads its 'input_size', 'mean' and 'std' entries.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

def _resize_and_normalize():
    """Build fresh Resize + Normalize steps from the checkpoint's data config.

    New instances are returned on every call so the training and validation
    pipelines never share transform objects.
    """
    size = image_processor['input_size']
    mean = image_processor['mean']
    std = image_processor['std']
    return [
        A.Resize(height=size[1], width=size[2]),
        A.Normalize(
            mean=(mean[0], mean[1], mean[2]),
            std=(std[0], std[1], std[2]),
            p=1.0,
        ),
    ]


# Training pipeline: random augmentations followed by the model's sizing/scaling.
train_transforms = A.Compose([
    # Geometric Transforms
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.5,
    ),

    # Photometric Adjustments
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),

    # Slight blur for optical variance
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
        ],
        p=0.3,
    ),

    # Minor elastic distortion to simulate skin curvature
    A.OneOf(
        [
            A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
        ],
        p=0.3,
    ),

    # Slight shadow/illumination simulation
    A.RandomShadow(
        shadow_roi=(0, 0.5, 1, 1),
        num_shadows_limit=(1, 3),
        shadow_dimension=3,
        shadow_intensity_range=(0.05, 0.1),
        p=0.2,
    ),
    A.RandomSunFlare(
        src_radius=2,
        flare_roi=(0, 0, 0.2, 0.2),
        num_flare_circles_range=(1, 2),
        p=0.2,
    ),

    # Final sizing / scaling, identical to the validation pipeline
    *_resize_and_normalize(),
])

# Validation and test images are only resized and normalised (no augmentation).
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Apply the training augmentation pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``train_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    augmented = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Apply the validation/test pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``val_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    processed = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = processed
    return examples

# split up training into training + validation
# Fixed seed: without it every kernel run (and every model section, which
# repeats this cell) draws a different 90/10 split, so validation curves and
# early-stopping decisions are not comparable between runs or models.
SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# Test sets are used as provided (no further splitting).
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Transforms run on access: augmentation for the training splits only,
# plain resize + normalise for validation and test.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint with a classification head for this task's labels.
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
    # presumably the checkpoint's head has a different class count; allow it to be re-initialised
    ignore_mismatched_sizes=True,
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# `evaluate` metric objects. metric5 (roc_auc) is loaded but not used by compute_metrics.
metric1, metric2, metric3, metric4, metric5 = (
    evaluate.load(metric_id)
    for metric_id in ("accuracy", "precision", "recall", "f1", "roc_auc")
)

def compute_metrics(eval_pred):
    """Score one evaluation pass for the Trainer.

    The predicted class is the argmax over axis 1 of ``eval_pred.predictions``;
    it is compared against ``eval_pred.label_ids``.

    Returns a dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted, references=eval_pred.label_ids)[key]
        for key, scorer in scorers
    }

# The checkpoint folder is named after the last path component of the checkpoint id.
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = model_name + "-finetuned"

# Optimisation hyperparameters consumed by TrainingArguments.
learning_rate, batch_size, num_epochs = 1e-4, 64, 50

def collate_fn(examples):
    """Batch transformed examples into the tensors handed to the model.

    Each example's ``pixel_values`` array has its axis 2 moved to the front
    before being converted to a tensor; the tensors are stacked along a new
    leading dimension. The ``label`` values are gathered into one tensor.

    Returns a dict with the keys ``"pixel_values"`` and ``"labels"``.
    """
    pixel_values = torch.stack([
        torch.from_numpy(np.moveaxis(example["pixel_values"], 2, 0))
        for example in examples
    ])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" class weights computed from labeldict, stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # --- bookkeeping ---
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    remove_unused_columns=False,  # the on-the-fly transforms read the "image" column
    # --- evaluate / save / log once per epoch ---
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # --- optimisation ---
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",  # alternative noted by the author: "cosine"
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # --- model selection ---
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that optimises a class-weighted cross-entropy loss.

    The loss must live in ``compute_loss``: that is the hook ``Trainer`` calls
    during training and evaluation. A method named ``compute_loss_func`` is
    never invoked (recent ``Trainer`` versions even overwrite that attribute
    in ``__init__``), so the class weights would silently be ignored.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the weighted cross-entropy between the logits and ``inputs["labels"]``.

        Args:
            model: the model being trained or evaluated.
            inputs: batch dict produced by the data collator (includes "labels").
            return_outputs: when True, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: accepted for compatibility with newer Trainer
                versions; not used because the loss is a per-batch mean.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # classweight is built on the CPU; it must sit on the logits' device.
        weights = classweight.to(device=logits.device, dtype=logits.dtype)
        # Cross-entropy takes raw (N, C) logits and (N,) integer class ids.
        # BCELoss would need probabilities with the same shape as the targets.
        loss = torch.nn.functional.cross_entropy(
            logits.view(-1, logits.size(-1)),
            labels.view(-1),
            weight=weights,
        )
        return (loss, outputs) if return_outputs else loss

# Trainset: Dermoscopic

In [None]:
# This run trains and validates on the dermoscopic split.
train_dataset, eval_dataset = train_ds_dermoscopic, val_ds_dermoscopic

trainer_swin_dermoscopic = CustomTrainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # early stopping with a patience of 10
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

trainer_swin_dermoscopic.train()

In [None]:
from pandas import DataFrame

# One row per logged record. A metric column is NaN on every record that did
# not report that metric.
df = pd.DataFrame(trainer_swin_dermoscopic.state.log_history)

history_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
# Drop only the NaN gaps. A `> 0` filter would also discard legitimate 0.0
# scores (e.g. precision/recall/F1 when no positive is predicted) and shift
# every later entry of that column up by one row.
df_history = pd.concat(
    [df[column].dropna().reset_index(drop=True) for column in history_columns],
    axis=1,
)
df_history.index += 1  # 1-based row labels

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Run the dermoscopic-trained Swin trainer on the dermoscopic test set.
outputs_swin_d_d = trainer_swin_dermoscopic.predict(test_ds_dermoscopic)
# Show the metrics dict returned by predict().
print(outputs_swin_d_d.metrics)
# Plot helpers are defined elsewhere in the notebook.
plot_confusion_matrix(outputs_swin_d_d)
plot_roc_curve(outputs_swin_d_d)

### Testset: Clinical

In [None]:
# Run the dermoscopic-trained Swin trainer on the clinical test set.
outputs_swin_d_c = trainer_swin_dermoscopic.predict(test_ds_clinical)
# Show the metrics dict returned by predict().
print(outputs_swin_d_c.metrics)
# Plot helpers are defined elsewhere in the notebook.
plot_confusion_matrix(outputs_swin_d_c)
plot_roc_curve(outputs_swin_d_c)

### Testset: Mixed

In [None]:
# Run the dermoscopic-trained Swin trainer on the mixed test set.
outputs_swin_d_m = trainer_swin_dermoscopic.predict(test_ds_mixed)
# Show the metrics dict returned by predict().
print(outputs_swin_d_m.metrics)
# Plot helpers are defined elsewhere in the notebook.
plot_confusion_matrix(outputs_swin_d_m)
plot_roc_curve(outputs_swin_d_m)

In [None]:
# Remove this run's checkpoints. `output_dir` is the folder passed to
# TrainingArguments, so this works outside Colab's /content as well. The guard
# keeps the cell re-runnable once the folder is already gone.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)

# Training: Swin (Clinical)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/swinv2_base_window8_256.ms_in1k"

# Only the processor's data_config is kept under this name; the transform cell
# reads its 'input_size', 'mean' and 'std' entries.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

def _resize_and_normalize():
    """Build fresh Resize + Normalize steps from the checkpoint's data config.

    New instances are returned on every call so the training and validation
    pipelines never share transform objects.
    """
    size = image_processor['input_size']
    mean = image_processor['mean']
    std = image_processor['std']
    return [
        A.Resize(height=size[1], width=size[2]),
        A.Normalize(
            mean=(mean[0], mean[1], mean[2]),
            std=(std[0], std[1], std[2]),
            p=1.0,
        ),
    ]


# Training pipeline: random augmentations followed by the model's sizing/scaling.
train_transforms = A.Compose([
    # Geometric Transforms
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.5,
    ),

    # Photometric Adjustments
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),

    # Slight blur for optical variance
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
        ],
        p=0.3,
    ),

    # Minor elastic distortion to simulate skin curvature
    A.OneOf(
        [
            A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
        ],
        p=0.3,
    ),

    # Slight shadow/illumination simulation
    A.RandomShadow(
        shadow_roi=(0, 0.5, 1, 1),
        num_shadows_limit=(1, 3),
        shadow_dimension=3,
        shadow_intensity_range=(0.05, 0.1),
        p=0.2,
    ),
    A.RandomSunFlare(
        src_radius=2,
        flare_roi=(0, 0, 0.2, 0.2),
        num_flare_circles_range=(1, 2),
        p=0.2,
    ),

    # Final sizing / scaling, identical to the validation pipeline
    *_resize_and_normalize(),
])

# Validation and test images are only resized and normalised (no augmentation).
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Apply the training augmentation pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``train_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    augmented = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Apply the validation/test pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``val_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    processed = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = processed
    return examples

# split up training into training + validation
# Fixed seed: without it every kernel run (and every model section, which
# repeats this cell) draws a different 90/10 split, so validation curves and
# early-stopping decisions are not comparable between runs or models.
SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# Test sets are used as provided (no further splitting).
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Transforms run on access: augmentation for the training splits only,
# plain resize + normalise for validation and test.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint with a classification head for this task's labels.
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
    # presumably the checkpoint's head has a different class count; allow it to be re-initialised
    ignore_mismatched_sizes=True,
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# `evaluate` metric objects. metric5 (roc_auc) is loaded but not used by compute_metrics.
metric1, metric2, metric3, metric4, metric5 = (
    evaluate.load(metric_id)
    for metric_id in ("accuracy", "precision", "recall", "f1", "roc_auc")
)

def compute_metrics(eval_pred):
    """Score one evaluation pass for the Trainer.

    The predicted class is the argmax over axis 1 of ``eval_pred.predictions``;
    it is compared against ``eval_pred.label_ids``.

    Returns a dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted, references=eval_pred.label_ids)[key]
        for key, scorer in scorers
    }

# The checkpoint folder is named after the last path component of the checkpoint id.
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = model_name + "-finetuned"

# Optimisation hyperparameters consumed by TrainingArguments.
learning_rate, batch_size, num_epochs = 1e-4, 64, 50

def collate_fn(examples):
    """Batch transformed examples into the tensors handed to the model.

    Each example's ``pixel_values`` array has its axis 2 moved to the front
    before being converted to a tensor; the tensors are stacked along a new
    leading dimension. The ``label`` values are gathered into one tensor.

    Returns a dict with the keys ``"pixel_values"`` and ``"labels"``.
    """
    pixel_values = torch.stack([
        torch.from_numpy(np.moveaxis(example["pixel_values"], 2, 0))
        for example in examples
    ])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" class weights computed from labeldict, stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # --- bookkeeping ---
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    remove_unused_columns=False,  # the on-the-fly transforms read the "image" column
    # --- evaluate / save / log once per epoch ---
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # --- optimisation ---
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",  # alternative noted by the author: "cosine"
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # --- model selection ---
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that optimises a class-weighted cross-entropy loss.

    The loss must live in ``compute_loss``: that is the hook ``Trainer`` calls
    during training and evaluation. A method named ``compute_loss_func`` is
    never invoked (recent ``Trainer`` versions even overwrite that attribute
    in ``__init__``), so the class weights would silently be ignored.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the weighted cross-entropy between the logits and ``inputs["labels"]``.

        Args:
            model: the model being trained or evaluated.
            inputs: batch dict produced by the data collator (includes "labels").
            return_outputs: when True, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: accepted for compatibility with newer Trainer
                versions; not used because the loss is a per-batch mean.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # classweight is built on the CPU; it must sit on the logits' device.
        weights = classweight.to(device=logits.device, dtype=logits.dtype)
        # Cross-entropy takes raw (N, C) logits and (N,) integer class ids.
        # BCELoss would need probabilities with the same shape as the targets.
        loss = torch.nn.functional.cross_entropy(
            logits.view(-1, logits.size(-1)),
            labels.view(-1),
            weight=weights,
        )
        return (loss, outputs) if return_outputs else loss

# Trainset: Clinical

In [None]:
# This run trains and validates on the clinical split.
train_dataset, eval_dataset = train_ds_clinical, val_ds_clinical

trainer_swin_clinical = CustomTrainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    # early stopping with a patience of 10
    callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
)

trainer_swin_clinical.train()

In [None]:
from pandas import DataFrame

# One row per logged record. A metric column is NaN on every record that did
# not report that metric.
df = pd.DataFrame(trainer_swin_clinical.state.log_history)

history_columns = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
# Drop only the NaN gaps. A `> 0` filter would also discard legitimate 0.0
# scores (e.g. precision/recall/F1 when no positive is predicted) and shift
# every later entry of that column up by one row.
df_history = pd.concat(
    [df[column].dropna().reset_index(drop=True) for column in history_columns],
    axis=1,
)
df_history.index += 1  # 1-based row labels

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Evaluate the clinical-trained Swin model on the dermoscopic test set.
outputs_swin_c_d = trainer_swin_clinical.predict(test_ds_dermoscopic)
# A bare expression that is not the last line of a cell displays nothing,
# so the metrics have to be printed explicitly.
print(outputs_swin_c_d.metrics)
plot_confusion_matrix(outputs_swin_c_d)
plot_roc_curve(outputs_swin_c_d)

### Testset: Clinical

In [None]:
# Run the clinical-trained Swin trainer on the clinical test set.
outputs_swin_c_c = trainer_swin_clinical.predict(test_ds_clinical)
# Show the metrics dict returned by predict().
print(outputs_swin_c_c.metrics)
# Plot helpers are defined elsewhere in the notebook.
plot_confusion_matrix(outputs_swin_c_c)
plot_roc_curve(outputs_swin_c_c)

### Testset: Mixed

In [None]:
# Run the clinical-trained Swin trainer on the mixed test set.
outputs_swin_c_m = trainer_swin_clinical.predict(test_ds_mixed)
# Show the metrics dict returned by predict().
print(outputs_swin_c_m.metrics)
# Plot helpers are defined elsewhere in the notebook.
plot_confusion_matrix(outputs_swin_c_m)
plot_roc_curve(outputs_swin_c_m)

In [None]:
# Remove this run's checkpoints. `output_dir` is the folder passed to
# TrainingArguments, so this works outside Colab's /content as well. The guard
# keeps the cell re-runnable once the folder is already gone.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)

# Training: Swin (Mixed)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

model_checkpoint = "timm/swinv2_base_window8_256.ms_in1k"

# Only the processor's data_config is kept under this name; the transform cell
# reads its 'input_size', 'mean' and 'std' entries.
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint).data_config
image_processor

In [None]:
import cv2
import albumentations as A
import numpy as np

def _resize_and_normalize():
    """Build fresh Resize + Normalize steps from the checkpoint's data config.

    New instances are returned on every call so the training and validation
    pipelines never share transform objects.
    """
    size = image_processor['input_size']
    mean = image_processor['mean']
    std = image_processor['std']
    return [
        A.Resize(height=size[1], width=size[2]),
        A.Normalize(
            mean=(mean[0], mean[1], mean[2]),
            std=(std[0], std[1], std[2]),
            p=1.0,
        ),
    ]


# Training pipeline: random augmentations followed by the model's sizing/scaling.
train_transforms = A.Compose([
    # Geometric Transforms
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.5,
    ),

    # Photometric Adjustments
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),

    # Slight blur for optical variance
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=3, p=0.5),
            A.MedianBlur(blur_limit=3, p=0.5),
        ],
        p=0.3,
    ),

    # Minor elastic distortion to simulate skin curvature
    A.OneOf(
        [
            A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
            A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
        ],
        p=0.3,
    ),

    # Slight shadow/illumination simulation
    A.RandomShadow(
        shadow_roi=(0, 0.5, 1, 1),
        num_shadows_limit=(1, 3),
        shadow_dimension=3,
        shadow_intensity_range=(0.05, 0.1),
        p=0.2,
    ),
    A.RandomSunFlare(
        src_radius=2,
        flare_roi=(0, 0, 0.2, 0.2),
        num_flare_circles_range=(1, 2),
        p=0.2,
    ),

    # Final sizing / scaling, identical to the validation pipeline
    *_resize_and_normalize(),
])

# Validation and test images are only resized and normalised (no augmentation).
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Apply the training augmentation pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``train_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    augmented = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = augmented
    return examples

def preprocess_val(examples):
    """Apply the validation/test pipeline to a batch of examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a
    NumPy array and passed through ``val_transforms``. The results are stored
    under ``examples["pixel_values"]`` and the same mapping is returned.
    """
    processed = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])

    examples["pixel_values"] = processed
    return examples

# split up training into training + validation
# Fixed seed: without it every kernel run (and every model section, which
# repeats this cell) draws a different 90/10 split, so validation curves and
# early-stopping decisions are not comparable between runs or models.
SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# Test sets are used as provided (no further splitting).
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Transforms run on access: augmentation for the training splits only,
# plain resize + normalise for validation and test.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint with a classification head for this task's labels.
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    num_labels=num_labels,
    # presumably the checkpoint's head has a different class count; allow it to be re-initialised
    ignore_mismatched_sizes=True,
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# `evaluate` metric objects. metric5 (roc_auc) is loaded but not used by compute_metrics.
metric1, metric2, metric3, metric4, metric5 = (
    evaluate.load(metric_id)
    for metric_id in ("accuracy", "precision", "recall", "f1", "roc_auc")
)

def compute_metrics(eval_pred):
    """Score one evaluation pass for the Trainer.

    The predicted class is the argmax over axis 1 of ``eval_pred.predictions``;
    it is compared against ``eval_pred.label_ids``.

    Returns a dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted, references=eval_pred.label_ids)[key]
        for key, scorer in scorers
    }

# The checkpoint folder is named after the last path component of the checkpoint id.
model_name = model_checkpoint.rsplit("/", 1)[-1]
output_dir = model_name + "-finetuned"

# Optimisation hyperparameters consumed by TrainingArguments.
learning_rate, batch_size, num_epochs = 1e-4, 64, 50

def collate_fn(examples):
    """Batch transformed examples into the tensors handed to the model.

    Each example's ``pixel_values`` array has its axis 2 moved to the front
    before being converted to a tensor; the tensors are stacked along a new
    leading dimension. The ``label`` values are gathered into one tensor.

    Returns a dict with the keys ``"pixel_values"`` and ``"labels"``.
    """
    pixel_values = torch.stack([
        torch.from_numpy(np.moveaxis(example["pixel_values"], 2, 0))
        for example in examples
    ])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" class weights computed from labeldict, stored as a float tensor.
classweight = torch.tensor(
    compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict),
    dtype=torch.float,
)

training_args = TrainingArguments(
    # --- bookkeeping ---
    output_dir=output_dir,
    push_to_hub=False,
    report_to="none",
    remove_unused_columns=False,  # the on-the-fly transforms read the "image" column
    # --- evaluate / save / log once per epoch ---
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    # --- optimisation ---
    learning_rate=learning_rate,
    lr_scheduler_type="cosine_with_restarts",  # alternative noted by the author: "cosine"
    warmup_ratio=0.1,
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=2,
    # --- model selection ---
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

class CustomTrainer(Trainer):
    """Trainer that optimises a class-weighted cross-entropy loss.

    The loss must live in ``compute_loss``: that is the hook ``Trainer`` calls
    during training and evaluation. A method named ``compute_loss_func`` is
    never invoked (recent ``Trainer`` versions even overwrite that attribute
    in ``__init__``), so the class weights would silently be ignored.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the weighted cross-entropy between the logits and ``inputs["labels"]``.

        Args:
            model: the model being trained or evaluated.
            inputs: batch dict produced by the data collator (includes "labels").
            return_outputs: when True, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: accepted for compatibility with newer Trainer
                versions; not used because the loss is a per-batch mean.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # classweight is built on the CPU; it must sit on the logits' device.
        weights = classweight.to(device=logits.device, dtype=logits.dtype)
        # Cross-entropy takes raw (N, C) logits and (N,) integer class ids.
        # BCELoss would need probabilities with the same shape as the targets.
        loss = torch.nn.functional.cross_entropy(
            logits.view(-1, logits.size(-1)),
            labels.view(-1),
            weight=weights,
        )
        return (loss, outputs) if return_outputs else loss

# Trainset: Mixed

In [None]:
from datasets import Dataset
from PIL import Image
from datasets import Features, ClassLabel, Image as ImageFeature

def _as_binary_frame(frame):
    """Return a copy of `frame` prepared for `Dataset.from_pandas`.

    The 'image_path' column is renamed to 'image', and 'label' is encoded as
    1 for 'BCC' and 0 for every other value.
    """
    prepared = frame.copy().rename(columns={'image_path': 'image'})
    prepared['label'] = prepared['label'].apply(lambda value: 1 if value == 'BCC' else 0)
    return prepared


#Training Sets - Will be later split into train and val set
df_dermoscopic = _as_binary_frame(df_train_dermoscopic)
df_clinical = _as_binary_frame(df_train_clinical)
df_mixed = _as_binary_frame(df_train_mixed)

#Test Sets
df_dermoscopic_test = _as_binary_frame(df_test_dermoscopic)
df_clinical_test = _as_binary_frame(df_test_clinical)
df_mixed_test = _as_binary_frame(df_test_mixed)

ds_dermoscopic_train = Dataset.from_pandas(df_dermoscopic)
ds_clinical_train = Dataset.from_pandas(df_clinical)
ds_mixed_train = Dataset.from_pandas(df_mixed)

ds_dermoscopic_test = Dataset.from_pandas(df_dermoscopic_test)
ds_clinical_test = Dataset.from_pandas(df_clinical_test)
ds_mixed_test = Dataset.from_pandas(df_mixed_test)

# Cast columns as features
ds_dermoscopic_train = ds_dermoscopic_train.cast_column("label", ClassLabel(num_classes=2, names=['SCC', 'BCC']))
ds_dermoscopic_train = ds_dermoscopic_train.cast_column("image", ImageFeature())

ds_clinical_train = ds_clinical_train.cast_column("label", ClassLabel(num_classes=2, names=['SCC', 'BCC']))
ds_clinical_train = ds_clinical_train.cast_column("image", ImageFeature())

ds_mixed_train = ds_mixed_train.cast_column("label", ClassLabel(num_classes=2, names=['SCC', 'BCC']))
ds_mixed_train = ds_mixed_train.cast_column("image", ImageFeature())

ds_dermoscopic_test = ds_dermoscopic_test.cast_column("label", ClassLabel(num_classes=2, names=['SCC', 'BCC']))
ds_dermoscopic_test = ds_dermoscopic_test.cast_column("image", ImageFeature())

ds_clinical_test = ds_clinical_test.cast_column("label", ClassLabel(num_classes=2, names=['SCC', 'BCC']))
ds_clinical_test = ds_clinical_test.cast_column("image", ImageFeature())

ds_mixed_test = ds_mixed_test.cast_column("label", ClassLabel(num_classes=2, names=['SCC', 'BCC']))
ds_mixed_test = ds_mixed_test.cast_column("image", ImageFeature())

In [None]:
# Train on the mixed training split and validate on the mixed validation split.
train_dataset, eval_dataset = train_ds_mixed, val_ds_mixed

# Training stops early after 10 evaluations without improvement.
early_stopping = EarlyStoppingCallback(early_stopping_patience=10)

trainer_swin_mixed = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

trainer_swin_mixed.train()

In [None]:
from pandas import DataFrame

# Build a per-epoch metrics table from the trainer's log history.
# Training-loss entries and evaluation entries are separate rows in the log, so each metric
# is extracted on its own, re-indexed from 0 and then placed side by side.
history_cols = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df = pd.DataFrame(trainer_swin_mixed.state.log_history)
# dropna() instead of a `> 0` filter: a genuine score of 0.0 (e.g. precision/recall when no
# positive is predicted) must stay in place, otherwise later epochs shift up and the columns
# no longer line up per epoch.
df_history = pd.concat(
    [df[col].dropna().reset_index(drop=True) for col in history_cols],
    axis=1,
)
df_history.index += 1  # 1-based epoch numbering

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Run trainer_swin_mixed on the dermoscopic test set, then show metrics, confusion matrix and ROC curve.
outputs_swin_m_d = trainer_swin_mixed.predict(test_ds_dermoscopic)
print(outputs_swin_m_d.metrics)
plot_confusion_matrix(outputs_swin_m_d)
plot_roc_curve(outputs_swin_m_d)
outputs_swin_m_d.metrics  # last expression: also shown as the cell's output

### Testset: Clinical

In [None]:
# Run trainer_swin_mixed on the clinical test set, then show metrics, confusion matrix and ROC curve.
outputs_swin_m_c = trainer_swin_mixed.predict(test_ds_clinical)
print(outputs_swin_m_c.metrics)
plot_confusion_matrix(outputs_swin_m_c)
plot_roc_curve(outputs_swin_m_c)
outputs_swin_m_c.metrics  # last expression: also shown as the cell's output

### Testset: Mixed

In [None]:
# Run trainer_swin_mixed on the mixed test set, then show metrics, confusion matrix and ROC curve.
outputs_swin_m_m = trainer_swin_mixed.predict(test_ds_mixed)
print(outputs_swin_m_m.metrics)
plot_confusion_matrix(outputs_swin_m_m)
plot_roc_curve(outputs_swin_m_m)
outputs_swin_m_m.metrics  # last expression: also shown as the cell's output

In [None]:
# Delete this run's checkpoint directory to free disk space.
# The path is taken from the training arguments rather than a hard-coded Colab location, and
# ignore_errors=True keeps the cell re-runnable when the directory is already gone.
shutil.rmtree(training_args.output_dir, ignore_errors=True)

# Training: DeiT (Dermoscopic)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

# Hub identifier of the pretrained checkpoint to fine-tune.
model_checkpoint = "timm/deit3_base_patch16_224.fb_in1k"

# Only the preprocessing configuration is kept: the name `image_processor` is rebound from
# the processor object to its `data_config` (later indexed by 'input_size', 'mean' and 'std').
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
image_processor = image_processor.data_config
image_processor  # display the config as the cell output

In [None]:
import cv2
import albumentations as A
import numpy as np

# Target size and normalisation statistics are read from the checkpoint's data config;
# 'input_size' is indexed as [1] -> height and [2] -> width.
_target_h = image_processor['input_size'][1]
_target_w = image_processor['input_size'][2]
_channel_mean = tuple(image_processor['mean'][i] for i in range(3))
_channel_std = tuple(image_processor['std'][i] for i in range(3))


def _resize_and_normalize():
    """Return new Resize + Normalize steps (separate instances for each pipeline)."""
    return [
        A.Resize(height=_target_h, width=_target_w),
        A.Normalize(mean=_channel_mean, std=_channel_std, p=1.0),
    ]


# Geometric transforms
_geometric_augs = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.4,
    ),
]

# Photometric adjustments
_photometric_augs = [
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),
]

# One of two blurs, applied to 30 % of the images
_blur_aug = A.OneOf(
    [
        A.GaussianBlur(blur_limit=3, p=0.5),
        A.MedianBlur(blur_limit=3, p=0.5),
    ],
    p=0.3,
)

# One of two spatial distortions, applied to 30 % of the images
_distortion_aug = A.OneOf(
    [
        A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
        A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
    ],
    p=0.3,
)

# Shadow / flare simulation
_lighting_augs = [
    A.RandomShadow(shadow_roi=(0, 0.5, 1, 1), num_shadows_limit=(1, 3), shadow_dimension=3, shadow_intensity_range=(0.05, 0.1), p=0.2),
    A.RandomSunFlare(src_radius=2, flare_roi=(0, 0, 0.2, 0.2), num_flare_circles_range=(1, 2), p=0.2),
]

# Training pipeline: augmentations first, then resize and normalise.
train_transforms = A.Compose(
    _geometric_augs
    + _photometric_augs
    + [_blur_aug, _distortion_aug]
    + _lighting_augs
    + _resize_and_normalize()
)

# Validation/test pipeline: resize and normalise only.
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Add augmented pixel arrays to a batch of training examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a NumPy array and
    passed through ``train_transforms``. The results are stored under
    ``examples["pixel_values"]`` and the same batch object is returned.
    """
    augmented = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])
    examples["pixel_values"] = augmented

    return examples

def preprocess_val(examples):
    """Add resized and normalised pixel arrays to a batch of validation/test examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a NumPy array and
    passed through ``val_transforms``. The results are stored under
    ``examples["pixel_values"]`` and the same batch object is returned.
    """
    processed = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])
    examples["pixel_values"] = processed

    return examples

# Split each training set 90/10 into training + validation.
# A fixed seed makes the split reproducible across kernel restarts; without it every run
# trains and validates on a different partition, so results cannot be reproduced.
SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# The test_* names are aliases of the ds_*_test objects (no copy is made), so the
# set_transform calls below also apply to ds_*_test.
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Augmentation is applied to training splits only; validation and test use preprocess_val.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint for image classification with `num_labels` output classes
# and the given label <-> id mappings (all three are defined outside this cell).
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
    # presumably needed because the checkpoint's classification head has a different size — confirm
    ignore_mismatched_sizes=True
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# Metric objects from the `evaluate` library; metric1..metric4 are used by compute_metrics.
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("precision")
metric3 = evaluate.load("recall")
metric4 = evaluate.load("f1")
metric5 = evaluate.load("roc_auc")  # NOTE(review): loaded but not referenced by compute_metrics

def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 for an evaluation result.

    The predicted class is the arg-max over axis 1 of ``eval_pred.predictions``; each score
    compares those predictions with ``eval_pred.label_ids`` using the module-level metric
    objects ``metric1`` .. ``metric4``.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    # (result key, metric object) pairs, evaluated in this order
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted, references=references)[key]
        for key, scorer in scorers
    }

# Checkpoint directory: the part of the checkpoint id after the last "/", plus "-finetuned".
model_name = model_checkpoint.split("/")[-1]
output_dir = f"{model_name}-finetuned"

# Hyper-parameters passed to TrainingArguments.
learning_rate = 1e-4
batch_size = 64  # used as the per-device train and eval batch size
num_epochs = 50  # maximum number of epochs

def collate_fn(examples):
    """Collate dataset examples into a batch dict of tensors.

    Each example's ``"pixel_values"`` array has its axis 2 moved to the front (channel-last
    to channel-first for a 3-D image) before being stacked; ``"label"`` values are gathered
    into a single tensor.

    Returns:
        dict with "pixel_values" (stacked image tensor) and "labels" (label tensor).
    """
    channel_first = [
        torch.from_numpy(np.moveaxis(sample["pixel_values"], 2, 0))
        for sample in examples
    ]
    targets = [sample["label"] for sample in examples]
    return {
        "pixel_values": torch.stack(channel_first),
        "labels": torch.tensor(targets),
    }

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Class weights for the loss: "balanced" mode weights each class inversely to its frequency
# in `labeldict`; the result is then converted to a float32 tensor.
# NOTE(review): `labeldict` is defined outside this cell — confirm it holds the labels of the
# training set that is actually passed to the trainer.
classweight = compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict)
classweight = torch.tensor(classweight, dtype=torch.float)

# Evaluation, checkpointing and logging all happen once per epoch; with
# load_best_model_at_end the checkpoint with the best `eval_loss` is reloaded after training.
training_args = TrainingArguments(
    output_dir=output_dir,
    remove_unused_columns=False,  # keep dataset columns that are not model inputs
    eval_strategy = "epoch",
    save_strategy = "epoch",
    logging_strategy = "epoch",
    learning_rate=learning_rate,
    lr_scheduler_type = "cosine_with_restarts", # alternative considered: "cosine"
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=2,  # gradients are accumulated over 2 batches per optimizer step
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    warmup_ratio=0.1,
    logging_steps=1,  # NOTE(review): presumably irrelevant while logging_strategy is "epoch" — confirm
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    push_to_hub=False,
    report_to="none",
)

class CustomTrainer(Trainer):
    """Trainer variant that trains with class-weighted cross-entropy.

    The weights come from the module-level ``classweight`` tensor (one weight per class).
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the class-weighted cross-entropy loss for one batch.

        This overrides ``Trainer.compute_loss``, the hook the training and evaluation loops
        call. The previous method name, ``compute_loss_func``, is not a hook that ``Trainer``
        invokes on subclasses, so the weighted loss was never applied; it also used ``BCELoss``,
        which expects probabilities shaped like the targets rather than ``(N, num_labels)``
        logits paired with ``(N,)`` class indices.

        Args:
            model: The model being trained/evaluated.
            inputs: Batch dict; must contain ``"labels"`` plus the model's forward arguments.
            return_outputs: When True, return ``(loss, outputs)`` instead of only the loss.
            num_items_in_batch: Accepted for compatibility with newer ``Trainer`` versions; unused.

        Returns:
            The scalar loss tensor, or ``(loss, outputs)`` if ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Weighted multi-class loss on raw logits; the weights are moved to the logits' device
        # because `classweight` is created without an explicit device (CPU).
        loss_fct = torch.nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Dermoscopic

In [None]:
# Train on the dermoscopic training split and validate on the dermoscopic validation split.
train_dataset, eval_dataset = train_ds_dermoscopic, val_ds_dermoscopic

# Training stops early after 10 evaluations without improvement.
early_stopping = EarlyStoppingCallback(early_stopping_patience=10)

trainer_deit_dermoscopic = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

trainer_deit_dermoscopic.train()

In [None]:
from pandas import DataFrame

# Build a per-epoch metrics table from the trainer's log history.
# Training-loss entries and evaluation entries are separate rows in the log, so each metric
# is extracted on its own, re-indexed from 0 and then placed side by side.
history_cols = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df = pd.DataFrame(trainer_deit_dermoscopic.state.log_history)
# dropna() instead of a `> 0` filter: a genuine score of 0.0 (e.g. precision/recall when no
# positive is predicted) must stay in place, otherwise later epochs shift up and the columns
# no longer line up per epoch.
df_history = pd.concat(
    [df[col].dropna().reset_index(drop=True) for col in history_cols],
    axis=1,
)
df_history.index += 1  # 1-based epoch numbering

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Run trainer_deit_dermoscopic on the dermoscopic test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_d_d = trainer_deit_dermoscopic.predict(test_ds_dermoscopic)
print(outputs_deit_d_d.metrics)
plot_confusion_matrix(outputs_deit_d_d)
plot_roc_curve(outputs_deit_d_d)

### Testset: Clinical

In [None]:
# Run trainer_deit_dermoscopic on the clinical test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_d_c = trainer_deit_dermoscopic.predict(test_ds_clinical)
print(outputs_deit_d_c.metrics)
plot_confusion_matrix(outputs_deit_d_c)
plot_roc_curve(outputs_deit_d_c)

### Testset: Mixed

In [None]:
# Run trainer_deit_dermoscopic on the mixed test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_d_m = trainer_deit_dermoscopic.predict(test_ds_mixed)
print(outputs_deit_d_m.metrics)
plot_confusion_matrix(outputs_deit_d_m)
plot_roc_curve(outputs_deit_d_m)

In [None]:
# Delete this run's checkpoint directory to free disk space.
# The path is taken from the training arguments rather than a hard-coded Colab location, and
# ignore_errors=True keeps the cell re-runnable when the directory is already gone.
shutil.rmtree(training_args.output_dir, ignore_errors=True)

# Training: DeiT (Clinical)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

# Hub identifier of the pretrained checkpoint to fine-tune.
model_checkpoint = "timm/deit3_base_patch16_224.fb_in1k"

# Only the preprocessing configuration is kept: the name `image_processor` is rebound from
# the processor object to its `data_config` (later indexed by 'input_size', 'mean' and 'std').
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
image_processor = image_processor.data_config
image_processor  # display the config as the cell output

In [None]:
import cv2
import albumentations as A
import numpy as np

# Target size and normalisation statistics are read from the checkpoint's data config;
# 'input_size' is indexed as [1] -> height and [2] -> width.
_target_h = image_processor['input_size'][1]
_target_w = image_processor['input_size'][2]
_channel_mean = tuple(image_processor['mean'][i] for i in range(3))
_channel_std = tuple(image_processor['std'][i] for i in range(3))


def _resize_and_normalize():
    """Return new Resize + Normalize steps (separate instances for each pipeline)."""
    return [
        A.Resize(height=_target_h, width=_target_w),
        A.Normalize(mean=_channel_mean, std=_channel_std, p=1.0),
    ]


# Geometric transforms
_geometric_augs = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.4,
    ),
]

# Photometric adjustments
_photometric_augs = [
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),
]

# One of two blurs, applied to 30 % of the images
_blur_aug = A.OneOf(
    [
        A.GaussianBlur(blur_limit=3, p=0.5),
        A.MedianBlur(blur_limit=3, p=0.5),
    ],
    p=0.3,
)

# One of two spatial distortions, applied to 30 % of the images
_distortion_aug = A.OneOf(
    [
        A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
        A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
    ],
    p=0.3,
)

# Shadow / flare simulation
_lighting_augs = [
    A.RandomShadow(shadow_roi=(0, 0.5, 1, 1), num_shadows_limit=(1, 3), shadow_dimension=3, shadow_intensity_range=(0.05, 0.1), p=0.2),
    A.RandomSunFlare(src_radius=2, flare_roi=(0, 0, 0.2, 0.2), num_flare_circles_range=(1, 2), p=0.2),
]

# Training pipeline: augmentations first, then resize and normalise.
train_transforms = A.Compose(
    _geometric_augs
    + _photometric_augs
    + [_blur_aug, _distortion_aug]
    + _lighting_augs
    + _resize_and_normalize()
)

# Validation/test pipeline: resize and normalise only.
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Add augmented pixel arrays to a batch of training examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a NumPy array and
    passed through ``train_transforms``. The results are stored under
    ``examples["pixel_values"]`` and the same batch object is returned.
    """
    augmented = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])
    examples["pixel_values"] = augmented

    return examples

def preprocess_val(examples):
    """Add resized and normalised pixel arrays to a batch of validation/test examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a NumPy array and
    passed through ``val_transforms``. The results are stored under
    ``examples["pixel_values"]`` and the same batch object is returned.
    """
    processed = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])
    examples["pixel_values"] = processed

    return examples

# Split each training set 90/10 into training + validation.
# A fixed seed makes the split reproducible across kernel restarts; without it every run
# trains and validates on a different partition, so results cannot be reproduced.
SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# The test_* names are aliases of the ds_*_test objects (no copy is made), so the
# set_transform calls below also apply to ds_*_test.
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Augmentation is applied to training splits only; validation and test use preprocess_val.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint for image classification with `num_labels` output classes
# and the given label <-> id mappings (all three are defined outside this cell).
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
    # presumably needed because the checkpoint's classification head has a different size — confirm
    ignore_mismatched_sizes=True
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# Metric objects from the `evaluate` library; metric1..metric4 are used by compute_metrics.
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("precision")
metric3 = evaluate.load("recall")
metric4 = evaluate.load("f1")
metric5 = evaluate.load("roc_auc")  # NOTE(review): loaded but not referenced by compute_metrics

def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 for an evaluation result.

    The predicted class is the arg-max over axis 1 of ``eval_pred.predictions``; each score
    compares those predictions with ``eval_pred.label_ids`` using the module-level metric
    objects ``metric1`` .. ``metric4``.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    # (result key, metric object) pairs, evaluated in this order
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted, references=references)[key]
        for key, scorer in scorers
    }

# Checkpoint directory: the part of the checkpoint id after the last "/", plus "-finetuned".
model_name = model_checkpoint.split("/")[-1]
output_dir = f"{model_name}-finetuned"

# Hyper-parameters passed to TrainingArguments.
learning_rate = 1e-4
batch_size = 64  # used as the per-device train and eval batch size
num_epochs = 50  # maximum number of epochs

def collate_fn(examples):
    """Collate dataset examples into a batch dict of tensors.

    Each example's ``"pixel_values"`` array has its axis 2 moved to the front (channel-last
    to channel-first for a 3-D image) before being stacked; ``"label"`` values are gathered
    into a single tensor.

    Returns:
        dict with "pixel_values" (stacked image tensor) and "labels" (label tensor).
    """
    channel_first = [
        torch.from_numpy(np.moveaxis(sample["pixel_values"], 2, 0))
        for sample in examples
    ]
    targets = [sample["label"] for sample in examples]
    return {
        "pixel_values": torch.stack(channel_first),
        "labels": torch.tensor(targets),
    }

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Class weights for the loss: "balanced" mode weights each class inversely to its frequency
# in `labeldict`; the result is then converted to a float32 tensor.
# NOTE(review): `labeldict` is defined outside this cell — confirm it holds the labels of the
# training set that is actually passed to the trainer.
classweight = compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict)
classweight = torch.tensor(classweight, dtype=torch.float)

# Evaluation, checkpointing and logging all happen once per epoch; with
# load_best_model_at_end the checkpoint with the best `eval_loss` is reloaded after training.
training_args = TrainingArguments(
    output_dir=output_dir,
    remove_unused_columns=False,  # keep dataset columns that are not model inputs
    eval_strategy = "epoch",
    save_strategy = "epoch",
    logging_strategy = "epoch",
    learning_rate=learning_rate,
    lr_scheduler_type = "cosine_with_restarts", # alternative considered: "cosine"
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=2,  # gradients are accumulated over 2 batches per optimizer step
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    warmup_ratio=0.1,
    logging_steps=1,  # NOTE(review): presumably irrelevant while logging_strategy is "epoch" — confirm
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    push_to_hub=False,
    report_to="none",
)

class CustomTrainer(Trainer):
    """Trainer variant that trains with class-weighted cross-entropy.

    The weights come from the module-level ``classweight`` tensor (one weight per class).
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the class-weighted cross-entropy loss for one batch.

        This overrides ``Trainer.compute_loss``, the hook the training and evaluation loops
        call. The previous method name, ``compute_loss_func``, is not a hook that ``Trainer``
        invokes on subclasses, so the weighted loss was never applied; it also used ``BCELoss``,
        which expects probabilities shaped like the targets rather than ``(N, num_labels)``
        logits paired with ``(N,)`` class indices.

        Args:
            model: The model being trained/evaluated.
            inputs: Batch dict; must contain ``"labels"`` plus the model's forward arguments.
            return_outputs: When True, return ``(loss, outputs)`` instead of only the loss.
            num_items_in_batch: Accepted for compatibility with newer ``Trainer`` versions; unused.

        Returns:
            The scalar loss tensor, or ``(loss, outputs)`` if ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Weighted multi-class loss on raw logits; the weights are moved to the logits' device
        # because `classweight` is created without an explicit device (CPU).
        loss_fct = torch.nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Clinical

In [None]:
# Train on the clinical training split and validate on the clinical validation split.
train_dataset, eval_dataset = train_ds_clinical, val_ds_clinical

# Training stops early after 10 evaluations without improvement.
early_stopping = EarlyStoppingCallback(early_stopping_patience=10)

trainer_deit_clinical = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

trainer_deit_clinical.train()

In [None]:
from pandas import DataFrame

# Build a per-epoch metrics table from the trainer's log history.
# Training-loss entries and evaluation entries are separate rows in the log, so each metric
# is extracted on its own, re-indexed from 0 and then placed side by side.
history_cols = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df = pd.DataFrame(trainer_deit_clinical.state.log_history)
# dropna() instead of a `> 0` filter: a genuine score of 0.0 (e.g. precision/recall when no
# positive is predicted) must stay in place, otherwise later epochs shift up and the columns
# no longer line up per epoch.
df_history = pd.concat(
    [df[col].dropna().reset_index(drop=True) for col in history_cols],
    axis=1,
)
df_history.index += 1  # 1-based epoch numbering

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Run trainer_deit_clinical on the dermoscopic test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_c_d = trainer_deit_clinical.predict(test_ds_dermoscopic)
print(outputs_deit_c_d.metrics)
plot_confusion_matrix(outputs_deit_c_d)
plot_roc_curve(outputs_deit_c_d)
outputs_deit_c_d.metrics  # last expression: also shown as the cell's output

### Testset: Clinical

In [None]:
# Run trainer_deit_clinical on the clinical test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_c_c = trainer_deit_clinical.predict(test_ds_clinical)
print(outputs_deit_c_c.metrics)
plot_confusion_matrix(outputs_deit_c_c)
plot_roc_curve(outputs_deit_c_c)
outputs_deit_c_c.metrics  # last expression: also shown as the cell's output

### Testset: Mixed

In [None]:
# Run trainer_deit_clinical on the mixed test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_c_m = trainer_deit_clinical.predict(test_ds_mixed)
print(outputs_deit_c_m.metrics)
plot_confusion_matrix(outputs_deit_c_m)
plot_roc_curve(outputs_deit_c_m)
outputs_deit_c_m.metrics  # last expression: also shown as the cell's output

In [None]:
# Delete this run's checkpoint directory to free disk space.
# The path is taken from the training arguments rather than a hard-coded Colab location, and
# ignore_errors=True keeps the cell re-runnable when the directory is already gone.
shutil.rmtree(training_args.output_dir, ignore_errors=True)

# Training: DeiT (Mixed)

In [None]:
from transformers import AutoModelForImageClassification, AutoImageProcessor

# Hub identifier of the pretrained checkpoint to fine-tune.
model_checkpoint = "timm/deit3_base_patch16_224.fb_in1k"

# Only the preprocessing configuration is kept: the name `image_processor` is rebound from
# the processor object to its `data_config` (later indexed by 'input_size', 'mean' and 'std').
image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
image_processor = image_processor.data_config
image_processor  # display the config as the cell output

In [None]:
import cv2
import albumentations as A
import numpy as np

# Target size and normalisation statistics are read from the checkpoint's data config;
# 'input_size' is indexed as [1] -> height and [2] -> width.
_target_h = image_processor['input_size'][1]
_target_w = image_processor['input_size'][2]
_channel_mean = tuple(image_processor['mean'][i] for i in range(3))
_channel_std = tuple(image_processor['std'][i] for i in range(3))


def _resize_and_normalize():
    """Return new Resize + Normalize steps (separate instances for each pipeline)."""
    return [
        A.Resize(height=_target_h, width=_target_w),
        A.Normalize(mean=_channel_mean, std=_channel_std, p=1.0),
    ]


# Geometric transforms
_geometric_augs = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101,
        p=0.4,
    ),
]

# Photometric adjustments
_photometric_augs = [
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.4, p=0.5),
    A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30, val_shift_limit=30, p=0.4),
]

# One of two blurs, applied to 30 % of the images
_blur_aug = A.OneOf(
    [
        A.GaussianBlur(blur_limit=3, p=0.5),
        A.MedianBlur(blur_limit=3, p=0.5),
    ],
    p=0.3,
)

# One of two spatial distortions, applied to 30 % of the images
_distortion_aug = A.OneOf(
    [
        A.ElasticTransform(alpha=0.5, sigma=20, p=0.5),
        A.GridDistortion(num_steps=5, distort_limit=0.5, p=0.5),
    ],
    p=0.3,
)

# Shadow / flare simulation
_lighting_augs = [
    A.RandomShadow(shadow_roi=(0, 0.5, 1, 1), num_shadows_limit=(1, 3), shadow_dimension=3, shadow_intensity_range=(0.05, 0.1), p=0.2),
    A.RandomSunFlare(src_radius=2, flare_roi=(0, 0, 0.2, 0.2), num_flare_circles_range=(1, 2), p=0.2),
]

# Training pipeline: augmentations first, then resize and normalise.
train_transforms = A.Compose(
    _geometric_augs
    + _photometric_augs
    + [_blur_aug, _distortion_aug]
    + _lighting_augs
    + _resize_and_normalize()
)

# Validation/test pipeline: resize and normalise only.
val_transforms = A.Compose(_resize_and_normalize())

def preprocess_train(examples):
    """Add augmented pixel arrays to a batch of training examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a NumPy array and
    passed through ``train_transforms``. The results are stored under
    ``examples["pixel_values"]`` and the same batch object is returned.
    """
    augmented = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        augmented.append(train_transforms(image=rgb_array)["image"])
    examples["pixel_values"] = augmented

    return examples

def preprocess_val(examples):
    """Add resized and normalised pixel arrays to a batch of validation/test examples.

    Every entry of ``examples["image"]`` is converted to RGB, turned into a NumPy array and
    passed through ``val_transforms``. The results are stored under
    ``examples["pixel_values"]`` and the same batch object is returned.
    """
    processed = []
    for img in examples["image"]:
        rgb_array = np.array(img.convert("RGB"))
        processed.append(val_transforms(image=rgb_array)["image"])
    examples["pixel_values"] = processed

    return examples

# Split each training set 90/10 into training + validation.
# A fixed seed makes the split reproducible across kernel restarts; without it every run
# trains and validates on a different partition, so results cannot be reproduced.
SPLIT_SEED = 42

splits = ds_dermoscopic_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_dermoscopic = splits['train']
val_ds_dermoscopic = splits['test']

splits = ds_clinical_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_clinical = splits['train']
val_ds_clinical = splits['test']

splits = ds_mixed_train.train_test_split(test_size=0.1, seed=SPLIT_SEED)
train_ds_mixed = splits['train']
val_ds_mixed = splits['test']

# The test_* names are aliases of the ds_*_test objects (no copy is made), so the
# set_transform calls below also apply to ds_*_test.
test_ds_dermoscopic = ds_dermoscopic_test
test_ds_clinical = ds_clinical_test
test_ds_mixed = ds_mixed_test

# Augmentation is applied to training splits only; validation and test use preprocess_val.
train_ds_dermoscopic.set_transform(preprocess_train)
val_ds_dermoscopic.set_transform(preprocess_val)
test_ds_dermoscopic.set_transform(preprocess_val)

train_ds_clinical.set_transform(preprocess_train)
val_ds_clinical.set_transform(preprocess_val)
test_ds_clinical.set_transform(preprocess_val)

train_ds_mixed.set_transform(preprocess_train)
val_ds_mixed.set_transform(preprocess_val)
test_ds_mixed.set_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint for image classification with `num_labels` output classes
# and the given label <-> id mappings (all three are defined outside this cell).
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels=num_labels,
    label2id=label2id,
    id2label=id2label,
    # presumably needed because the checkpoint's classification head has a different size — confirm
    ignore_mismatched_sizes=True
)

In [None]:
import numpy as np
import torch
from transformers import DefaultDataCollator
import evaluate
from transformers import EarlyStoppingCallback

# Metric objects from the `evaluate` library; metric1..metric4 are used by compute_metrics.
metric1 = evaluate.load("accuracy")
metric2 = evaluate.load("precision")
metric3 = evaluate.load("recall")
metric4 = evaluate.load("f1")
metric5 = evaluate.load("roc_auc")  # NOTE(review): loaded but not referenced by compute_metrics

def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 for an evaluation result.

    The predicted class is the arg-max over axis 1 of ``eval_pred.predictions``; each score
    compares those predictions with ``eval_pred.label_ids`` using the module-level metric
    objects ``metric1`` .. ``metric4``.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    predicted = np.argmax(eval_pred.predictions, axis=1)
    references = eval_pred.label_ids

    # (result key, metric object) pairs, evaluated in this order
    scorers = (
        ("accuracy", metric1),
        ("precision", metric2),
        ("recall", metric3),
        ("f1", metric4),
    )
    return {
        key: scorer.compute(predictions=predicted, references=references)[key]
        for key, scorer in scorers
    }

# Checkpoint directory: the part of the checkpoint id after the last "/", plus "-finetuned".
model_name = model_checkpoint.split("/")[-1]
output_dir = f"{model_name}-finetuned"

# Hyper-parameters passed to TrainingArguments.
learning_rate = 1e-4
batch_size = 64  # used as the per-device train and eval batch size
num_epochs = 50  # maximum number of epochs

def collate_fn(examples):
    """Collate dataset examples into a batch dict of tensors.

    Each example's ``"pixel_values"`` array has its axis 2 moved to the front (channel-last
    to channel-first for a 3-D image) before being stacked; ``"label"`` values are gathered
    into a single tensor.

    Returns:
        dict with "pixel_values" (stacked image tensor) and "labels" (label tensor).
    """
    channel_first = [
        torch.from_numpy(np.moveaxis(sample["pixel_values"], 2, 0))
        for sample in examples
    ]
    targets = [sample["label"] for sample in examples]
    return {
        "pixel_values": torch.stack(channel_first),
        "labels": torch.tensor(targets),
    }

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Class weights for the loss: "balanced" mode weights each class inversely to its frequency
# in `labeldict`; the result is then converted to a float32 tensor.
# NOTE(review): `labeldict` is defined outside this cell — confirm it holds the labels of the
# training set that is actually passed to the trainer.
classweight = compute_class_weight(class_weight="balanced", classes=np.unique(labeldict), y=labeldict)
classweight = torch.tensor(classweight, dtype=torch.float)

# Evaluation, checkpointing and logging all happen once per epoch; with
# load_best_model_at_end the checkpoint with the best `eval_loss` is reloaded after training.
training_args = TrainingArguments(
    output_dir=output_dir,
    remove_unused_columns=False,  # keep dataset columns that are not model inputs
    eval_strategy = "epoch",
    save_strategy = "epoch",
    logging_strategy = "epoch",
    learning_rate=learning_rate,
    lr_scheduler_type = "cosine_with_restarts", # alternative considered: "cosine"
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=2,  # gradients are accumulated over 2 batches per optimizer step
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    warmup_ratio=0.1,
    logging_steps=1,  # NOTE(review): presumably irrelevant while logging_strategy is "epoch" — confirm
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    push_to_hub=False,
    report_to="none",
)

class CustomTrainer(Trainer):
    """Trainer variant that trains with class-weighted cross-entropy.

    The weights come from the module-level ``classweight`` tensor (one weight per class).
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the class-weighted cross-entropy loss for one batch.

        This overrides ``Trainer.compute_loss``, the hook the training and evaluation loops
        call. The previous method name, ``compute_loss_func``, is not a hook that ``Trainer``
        invokes on subclasses, so the weighted loss was never applied; it also used ``BCELoss``,
        which expects probabilities shaped like the targets rather than ``(N, num_labels)``
        logits paired with ``(N,)`` class indices.

        Args:
            model: The model being trained/evaluated.
            inputs: Batch dict; must contain ``"labels"`` plus the model's forward arguments.
            return_outputs: When True, return ``(loss, outputs)`` instead of only the loss.
            num_items_in_batch: Accepted for compatibility with newer ``Trainer`` versions; unused.

        Returns:
            The scalar loss tensor, or ``(loss, outputs)`` if ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Weighted multi-class loss on raw logits; the weights are moved to the logits' device
        # because `classweight` is created without an explicit device (CPU).
        loss_fct = torch.nn.CrossEntropyLoss(weight=classweight.to(logits.device))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

# Trainset: Mixed

In [None]:
# Train on the mixed training split and validate on the mixed validation split.
train_dataset, eval_dataset = train_ds_mixed, val_ds_mixed

# Training stops early after 10 evaluations without improvement.
early_stopping = EarlyStoppingCallback(early_stopping_patience=10)

trainer_deit_mixed = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

trainer_deit_mixed.train()

In [None]:
from pandas import DataFrame

# Build a per-epoch metrics table from the trainer's log history.
# Training-loss entries and evaluation entries are separate rows in the log, so each metric
# is extracted on its own, re-indexed from 0 and then placed side by side.
history_cols = ['loss', 'eval_loss', 'eval_accuracy', 'eval_precision', 'eval_recall', 'eval_f1']
df = pd.DataFrame(trainer_deit_mixed.state.log_history)
# dropna() instead of a `> 0` filter: a genuine score of 0.0 (e.g. precision/recall when no
# positive is predicted) must stay in place, otherwise later epochs shift up and the columns
# no longer line up per epoch.
df_history = pd.concat(
    [df[col].dropna().reset_index(drop=True) for col in history_cols],
    axis=1,
)
df_history.index += 1  # 1-based epoch numbering

plot_training_metrics(df_history)

### Testset: Dermoscopic

In [None]:
# Run trainer_deit_mixed on the dermoscopic test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_m_d = trainer_deit_mixed.predict(test_ds_dermoscopic)
print(outputs_deit_m_d.metrics)
plot_confusion_matrix(outputs_deit_m_d)
plot_roc_curve(outputs_deit_m_d)
outputs_deit_m_d.metrics  # last expression: also shown as the cell's output

### Testset: Clinical

In [None]:
# Run trainer_deit_mixed on the clinical test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_m_c = trainer_deit_mixed.predict(test_ds_clinical)
print(outputs_deit_m_c.metrics)
plot_confusion_matrix(outputs_deit_m_c)
plot_roc_curve(outputs_deit_m_c)
outputs_deit_m_c.metrics  # last expression: also shown as the cell's output

### Testset: Mixed

In [None]:
# Run trainer_deit_mixed on the mixed test set, then show metrics, confusion matrix and ROC curve.
outputs_deit_m_m = trainer_deit_mixed.predict(test_ds_mixed)
print(outputs_deit_m_m.metrics)
plot_confusion_matrix(outputs_deit_m_m)
plot_roc_curve(outputs_deit_m_m)
outputs_deit_m_m.metrics  # last expression: also shown as the cell's output

In [None]:
# Delete this run's checkpoint directory to free disk space.
# The path is taken from the training arguments rather than a hard-coded Colab location, and
# ignore_errors=True keeps the cell re-runnable when the directory is already gone.
shutil.rmtree(training_args.output_dir, ignore_errors=True)