In [2]:
import cv2
import glob
import numpy as np
import tensorflow as tf
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow import keras
from keras.applications import ResNet50
from keras.applications.resnet import preprocess_input
from keras import layers, models
from sklearn.metrics import (
    roc_curve,
    classification_report,
    confusion_matrix,
    accuracy_score,
)

In [7]:
def load_and_preprocess(image_paths, labels, img_size=(224, 224)):
    """Load images from disk and convert them to ResNet-ready float arrays.

    Each path is read with ``cv2.imread``, resized to ``img_size``, converted
    from BGR to RGB, cast to ``float32`` and passed through
    ``keras.applications.resnet.preprocess_input``.

    Paths that ``cv2.imread`` cannot read (it returns ``None``) are skipped
    together with their label, so the returned arrays may be shorter than the
    inputs. Skipped files are reported once through a ``RuntimeWarning`` rather
    than being dropped silently.

    Args:
        image_paths: Iterable of image file paths.
        labels: Iterable of labels, paired positionally with ``image_paths``.
        img_size: Target size handed unchanged to ``cv2.resize``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the preprocessed images and
        the labels of the images that were successfully read.
    """
    import warnings

    X, y = [], []
    skipped = []
    for p, label in zip(image_paths, labels):
        img = cv2.imread(p)
        if img is None:
            # Missing or corrupt file: keep going, but remember it for the report.
            skipped.append(p)
            continue
        img = cv2.resize(img, img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = preprocess_input(img.astype("float32"))
        X.append(img)
        y.append(label)

    if skipped:
        preview = ", ".join(str(path) for path in skipped[:5])
        warnings.warn(
            f"load_and_preprocess skipped {len(skipped)} unreadable image(s), "
            f"e.g. {preview}",
            RuntimeWarning,
            stacklevel=2,
        )
    return np.array(X), np.array(y)

In [8]:
# ─── Load metadata and split ───────────────────────────────────────────────────
# Metadata table; the cells below read its "dx" and "image_id" columns.
metadata = pd.read_csv("./HAM10000/HAM10000_metadata.csv")
# The JPEG files are spread over two directories. get_image_path looks in
# part 1 first and falls back to part 2.
image_dir_1 = "./HAM10000/HAM10000_images_part_1/"
image_dir_2 = "./HAM10000/HAM10000_images_part_2/"


def get_image_path(image_id):
    """Return the ``.jpg`` path for ``image_id``.

    The file is looked up in ``image_dir_1`` first. If it is not there, the
    path inside ``image_dir_2`` is returned without checking that it exists.
    """
    filename = image_id + ".jpg"
    primary = os.path.join(image_dir_1, filename)
    if os.path.exists(primary):
        return primary
    return os.path.join(image_dir_2, filename)


# Keep only the two diagnoses of interest and encode them as a binary target
# (nv -> 0, mel -> 1), then resolve every image id to a file path.
df = metadata.loc[metadata["dx"].isin(["nv", "mel"])].copy()
df["label"] = df["dx"].map({"nv": 0, "mel": 1})
df["image_path"] = df["image_id"].apply(get_image_path)

# Undersampling nv Class: draw as many nv rows as there are mel rows.
mel_df = df.loc[df["label"] == 1]
nv_df = df.loc[df["label"] == 0].sample(n=len(mel_df), random_state=42)

# Merge both classes, shuffle the rows and start from a clean 0..n-1 index.
df = (
    pd.concat([mel_df, nv_df])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Stratified 80/20 split at image level.
# NOTE(review): if the metadata holds several images of the same lesion, an
# image-level split can put near-duplicates in both sets — confirm.
train_df, test_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df["label"]
)

X_train, y_train = load_and_preprocess(
    image_paths=train_df["image_path"].tolist(),
    labels=train_df["label"].tolist(),
)
X_test, y_test = load_and_preprocess(
    image_paths=test_df["image_path"].tolist(),
    labels=test_df["label"].tolist(),
)

In [None]:
# import shutil

# os.makedirs("test_images/mel", exist_ok=True)
# os.makedirs("test_images/nv", exist_ok=True)

# for _, row in test_df.iterrows():
#     dest_dir = "test_images/mel" if row["label"] == 1 else "test_images/nv"
#     shutil.copy(
#         row["image_path"], os.path.join(dest_dir, os.path.basename(row["image_path"]))
#     )

In [9]:
# ─── Balance via two ImageDataGenerator flows ─────────────────────────────────
batch_size = 32
half_bs = batch_size // 2  # per-class batch size: half of the combined batch

# Partition the training arrays by class label (0 = nv, 1 = mel).
y_train = y_train.flatten()
X_train_nv, y_train_nv = X_train[y_train == 0], y_train[y_train == 0]
X_train_mel, y_train_mel = X_train[y_train == 1], y_train[y_train == 1]

# One augmentation policy shared by both classes.
aug = keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=15,
    zoom_range=0.1,
    shear_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    fill_mode="nearest",
)

# Build the nv flow first, then the mel flow, each yielding half-size batches.
gen_nv, gen_mel = (
    aug.flow(images, targets, batch_size=half_bs, shuffle=True)
    for images, targets in (
        (X_train_nv, y_train_nv),
        (X_train_mel, y_train_mel),
    )
)

In [5]:
# ─── Custom Sequence to interleave nv/mel ────────────────────────────────────
class BalancedSequence(keras.utils.Sequence):
    """Keras ``Sequence`` that merges batch ``idx`` of two iterators.

    Every item is the concatenation of batch ``idx`` from ``gen0`` and batch
    ``idx`` from ``gen1``, shuffled within the batch, so each combined batch
    draws from both sources.

    Args:
        gen0: Indexable batch iterator yielding ``(X, y)``; must support
            ``len()`` and ``[idx]``.
        gen1: Second iterator with the same protocol.
        **kwargs: Forwarded to ``keras.utils.Sequence.__init__`` (for example
            ``workers`` on Keras versions that accept it).
    """

    def __init__(self, gen0, gen1, **kwargs):
        # Newer Keras versions expect Sequence/PyDataset subclasses to call the
        # base constructor and warn when it is skipped.
        super().__init__(**kwargs)
        self.gen0 = gen0
        self.gen1 = gen1
        # length = how many batches we can draw evenly
        self._len = min(len(gen0), len(gen1))

    def __len__(self):
        """Number of combined batches per epoch."""
        return self._len

    def __getitem__(self, idx):
        """Return the shuffled concatenation of batch ``idx`` from both sources."""
        X0, y0 = self.gen0[idx]
        X1, y1 = self.gen1[idx]
        Xb = np.vstack([X0, X1])
        yb = np.concatenate([y0, y1])
        # Shuffle inside the batch so the two sources are interleaved instead
        # of appearing as two contiguous halves.
        perm = np.random.permutation(len(yb))
        return Xb[perm], yb[perm]

    def on_epoch_end(self):
        """Forward the end-of-epoch hook to both wrapped iterators.

        Keras only calls this hook on the object passed to ``fit``. Without
        forwarding it, the wrapped iterators never get the chance to rebuild
        their sample order between epochs.
        """
        for gen in (self.gen0, self.gen1):
            hook = getattr(gen, "on_epoch_end", None)
            if callable(hook):
                hook()


# Training input: every batch combines one nv half-batch and one mel half-batch.
train_seq = BalancedSequence(gen0=gen_nv, gen1=gen_mel)

In [4]:
# Focal Loss
import tensorflow.keras.backend as k


def binary_focal_loss(gamma=2.0, alpha=0.25):
    """Build a binary focal-loss function for a single sigmoid output.

    Args:
        gamma: Focusing exponent applied to ``(1 - p)`` for positives and to
            ``p`` for negatives.
        alpha: Weight of the positive-class term; the negative-class term is
            weighted by ``1 - alpha``.

    Returns:
        A callable ``loss(y_true, y_pred)`` returning a scalar tensor. The
        inner function is deliberately named ``loss``: that name is the key
        used in ``custom_objects`` when the saved model is reloaded.
    """

    def loss(y_true, y_pred):
        # Flatten both tensors and give y_true the dtype of y_pred. Otherwise
        # labels of shape (batch,) against predictions of shape (batch, 1)
        # would silently broadcast to (batch, batch) inside tf.where.
        y_pred = tf.reshape(tf.convert_to_tensor(y_pred), [-1])
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])

        # Keep probabilities away from 0 and 1 so the logs stay finite.
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1.0 - epsilon)

        # pt_1 is the prediction where the label is 1 and 1.0 elsewhere, so
        # those entries contribute log(1) = 0. pt_0 is the prediction where
        # the label is 0 and 0.0 elsewhere, for the same reason.
        pt_1 = tf.where(tf.equal(y_true, 1.0), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0.0), y_pred, tf.zeros_like(y_pred))

        positive_term = alpha * tf.pow(1.0 - pt_1, gamma) * tf.math.log(pt_1)
        negative_term = (1 - alpha) * tf.pow(pt_0, gamma) * tf.math.log(1.0 - pt_0)
        return -tf.reduce_mean(positive_term) - tf.reduce_mean(negative_term)

    return loss

In [10]:
# ─── Build & compile model ───────────────────────────────────────────────────
input_shape = (224, 224, 3)

# ImageNet-initialised ResNet50 without its classification head; pooling="avg"
# makes the trunk emit one feature vector per image.
base_model = ResNet50(
    weights="imagenet", include_top=False, pooling="avg", input_shape=input_shape
)

# Fine-tune only the last 30 layers of the trunk; freeze everything before them.
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

# Binary head on top of the pooled features.
model = models.Sequential()
model.add(base_model)
model.add(layers.Dense(128, activation="relu"))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation="sigmoid"))

# "balanced" per-class weights keyed by class index, as expected by fit().
class_weights = dict(
    enumerate(
        class_weight.compute_class_weight(
            "balanced", classes=np.unique(y_train), y=y_train
        )
    )
)

model.compile(
    loss=binary_focal_loss(gamma=2.0, alpha=0.25),
    optimizer="adam",
    metrics=["accuracy", tf.keras.metrics.AUC(name="auc")],
)

In [11]:
# ─── Training ────────────────────────────────────────────────────────────────
# Checkpoint the best model seen so far and stop after 3 epochs without
# improvement, restoring the best weights.
# NOTE(review): the validation data below is a slice of X_test/y_test, so the
# test set also drives checkpoint selection and early stopping — confirm this
# is intended.
callbacks = []
callbacks.append(
    keras.callbacks.ModelCheckpoint(
        filepath="resnet_model_nv_mel_ES.h5", save_best_only=True
    )
)
callbacks.append(
    keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)
)

history = model.fit(
    x=train_seq,
    validation_data=(X_test[:500], y_test[:500]),
    epochs=20,
    class_weight=class_weights,
    callbacks=callbacks,
)

Epoch 1/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6647 - auc: 0.7804 - loss: 0.1253



[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 2s/step - accuracy: 0.6651 - auc: 0.7812 - loss: 0.1245 - val_accuracy: 0.5000 - val_auc: 0.5000 - val_loss: 1.9831
Epoch 2/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7506 - auc: 0.8978 - loss: 0.0438



[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 2s/step - accuracy: 0.7510 - auc: 0.8981 - loss: 0.0438 - val_accuracy: 0.7735 - val_auc: 0.8706 - val_loss: 0.1242
Epoch 3/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8059 - auc: 0.9405 - loss: 0.0342



[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 2s/step - accuracy: 0.8059 - auc: 0.9404 - loss: 0.0343 - val_accuracy: 0.8453 - val_auc: 0.9294 - val_loss: 0.0686
Epoch 4/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 2s/step - accuracy: 0.8263 - auc: 0.9473 - loss: 0.0327 - val_accuracy: 0.8341 - val_auc: 0.9225 - val_loss: 0.0758
Epoch 5/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 2s/step - accuracy: 0.8571 - auc: 0.9615 - loss: 0.0281 - val_accuracy: 0.8565 - val_auc: 0.9309 - val_loss: 0.0703
Epoch 6/20
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 2s/step - accuracy: 0.8563 - auc: 0.9666 - loss: 0.0267 - val_accuracy: 0.8453 - val_auc: 0.9305 - val_loss: 0.0824


In [5]:
# Reload the checkpoint file written during training. The custom loss is
# registered under the key "loss", the name of the function returned by
# binary_focal_loss.
model = keras.models.load_model(
    filepath="resnet_model_nv_mel_ES.h5",
    custom_objects=dict(loss=binary_focal_loss(gamma=2.0, alpha=0.25)),
)



In [10]:
# Sigmoid outputs for the test images, flattened to a 1-D array.
val_probs = np.ravel(model.predict(X_test))

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2s/step


In [11]:
# Threshold Tuning
# Choose the ROC operating point that maximises Youden's J = TPR - FPR.
# NOTE(review): the threshold is selected on y_test, the same data the
# reports below are computed on, so those numbers are likely optimistic.
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=val_probs)
youden_j = tpr - fpr
best_idx = youden_j.argmax()
best_thresh = thresholds[best_idx]
print(f"Optimal sigmoid threshold = {best_thresh:.3f}")

Optimal sigmoid threshold = 0.482


In [12]:
# model.evaluate
# Returns the loss followed by the compiled metrics (accuracy, auc), in order.
test_loss, test_acc, test_auc = model.evaluate(x=X_test, y=y_test, verbose=0)

In [44]:
# Hard 0/1 predictions: 1 wherever the probability reaches the tuned threshold.
test_preds = np.greater_equal(val_probs, best_thresh).astype(int)

In [45]:
# Metrics from model.evaluate, printed on a single line.
print(
    f"Test Loss: {test_loss:.4f}, "
    f"Test Accuracy: {test_acc:.4f}, "
    f"Test AUC: {test_auc:.4f}"
)

Test Loss: 0.0686, Test Accuracy: 0.8453, Test AUC: 0.9294


In [46]:
# Classification Report
# Per-class precision/recall/F1 of the thresholded predictions (0 = nv, 1 = mel).
print("Classification Report:")
print(
    classification_report(
        y_true=y_test, y_pred=test_preds, target_names=["nv", "mel"]
    )
)

Classification Report:
              precision    recall  f1-score   support

          nv       0.92      0.78      0.84       223
         mel       0.81      0.93      0.86       223

    accuracy                           0.85       446
   macro avg       0.86      0.85      0.85       446
weighted avg       0.86      0.85      0.85       446



In [None]:
# Confusion Matrix
# Rows are true classes and columns are predicted classes, both ordered nv, mel.
cm = confusion_matrix(y_true=y_test, y_pred=test_preds, labels=[0, 1])
cm_df = pd.DataFrame(
    data=cm,
    columns=["nv (pred)", "mel (pred)"],
    index=["nv (true)", "mel (true)"],
)
print("Confusion Matrix:")
print(cm_df)

Confusion Matrix:
            nv (pred)  mel (pred)
nv (true)         173          50
mel (true)         16         207


In [48]:
# Accuracy Score
# Fraction of test images whose thresholded prediction (test_preds, cut at
# best_thresh) equals y_test. This can differ from the accuracy reported by
# model.evaluate, which presumably uses Keras' default threshold — confirm.
print(f"Accuracy Score: {accuracy_score(y_test, test_preds):.4f}")

Accuracy Score: 0.8520
