In [1]:
from google.colab import drive
# This error is often transient and can be resolved by retrying.
drive.mount('/content/drive')

# List files in your Drive
!ls -lh /content/drive/MyDrive

Mounted at /content/drive
total 1.5G
drwx------ 2 root root 4.0K Mar  5 19:37  Ants_Simulation
drwx------ 2 root root 4.0K Aug 16 04:41 'Colab Notebooks'
-rw------- 1 root root 1.3G Aug  7 09:59  images.hdf5
-rw------- 1 root root 246M Aug  7 09:57  subject_data.csv


In [2]:
import pandas as pd
df = pd.read_csv("/content/drive/MyDrive/subject_data.csv")

  df = pd.read_csv("/content/drive/MyDrive/subject_data.csv")


In [21]:
# mobile_cnn_augmented_split.py
import os, io, random
from datetime import datetime
import numpy as np
import pandas as pd
import h5py
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preprocess
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve
from sklearn.model_selection import train_test_split

In [22]:
# ---------------------------
# User-editable paths & params
# ---------------------------
SUBJECT_CSV = "/content/drive/MyDrive/subject_data.csv"   # change if needed
IMAGES_HDF5 = "/content/drive/MyDrive/images.hdf5"       # change if needed
OUTPUT_DIR = "./cnn_mobilenet_augmented_split"
os.makedirs(OUTPUT_DIR, exist_ok=True)

IMG_SIZE = 224
BATCH_SIZE = 256        # reduce if OOM
EPOCHS = 2
RANDOM_SEED = 42
FOCAL_GAMMA = 2.0
FOCAL_ALPHA = 0.9

# class1 split ratios (explicit as requested)
POS_TRAIN_FRAC = 0.65
POS_VAL_FRAC   = 0.10
POS_TEST_FRAC  = 0.25

# same splits applied to negatives for consistency (you can change if desired)
NEG_TRAIN_FRAC = 0.55
NEG_VAL_FRAC   = 0.15
NEG_TEST_FRAC  = 0.30

# augmentation intensity for positives
HEAVY_AUG = True   # used by generator for positive samples

# reproducibility
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)


In [23]:
# ---------------------------
# Load CSV and HDF5
# ---------------------------
df = pd.read_csv(SUBJECT_CSV, low_memory=False)
hf = h5py.File(IMAGES_HDF5, 'r')


In [24]:
# ---------------------------
# Robust decode helper (supports bytes or numeric arrays)
# ---------------------------
def decode_bytes_image(raw_bytes):
    # raw_bytes: python bytes or np.bytes_
    try:
        img_tensor = tf.io.decode_jpeg(raw_bytes, channels=3)
        return img_tensor.numpy()  # uint8 HxWx3
    except Exception:
        # fallback using PIL
        try:
            return np.asarray(Image.open(io.BytesIO(raw_bytes)).convert("RGB"), dtype=np.uint8)
        except Exception as e:
            raise

def ensure_uint8_array(arr):
    # bytes-like
    if isinstance(arr, (bytes, np.bytes_)):
        return decode_bytes_image(arr)
    # string/object dtype -> try to extract raw bytes
    if hasattr(arr, 'dtype') and (arr.dtype.kind == 'S' or arr.dtype.kind == 'O'):
        try:
            b = arr.tobytes()
            return decode_bytes_image(b)
        except Exception:
            try:
                b = arr.item()
                return decode_bytes_image(b)
            except Exception:
                pass
    # numeric ndarray
    if isinstance(arr, np.ndarray):
        if arr.ndim == 3:
            if arr.shape[-1] == 3:
                return arr.astype(np.uint8)
            if arr.shape[0] == 3:
                return np.transpose(arr, (1,2,0)).astype(np.uint8)
        if arr.ndim == 2:
            # grayscale -> convert to 3-channel
            return np.stack([arr,arr,arr], axis=-1).astype(np.uint8)
    # last-resort: try converting to bytes
    try:
        b = np.asarray(arr).tobytes()
        return decode_bytes_image(b)
    except Exception as e:
        raise ValueError(f"Could not decode HDF5 entry: dtype={getattr(arr,'dtype',None)}, shape={getattr(arr,'shape',None)}; err={e}")


In [25]:
# ---------------------------
# Light and heavy augmentations (numpy/tf) - robust across envs
# ---------------------------
def random_resized_crop(img, out_size=IMG_SIZE, scale=(0.7, 1.0)):
    h, w = img.shape[:2]
    s = random.uniform(scale[0], scale[1])
    new_h, new_w = max(1, int(h * s)), max(1, int(w * s))
    top = random.randint(0, h - new_h) if new_h < h else 0
    left = random.randint(0, w - new_w) if new_w < w else 0
    cropped = img[top: top + new_h, left: left + new_w]
    resized = tf.image.resize(cropped, (out_size, out_size), method='bilinear').numpy()
    return resized

def random_flip(img):
    return np.fliplr(img).copy() if random.random() < 0.5 else img

def random_rotate_90(img):
    if random.random() < 0.15:
        k = random.choice([1,2,3])
        return np.ascontiguousarray(np.rot90(img, k))
    return img

def random_brightness_contrast(img, brightness_delta=32, contrast_low=0.85, contrast_high=1.15):
    delta = random.uniform(-brightness_delta, brightness_delta)
    img = img + delta
    factor = random.uniform(contrast_low, contrast_high)
    mean = img.mean(axis=(0,1), keepdims=True)
    img = (img - mean) * factor + mean
    return img

def random_zoom(img, min_zoom=1.0, max_zoom=1.2):
    # zoom by cropping center then resizing back (zoom in)
    if random.random() < 0.3:
        h, w = img.shape[:2]
        zm = random.uniform(min_zoom, max_zoom)
        crop_h = int(h / zm)
        crop_w = int(w / zm)
        top = (h - crop_h) // 2
        left = (w - crop_w) // 2
        crop = img[top:top+crop_h, left:left+crop_w]
        return tf.image.resize(crop, (h, w)).numpy()
    return img

def clip_cast(img):
    return np.clip(img, 0, 255).astype(np.float32)


In [26]:
# ---------------------------
# Loader: decode, augment-level-controlled, preprocess
# ---------------------------
def load_image_by_id(isic_id, augment_level='none'):
    """
    augment_level: 'none' | 'light' | 'heavy'
    """
    key = str(isic_id)
    if key not in hf:
        # missing -> return zeros
        img = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    else:
        raw = hf[key][()]
        img = ensure_uint8_array(raw)

    if augment_level == 'none':
        img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE), method='bilinear').numpy()
    elif augment_level == 'light':
        img = random_resized_crop(img, out_size=IMG_SIZE, scale=(0.8, 1.0))
        img = random_flip(img)
        img = random_brightness_contrast(img, brightness_delta=16, contrast_low=0.9, contrast_high=1.1)
    elif augment_level == 'heavy':
        img = random_resized_crop(img, out_size=IMG_SIZE, scale=(0.5, 1.0))
        img = random_flip(img)
        img = random_rotate_90(img)
        img = random_zoom(img, min_zoom=1.0, max_zoom=1.4)
        img = random_brightness_contrast(img, brightness_delta=32, contrast_low=0.8, contrast_high=1.2)
    else:
        img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE), method='bilinear').numpy()

    img = clip_cast(img)
    img = mobilenet_preprocess(img)   # MobileNet preprocessing
    return img

# ---------------------------
# Custom f1 metric (tensor-based)
# ---------------------------
def f1_metric(y_true, y_pred):
    y_pred_bin = tf.cast(y_pred > 0.5, tf.float32)
    tp = tf.reduce_sum(y_true * y_pred_bin)
    fp = tf.reduce_sum((1 - y_true) * y_pred_bin)
    fn = tf.reduce_sum(y_true * (1 - y_pred_bin))
    precision = tp / (tp + fp + tf.keras.backend.epsilon())
    recall = tp / (tp + fn + tf.keras.backend.epsilon())
    return 2 * precision * recall / (precision + recall + tf.keras.backend.epsilon())


In [27]:
# ---------------------------
# Create per-class splits with required positive-class proportions
# ---------------------------
pos_df = df[df['target'] == 1].sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)
neg_df = df[df['target'] == 0].sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)

def split_class_df(class_df, train_frac, val_frac, test_frac):
    n = len(class_df)
    n_train = int(np.round(n * train_frac))
    n_val = int(np.round(n * val_frac))
    # ensure sum not exceed n
    n_test = n - n_train - n_val
    train_part = class_df.iloc[:n_train].copy()
    val_part = class_df.iloc[n_train:n_train + n_val].copy()
    test_part = class_df.iloc[n_train + n_val:].copy()
    return train_part, val_part, test_part

pos_train, pos_val, pos_test = split_class_df(pos_df, POS_TRAIN_FRAC, POS_VAL_FRAC, POS_TEST_FRAC)
neg_train, neg_val, neg_test = split_class_df(neg_df, NEG_TRAIN_FRAC, NEG_VAL_FRAC, NEG_TEST_FRAC)

train_df = pd.concat([pos_train, neg_train], axis=0).sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)
val_df   = pd.concat([pos_val,   neg_val],   axis=0).sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)
test_df  = pd.concat([pos_test,  neg_test],  axis=0).sample(frac=1.0, random_state=RANDOM_SEED).reset_index(drop=True)

print("Splits sizes -> train:", len(train_df), "val:", len(val_df), "test:", len(test_df))
print("Pos counts -> train,val,test:", len(pos_train), len(pos_val), len(pos_test))
print("Neg counts -> train,val,test:", len(neg_train), len(neg_val), len(neg_test))


Splits sizes -> train: 220621 val: 60139 test: 120299
Pos counts -> train,val,test: 255 39 99
Neg counts -> train,val,test: 220366 60100 120200


In [30]:
# ---------------------------
# Balanced-batch generator with 60% pos, 40% neg
# ---------------------------
def balanced_batch_generator(df_subset, batch_size=BATCH_SIZE, augment_pos_heavy=HEAVY_AUG):
    pos_ids = df_subset[df_subset['target'] == 1]['isic_id'].astype(str).tolist()
    neg_ids = df_subset[df_subset['target'] == 0]['isic_id'].astype(str).tolist()
    if len(pos_ids) == 0 or len(neg_ids) == 0:
        raise ValueError("both classes must exist in df_subset")

    # compute number of pos/neg per batch
    pos_count = int(np.round(batch_size * 0.6))
    neg_count = batch_size - pos_count

    while True:
        # sample positives (with replacement if needed)
        sample_pos = (
            random.sample(pos_ids, pos_count)
            if len(pos_ids) >= pos_count
            else random.choices(pos_ids, k=pos_count)
        )
        # sample negatives (with replacement if needed)
        sample_neg = (
            random.sample(neg_ids, neg_count)
            if len(neg_ids) >= neg_count
            else random.choices(neg_ids, k=neg_count)
        )

        batch_ids = sample_pos + sample_neg
        random.shuffle(batch_ids)

        X, y = [], []
        for iid in batch_ids:
            if iid in sample_pos:
                level = 'heavy' if augment_pos_heavy else 'light'
                label = 1.0
            else:
                level = 'none'  # negatives kept raw
                label = 0.0
            img = load_image_by_id(iid, augment_level=level)
            X.append(img)
            y.append(label)

        X = np.stack(X)
        y = np.array(y, dtype=np.float32)
        yield X, y


In [32]:
# ---------------------------
# Build MobileNetV2 model with GAP + GMP
# ---------------------------
def build_model(trainable_backbone=False, lr=1e-3):
    inp = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # backbone WITHOUT built-in pooling so we can do GAP+GMP
    backbone = MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_tensor=inp  # uses 'inp' as input
    )
    backbone.trainable = trainable_backbone

    # apply both poolings and concatenate
    feat = backbone.output
    gap = layers.GlobalAveragePooling2D(name="gap")(feat)
    gmp = layers.GlobalMaxPooling2D(name="gmp")(feat)
    x = layers.Concatenate(name="gap_gmp_concat")([gap, gmp])

    # your dense stack (unchanged)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(256, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(128, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Dense(64, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)

    out = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(
        optimizer=Adam(learning_rate=0.005),
        loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=FOCAL_GAMMA, alpha = FOCAL_ALPHA),
        metrics=[
            tf.keras.metrics.AUC(curve='ROC', name='auc_roc'),
            tf.keras.metrics.AUC(curve='PR',  name='auc_pr'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            f1_metric
        ]
    )
    return model


In [33]:
# ---------------------------
# Callbacks
# ---------------------------
logdir = os.path.join(OUTPUT_DIR, "logs_" + datetime.now().strftime("%Y%m%d_%H%M%S"))
os.makedirs(logdir, exist_ok=True)
ckpt_path = os.path.join(OUTPUT_DIR, "best_model.weights.h5")
callbacks = [
    EarlyStopping(monitor='val_auc_pr', patience=4, mode='max', restore_best_weights=True),
    ModelCheckpoint(ckpt_path, monitor='val_auc_pr', mode='max', save_best_only=True, save_weights_only=True, verbose=1),
    ReduceLROnPlateau(monitor='val_auc_pr', mode='max', factor=0.5, patience=2, min_lr=1e-6, verbose=1),
    tf.keras.callbacks.CSVLogger(os.path.join(logdir, "training_log.csv")),
    tf.keras.callbacks.TensorBoard(log_dir=logdir)
]


In [None]:
# ---------------------------
# Train: Stage 1 (head only)
# ---------------------------
model = build_model(trainable_backbone=False, lr=1e-3)
steps = max(1, len(train_df[train_df['target']==0]) // (BATCH_SIZE // 2))
val_steps = max(1, len(val_df) // BATCH_SIZE)
print("Training head: steps_per_epoch", steps, "val_steps", val_steps)

model.fit(
    balanced_batch_generator(train_df, batch_size=BATCH_SIZE, augment_pos_heavy=True),
    steps_per_epoch=steps,
    epochs=EPOCHS,
    validation_data=balanced_batch_generator(val_df, batch_size=BATCH_SIZE, augment_pos_heavy=False),
    validation_steps=val_steps,
    callbacks=callbacks,
    verbose=1
)

  backbone = MobileNetV2(


Training head: steps_per_epoch 1721 val_steps 234
Epoch 1/2
[1m1721/1721[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_pr: 0.9794 - auc_roc: 0.9717 - f1_metric: 0.6037 - loss: 0.0561 - precision: 0.9319 - recall: 0.9394
Epoch 1: val_auc_pr improved from -inf to 0.92444, saving model to ./cnn_mobilenet_augmented_split/best_model.weights.h5
[1m1721/1721[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3089s[0m 2s/step - auc_pr: 0.9794 - auc_roc: 0.9717 - f1_metric: 0.6037 - loss: 0.0561 - precision: 0.9319 - recall: 0.9394 - val_auc_pr: 0.9244 - val_auc_roc: 0.8958 - val_f1_metric: 0.4894 - val_loss: 0.2758 - val_precision: 0.9267 - val_recall: 0.6365 - learning_rate: 0.0050
Epoch 2/2
[1m 444/1721[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m33:17[0m 2s/step - auc_pr: 0.9964 - auc_roc: 0.9950 - f1_metric: 0.6032 - loss: 0.0224 - precision: 0.9734 - recall: 0.9790

In [None]:
# ---------------------------
# Optionally Stage 2: fine-tune top of backbone
# ---------------------------
# Example: unfreeze last N layers of backbone and fine-tune with small lr
def unfreeze_top_layers(model, backbone_layer_name="mobilenetv2_1.00_224", n_unfreeze=30):
    # find backbone by class
    for layer in model.layers:
        if isinstance(layer, tf.keras.Model):
            backbone = layer
            break
    else:
        backbone = None
    if backbone is None:
        print("Backbone model not found; skip unfreeze.")
        return model
    # Freeze all, then unfreeze top n_unfreeze
    for l in backbone.layers:
        l.trainable = False
    for l in backbone.layers[-n_unfreeze:]:
        if not isinstance(l, tf.keras.layers.BatchNormalization):
            l.trainable = True
    # recompile with lower lr
    model.compile(optimizer=Adam(1e-4),
                  loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=FOCAL_GAMMA),
                  metrics=[tf.keras.metrics.AUC(curve='PR', name='auc_pr'), tf.keras.metrics.AUC(curve='ROC', name='auc_roc'), f1_metric, "accuracy"])
    return model

In [None]:
# Uncomment to fine-tune:
'''
model = unfreeze_top_layers(model, n_unfreeze=35)
model.fit(  balanced_batch_generator(train_df, batch_size=BATCH_SIZE, augment_pos_heavy=True),
    steps_per_epoch=steps,
    epochs=EPOCHS,
    validation_data=balanced_batch_generator(val_df, batch_size=BATCH_SIZE, augment_pos_heavy=False),
    validation_steps=val_steps,
    callbacks=callbacks,
    verbose=1
            )
'''

In [None]:
# ---------------------------
# Evaluate final on test set
# ---------------------------
def evaluate_on_df(model, df_eval):
    ids = df_eval['isic_id'].astype(str).tolist()
    ys = df_eval['target'].values
    preds = []
    for i in range(0, len(ids), BATCH_SIZE):
        batch = ids[i:i+BATCH_SIZE]
        X = np.stack([load_image_by_id(idv, augment_level='none') for idv in batch])
        p = model.predict(X, verbose=0).ravel().tolist()
        preds.extend(p)
    preds = np.array(preds)
    # metrics
    if len(np.unique(ys)) < 2:
        return {"roc_auc": None, "pr_auc": None, "best_f1": None}
    roc = roc_auc_score(ys, preds)
    pr = average_precision_score(ys, preds)
    prec, rec, th = precision_recall_curve(ys, preds)
    best_f1 = float(np.max(2*prec*rec/(prec+rec+1e-12))) if len(prec)>0 else 0.0
    return {"roc_auc": float(roc), "pr_auc": float(pr), "best_f1": best_f1}

metrics = evaluate_on_df(model, test_df)
print("Test metrics:", metrics)

# cleanup
hf.close()

In [11]:
pos_weight = len(neg_train) / len(pos_train)
FOCAL_ALPHA = pos_weight / (1 + pos_weight)

In [None]:
FOCAL_ALPHA

0.9988441716790333