In [None]:
import os, sys, glob, shutil, subprocess, warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Dict
np.set_printoptions(suppress=True)

import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support,
                             confusion_matrix, roc_curve, auc)



In [None]:

# Single seed shared by NumPy's and TensorFlow's global random generators.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [None]:
def _run(cmd):
    """Echo ``cmd``, run it through the shell and return the CompletedProcess.

    stdout and stderr are captured together as text. Because the command is
    run with ``check=False`` no exception is raised on failure, so a non-zero
    exit status is reported here (with the captured output) instead of being
    silently dropped.
    """
    print(">", cmd)
    proc = subprocess.run(cmd, shell=True, check=False, stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT, text=True)
    if proc.returncode != 0:
        print(f"(Warning) command exited with status {proc.returncode}:\n{proc.stdout}")
    return proc

# Only install system/pip dependencies when running inside Google Colab.
if "google.colab" in sys.modules:
    _run("apt-get -y update && apt-get -y install p7zip-full")
    _run("pip -q install patool lime scikit-image")

import patoolib
from skimage.segmentation import slic
from lime import lime_image


> apt-get -y update && apt-get -y install p7zip-full
> pip -q install patool lime scikit-image


In [None]:
# --- Input archives and extraction targets (Colab paths) ---
ROOT_DIR = "/content/data"
YES_RAR_PATH, NO_RAR_PATH = "/content/yes.rar", "/content/no.rar"
YES_DIR, NO_DIR = (os.path.join(ROOT_DIR, sub) for sub in ("yes", "no"))

# --- Data pipeline ---
IMG_SIZE = (300, 300)       # target size for the resize step; larger than the B0 default
BATCH_SIZE = 16
VAL_SPLIT, TEST_SPLIT = 0.2, 0.1   # fractions of the full dataset

# --- Training schedule ---
WARMUP_EPOCHS = 30          # head-only phase (backbone frozen)
FINETUNE_EPOCHS = 12        # second phase; total epochs = WARMUP + FINETUNE
UNFREEZE_AT = 200           # number of trailing backbone layers unfrozen for fine-tuning
LR_WARMUP, LR_FINETUNE = 1e-3, 3e-5
WEIGHT_DECAY = 1e-5         # passed to AdamW

# --- Inference ---
TTA_N = 5                   # number of test-time-augmentation passes
F_BETA = 1.5                # beta of the F-score used to pick the decision threshold

In [None]:
def ensure_dirs():
    """Create the data root and both class folders when they do not exist yet."""
    for folder in (ROOT_DIR, YES_DIR, NO_DIR):
        os.makedirs(folder, exist_ok=True)



In [None]:

def dir_is_empty(path):
    """Return True when ``path`` does not exist or contains no entries."""
    if not os.path.exists(path):
        return True
    return len(os.listdir(path)) == 0

In [None]:
def extract_rar_if_needed(rar_path, outdir):
    """Extract ``rar_path`` into ``outdir`` unless the archive is missing or
    ``outdir`` already has content.

    Extraction errors are reported with a message and not re-raised.
    """
    if not os.path.isfile(rar_path):
        print(f"(Info) {rar_path} not found. Skipping extraction.")
        return
    if not dir_is_empty(outdir):
        print(f"(Info) {outdir} already has files. Skipping extraction.")
        return

    print(f"Extracting {rar_path} -> {outdir}")
    try:
        patoolib.extract_archive(rar_path, outdir=outdir)
    except Exception as e:
        print(f"Extraction failed for {rar_path}: {e}")





In [None]:
def flatten_images_inplace(root):
    """Move every image found below ``root`` directly into ``root``.

    Files with a .jpg/.jpeg/.png/.bmp extension (case-insensitive) located in
    any sub-directory are moved to ``root``. When a file of the same name is
    already there, ``_1``, ``_2``, ... is appended before the extension.
    Sub-directories that end up empty are removed; non-image files stay put.
    Prints a summary line when at least one file was moved.
    """
    exts = {".jpg", ".jpeg", ".png", ".bmp"}
    moved = 0
    for dirpath, _, filenames in os.walk(root):
        if dirpath == root:
            continue  # files already at the top level need no move
        for fn in filenames:
            # splitext looks at the real extension: a file merely *named*
            # "png" (no dot) has none and must not be treated as an image.
            if os.path.splitext(fn)[1].lower() not in exts:
                continue
            src = os.path.join(dirpath, fn)
            dst = os.path.join(root, fn)
            base, ext2 = os.path.splitext(dst)
            k = 1
            while os.path.exists(dst):  # resolve name collisions
                dst = f"{base}_{k}{ext2}"
                k += 1
            shutil.move(src, dst)
            moved += 1
    # Bottom-up so that a parent is inspected after its children were removed.
    for dirpath, _, _ in os.walk(root, topdown=False):
        if dirpath != root and not os.listdir(dirpath):
            shutil.rmtree(dirpath, ignore_errors=True)
    if moved:
        print(f"(Info) Flattened {moved} images into {root}")



In [None]:
# Create folders, unpack both archives, then flatten both class folders.
ensure_dirs()
for rar_path, out_dir in ((YES_RAR_PATH, YES_DIR), (NO_RAR_PATH, NO_DIR)):
    extract_rar_if_needed(rar_path, out_dir)
for class_dir in (YES_DIR, NO_DIR):
    flatten_images_inplace(class_dir)


INFO patool: Extracting /content/yes.rar ...
INFO:patool:Extracting /content/yes.rar ...
INFO patool: running /usr/bin/unrar x -kb -or -- /content/yes.rar
INFO:patool:running /usr/bin/unrar x -kb -or -- /content/yes.rar


Extracting /content/yes.rar -> /content/data/yes


INFO patool: ... /content/yes.rar extracted to `/content/data/yes'.
INFO:patool:... /content/yes.rar extracted to `/content/data/yes'.
INFO patool: Extracting /content/no.rar ...
INFO:patool:Extracting /content/no.rar ...
INFO patool: running /usr/bin/unrar x -kb -or -- /content/no.rar
INFO:patool:running /usr/bin/unrar x -kb -or -- /content/no.rar


Extracting /content/no.rar -> /content/data/no


INFO patool: ... /content/no.rar extracted to `/content/data/no'.
INFO:patool:... /content/no.rar extracted to `/content/data/no'.


(Info) Flattened 1500 images into /content/data/yes
(Info) Flattened 1500 images into /content/data/no


In [None]:
def list_images_labels(root_dir):
    """Collect image paths and integer labels from a two-class folder tree.

    The sub-directories of ``root_dir`` (sorted by name) are the classes; the
    first gets label 0 and the second label 1. Images are files with a
    .jpg/.jpeg/.png/.bmp extension (case-insensitive) at any depth below the
    class folder. A summary is printed.

    Returns ``(classes, X, y)``: the class names, an array of file paths and
    an int array of labels, with all class-0 files first.
    Raises AssertionError when ``root_dir`` does not hold exactly 2 class folders.
    """
    image_exts = {".jpg", ".jpeg", ".png", ".bmp"}
    classes = [d for d in sorted(os.listdir(root_dir)) if os.path.isdir(os.path.join(root_dir, d))]
    assert len(classes) == 2, f"Expected 2 classes. Found: {classes}"

    def gather(c):
        fps = sorted(glob.glob(os.path.join(root_dir, c, "**", "*"), recursive=True))
        # splitext checks the real extension, so an extension-less file that
        # is merely named "jpg" is not mistaken for an image.
        return [f for f in fps
                if os.path.isfile(f) and os.path.splitext(f)[1].lower() in image_exts]

    files0 = gather(classes[0]); files1 = gather(classes[1])
    X = np.array(files0 + files1)
    y = np.array([0]*len(files0) + [1]*len(files1), dtype=int)

    print("\n=== DATASET SUMMARY (Before Split) ===")
    print(f"{classes[0]}: {len(files0)}")
    print(f"{classes[1]}: {len(files1)}")
    print(f"TOTAL: {len(X)}")
    return classes, X, y

# Index the whole dataset once; TOTAL is reused by later sanity checks.
classes, X_all, y_all = list_images_labels(ROOT_DIR)
TOTAL = X_all.shape[0]


=== DATASET SUMMARY (Before Split) ===
no: 1500
yes: 1500
TOTAL: 3000


In [None]:
def make_splits(X, y, val_split=0.2, test_split=0.1, seed=SEED):
    """Stratified train/validation/test split.

    ``val_split`` and ``test_split`` are fractions of the full dataset. The
    test set is carved out first; the validation fraction is then rescaled
    relative to what remains. Returns three ``(X, y)`` pairs in the order
    train, validation, test.
    """
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_split, random_state=seed, stratify=y
    )
    val_fraction = val_split / (1. - test_split)
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_fraction, random_state=seed, stratify=y_rest
    )
    return (X_train, y_train), (X_val, y_val), (X_test, y_test)


(X_train, y_train), (X_val, y_val), (X_test, y_test) = make_splits(
    X_all, y_all, val_split=VAL_SPLIT, test_split=TEST_SPLIT
)




In [None]:
def split_report():
    """Print size and per-class counts of each split and assert that the
    three splits together contain exactly TOTAL samples."""
    splits = (("Train:", X_train, y_train),
              ("Val:  ", X_val, y_val),
              ("Test: ", X_test, y_test))
    print("\n=== SPLIT SUMMARY ===")
    for tag, X_part, y_part in splits:
        per_class = {classes[i]: int((y_part == i).sum()) for i in range(2)}
        print(tag, len(X_part), per_class)
    n_split = sum(len(X_part) for _, X_part, _ in splits)
    print("Sum(Train+Val+Test):", n_split, "| TOTAL:", TOTAL)
    assert n_split == TOTAL

split_report()



=== SPLIT SUMMARY ===
Train: 2100 {'no': 1050, 'yes': 1050}
Val:   600 {'no': 300, 'yes': 300}
Test:  300 {'no': 150, 'yes': 150}
Sum(Train+Val+Test): 3000 | TOTAL: 3000


In [None]:
def decode_resize_uint8(path, label, img_size=IMG_SIZE):
    """Load one image file and return ``(image, label)`` tensors.

    The file is decoded to 3 channels (animations are not expanded), resized
    to ``img_size`` and cast to float32; pixel values are not rescaled here.
    Note that, despite the function name, the returned image is float32.
    The label is cast to int32.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    pixels = tf.cast(tf.image.resize(pixels, img_size), tf.float32)
    return pixels, tf.cast(label, tf.int32)




In [None]:
def make_tfds(X, y, shuffle=False, seed=SEED, batch_size=BATCH_SIZE):
    """Build a batched, prefetched tf.data pipeline from paths ``X`` and labels ``y``.

    With ``shuffle=True`` the (path, label) pairs are shuffled once with a
    full-size buffer before decoding.
    NOTE(review): ``reshuffle_each_iteration=False`` keeps the same order for
    every epoch; confirm this is intended for training.
    """
    def _load(path, label):
        return decode_resize_uint8(path, label, IMG_SIZE)

    ds = tf.data.Dataset.from_tensor_slices((X, y))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(X), seed=seed, reshuffle_each_iteration=False)
    ds = ds.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size, drop_remainder=False)
    return ds.prefetch(tf.data.AUTOTUNE)

In [None]:
# Only the training pipeline is shuffled; validation and test keep file order.
train_ds = make_tfds(X_train, y_train, shuffle=True)
val_ds = make_tfds(X_val, y_val)
test_ds = make_tfds(X_test, y_test)


In [None]:
def count_ds(ds):
    """Return the number of samples in ``ds`` by iterating over all of its
    (features, labels) batches and summing the leading dimension."""
    return int(sum(features.shape[0] for features, _ in ds))
print("\n=== PIPELINE COUNTS ===")
# Each count runs a full pass over the corresponding pipeline.
n_train_ds, n_val_ds, n_test_ds = (count_ds(d) for d in (train_ds, val_ds, test_ds))
print("Train DS:", n_train_ds, " Val DS:", n_val_ds, " Test DS:", n_test_ds, " Total:", TOTAL)




=== PIPELINE COUNTS ===
Train DS: 2100  Val DS: 600  Test DS: 300  Total: 3000


In [None]:
from collections import Counter

# Balanced class weights: n_samples / (2 * n_class); the small epsilon guards
# against division by zero for an absent class.
n_train = len(y_train)
ctr = Counter(y_train.tolist())
w0, w1 = (n_train / (2.0 * ctr[label] + 1e-9) for label in (0, 1))
class_weight = {0: w0, 1: w1}
print("\nClass weights:", class_weight)



Class weights: {0: 0.9999999999995238, 1: 0.9999999999995238}


In [None]:
# Augmentation block; it is part of the model graph (applied to `inputs` below).
Aug = keras.Sequential([
    keras.layers.RandomFlip("horizontal"),                # brain MRIs: LR flip ok, avoid UD flip
    keras.layers.RandomRotation(0.1),
    keras.layers.RandomZoom(0.1),
    keras.layers.RandomContrast(0.1),
], name="augment")

# NOTE: despite the layer name "input_uint8", the data pipeline feeds float32 pixels in 0..255.
inputs = keras.Input(shape=IMG_SIZE+(3,), name="input_uint8")
x = Aug(inputs)
# NOTE(review): per the Keras docs, efficientnet.preprocess_input is a pass-through kept for
# API compatibility (EfficientNet normalises inside the model), so pixels stay 0..255 here.
x = tf.keras.applications.efficientnet.preprocess_input(x)
base = tf.keras.applications.EfficientNetB0(include_top=False, input_shape=IMG_SIZE+(3,), weights="imagenet")
# Warm-up phase: the ImageNet backbone is frozen and always called in inference mode.
base.trainable = False
x = base(x, training=False)
# Classification head: pooled features -> dropout -> single sigmoid unit (probability of class 1).
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.3)(x)
outputs = keras.layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs, name="EffB0_binary")

# AdamW + weight decay
optimizer = keras.optimizers.AdamW(learning_rate=LR_WARMUP, weight_decay=WEIGHT_DECAY)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
model.summary()

# Warm-up callbacks, all driven by validation loss: early stop (restoring the best weights),
# learning-rate reduction on plateau, and a best-only checkpoint on disk.
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.3, patience=2, min_lr=1e-6, verbose=1),
    keras.callbacks.ModelCheckpoint("/content/best_warmup.keras", monitor="val_loss", save_best_only=True, verbose=1),
]

















Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Warm-up phase: train the head on top of the frozen backbone.
hist_warm = model.fit(
    train_ds,
    epochs=WARMUP_EPOCHS,
    validation_data=val_ds,
    callbacks=callbacks,
    class_weight=class_weight,
)

Epoch 1/30
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.6747 - loss: 0.5907
Epoch 1: val_loss improved from inf to 0.41864, saving model to /content/best_warmup.keras
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m518s[0m 4s/step - accuracy: 0.6752 - loss: 0.5901 - val_accuracy: 0.8333 - val_loss: 0.4186 - learning_rate: 0.0010
Epoch 2/30
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8154 - loss: 0.4143
Epoch 2: val_loss improved from 0.41864 to 0.34511, saving model to /content/best_warmup.keras
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 3s/step - accuracy: 0.8156 - loss: 0.4141 - val_accuracy: 0.8733 - val_loss: 0.3451 - learning_rate: 0.0010
Epoch 3/30
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8576 - loss: 0.3594
Epoch 3: val_loss improved from 0.34511 to 0.30536, saving model to /content/best_warmup.keras
[1m132/13

In [None]:
# Fine-tuning: make the backbone trainable, then re-freeze everything except
# its last UNFREEZE_AT layers.
base.trainable = True
# Compute the frozen prefix explicitly: slicing with [:-UNFREEZE_AT] would
# turn into [:0] for UNFREEZE_AT == 0 and leave *every* layer trainable.
n_frozen = max(0, len(base.layers) - UNFREEZE_AT)
for layer in base.layers[:n_frozen]:
    layer.trainable = False



In [None]:
# Re-compile so the changed trainable flags take effect, now with the smaller
# fine-tuning learning rate.
ft_optimizer = keras.optimizers.AdamW(learning_rate=LR_FINETUNE, weight_decay=WEIGHT_DECAY)
model.compile(optimizer=ft_optimizer, loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Fine-tuning callbacks, all monitoring validation loss.
ft_early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=6, restore_best_weights=True
)
ft_reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.3, patience=2, min_lr=1e-6, verbose=1
)
ft_checkpoint = keras.callbacks.ModelCheckpoint(
    "/content/best_finetune.keras", monitor="val_loss", save_best_only=True, verbose=1
)
callbacks_ft = [ft_early_stop, ft_reduce_lr, ft_checkpoint]

In [None]:
# Fine-tuning phase with the partially unfrozen backbone.
hist_ft = model.fit(
    train_ds,
    epochs=FINETUNE_EPOCHS,
    validation_data=val_ds,
    callbacks=callbacks_ft,
    class_weight=class_weight,
)


Epoch 1/12
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.7781 - loss: 0.4764
Epoch 1: val_loss improved from inf to 0.16138, saving model to /content/best_finetune.keras
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m856s[0m 6s/step - accuracy: 0.7786 - loss: 0.4755 - val_accuracy: 0.9317 - val_loss: 0.1614 - learning_rate: 3.0000e-05
Epoch 2/12
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.9107 - loss: 0.2157
Epoch 2: val_loss improved from 0.16138 to 0.11779, saving model to /content/best_finetune.keras
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m849s[0m 6s/step - accuracy: 0.9107 - loss: 0.2156 - val_accuracy: 0.9533 - val_loss: 0.1178 - learning_rate: 3.0000e-05
Epoch 3/12
[1m 23/132[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m9:45[0m 5s/step - accuracy: 0.9322 - loss: 0.1451

In [None]:
def plot_curves(hists, labels):
    """Plot training/validation accuracy and loss for consecutive training phases.

    hists:  sequence of objects with a ``history`` dict holding the keys
            "accuracy", "val_accuracy", "loss" and "val_loss".
    labels: one legend prefix per entry of ``hists``.

    Two figures are shown (accuracy, then loss). Each phase is drawn after
    the previous one on the epoch axis, so a fine-tuning run continues where
    warm-up stopped instead of being drawn on top of it from epoch 0.
    """
    panels = (("accuracy", "Acc", "Accuracy"), ("loss", "Loss", "Loss"))
    for key, short, long_name in panels:
        plt.figure(figsize=(7,4))
        first_epoch = 0  # epoch index at which the current phase starts
        for h, lab in zip(hists, labels):
            train_vals = h.history[key]
            val_vals = h.history[f"val_{key}"]
            plt.plot(range(first_epoch, first_epoch + len(train_vals)), train_vals,
                     label=f"{lab} Train {short}")
            plt.plot(range(first_epoch, first_epoch + len(val_vals)), val_vals,
                     label=f"{lab} Val {short}")
            first_epoch += len(train_vals)
        plt.xlabel("Epoch"); plt.ylabel(long_name); plt.title(f"Training vs Validation {long_name}")
        plt.legend(); plt.tight_layout(); plt.show()

plot_curves([hist_warm, hist_ft], ["Initial","Finetune"])



In [None]:
def collect_preds(dataset, model, tta=False, N=TTA_N):
    """Run ``model`` over every batch of ``dataset``.

    Returns ``(y_true, y_prob)``: integer labels and flattened predicted
    probabilities, concatenated over all batches.

    With ``tta=True`` each batch is predicted on the original images, on a
    left-right flipped copy and on ``max(0, N-2)`` random-contrast copies;
    the probabilities of all passes are averaged.
    """
    def _predict(batch):
        return model.predict(batch, verbose=0).reshape(-1)

    labels, probs = [], []
    for bx, by in dataset:
        if tta:
            passes = [_predict(bx), _predict(tf.image.flip_left_right(bx))]
            # Remaining passes: mild contrast jitter only.
            passes.extend(
                _predict(tf.image.random_contrast(bx, 0.9, 1.1))
                for _ in range(max(0, N-2))
            )
            batch_prob = np.mean(np.stack(passes, axis=0), axis=0)
        else:
            batch_prob = _predict(bx)
        probs.append(batch_prob)
        labels.append(by.numpy())
    y_true = np.concatenate(labels, axis=0).astype(int)
    y_prob = np.concatenate(probs, axis=0)
    return y_true, y_prob


In [None]:
def specificity_from_cm(cm):
    """Specificity TN / (TN + FP) read from row 0 of a 2x2 confusion matrix;
    0.0 when that row sums to zero."""
    true_neg = cm[0,0]
    negatives = true_neg + cm[0,1]
    if negatives > 0:
        return float(true_neg / negatives)
    return 0.0



In [None]:
def bin_metrics(y_true, y_pred):
    """Binary classification metrics for 0/1 labels.

    Returns ``(metrics, cm)`` where ``metrics`` maps "accuracy", "precision",
    "recall/sensitivity", "specificity" and "f1" to floats and ``cm`` is the
    2x2 confusion matrix (rows: true class, columns: predicted class).
    """
    acc = accuracy_score(y_true, y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0)
    # Pin the label set so the matrix is always 2x2, even when a (bootstrap)
    # sample happens to contain a single class; otherwise cm[0,1] would fail.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    spec = specificity_from_cm(cm)
    return {"accuracy":acc, "precision":prec, "recall/sensitivity":rec, "specificity":spec, "f1":f1}, cm





In [None]:
def bootstrap_ci(y_true, y_pred, n_boot=500, alpha=0.05):
    """Percentile-bootstrap confidence intervals for the metrics of ``bin_metrics``.

    y_true, y_pred: equally long sequences of 0/1 labels.
    n_boot: number of resamples drawn with replacement (seeded with SEED).
    alpha:  significance level; the interval spans the ``alpha/2`` and
            ``1 - alpha/2`` quantiles (alpha=0.05 gives a 95% interval).

    Returns a dict mapping each metric name to a ``(low, high)`` tuple.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    rng = np.random.default_rng(SEED)
    N = len(y_true)
    keys = ["accuracy","precision","recall/sensitivity","specificity","f1"]
    store = {k:[] for k in keys}
    for _ in range(n_boot):
        idx = rng.integers(0, N, N)
        m,_ = bin_metrics(y_true[idx], y_pred[idx])
        for k in keys: store[k].append(m[k])
    # Honour `alpha` instead of hard-coding the 2.5 / 97.5 percentiles.
    lo_pct = 100.0 * alpha / 2.0
    hi_pct = 100.0 - lo_pct
    ci = {k: (float(np.percentile(v, lo_pct)), float(np.percentile(v, hi_pct))) for k,v in store.items()}
    return ci

In [None]:
def choose_threshold_by_val(y_true_val, y_prob_val, beta=F_BETA):
    """Pick the decision threshold that maximises F-beta on validation data.

    Thresholds from 0.05 to 0.95 (181 evenly spaced values) are scanned; a
    sample is predicted positive when its probability is >= the threshold.
    Ties keep the lowest threshold. Returns ``(best_thr, best_score)``.
    """
    best_thr, best_score = 0.5, -1.0
    beta_sq = beta**2
    for thr in np.linspace(0.05, 0.95, 181):
        y_pred = (y_prob_val >= thr).astype(int)
        # One call is enough: only precision and recall feed the F-beta below.
        prec, rec, _, _ = precision_recall_fscore_support(y_true_val, y_pred, average="binary", zero_division=0)
        if prec+rec == 0:
            score = 0
        else:
            # F-beta from precision & recall; the epsilon keeps the division safe.
            score = (1+beta_sq) * (prec*rec) / (beta_sq * prec + rec + 1e-12)
        if score > best_score:
            best_score, best_thr = score, thr
    return best_thr, best_score


In [None]:
# Test-time-augmented probabilities for the validation and test splits.
y_true_val, y_prob_val = collect_preds(val_ds, model, tta=True, N=TTA_N)
y_true_te, y_prob_te = collect_preds(test_ds, model, tta=True, N=TTA_N)


In [None]:
# The threshold is tuned on validation probabilities only.
thr, fbeta = choose_threshold_by_val(y_true_val, y_prob_val, F_BETA)
print(f"\nChosen decision threshold (by Val F{F_BETA}): {thr:.3f}  | best Fβ: {fbeta:.4f}")

In [None]:
# This cell previously re-ran collect_preds on the undefined names
# `val_dataset` / `test_dataset` and unpacked three values although
# collect_preds returns two, so it could not run. The TTA predictions
# collected earlier are the ones the threshold was tuned on, so they are
# kept as-is and only checked for consistency here.
assert len(y_true_val) == len(y_prob_val) == len(X_val)
assert len(y_true_te) == len(y_prob_te) == len(X_test)


In [None]:
# Validation: hard predictions at the tuned threshold, metrics, bootstrap CIs.
y_pred_val = (y_prob_val >= thr).astype(int)
m_val, cm_val = bin_metrics(y_true_val, y_pred_val)
ci_val = bootstrap_ci(y_true_val, y_pred_val)

# Test: same threshold, same evaluation.
y_pred_te = (y_prob_te >= thr).astype(int)
m_te, cm_te = bin_metrics(y_true_te, y_pred_te)
ci_te = bootstrap_ci(y_true_te, y_pred_te)

In [None]:
print("\n=== VALIDATION (threshold-tuned) ===")
for metric_name, value in m_val.items():
    ci_low, ci_high = ci_val[metric_name]
    print(f"{metric_name:>18}: {value:.4f}  (95% CI: {ci_low:.4f}–{ci_high:.4f})")

In [None]:
print("\n=== TEST (threshold from Val) ===")
for metric_name, value in m_te.items():
    ci_low, ci_high = ci_te[metric_name]
    print(f"{metric_name:>18}: {value:.4f}  (95% CI: {ci_low:.4f}–{ci_high:.4f})")

In [None]:
def plot_cm(cm, title):
    """Draw a 2x2 confusion matrix as an annotated heat map (rows: true class,
    columns: predicted class), using the global ``classes`` as tick labels."""
    fig, ax = plt.subplots(figsize=(5,4))
    im = ax.imshow(cm, interpolation="nearest")
    ax.set_title(title)
    plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

    ticks = [0,1]
    ax.set_xticks(ticks); ax.set_xticklabels(classes, rotation=45, ha="right")
    ax.set_yticks(ticks); ax.set_yticklabels(classes)

    # Cells above half of the maximum get white text for contrast.
    th = cm.max()/2.0
    for i, j in np.ndindex(2, 2):
        text_color = "white" if cm[i,j] > th else "black"
        ax.text(j, i, int(cm[i,j]), ha="center", va="center", color=text_color)

    ax.set_ylabel("True")
    ax.set_xlabel("Pred")
    plt.tight_layout()
    plt.show()




In [None]:
for cm_split, cm_title in ((cm_val, "Confusion Matrix — Validation"),
                           (cm_te, "Confusion Matrix — Test")):
    plot_cm(cm_split, cm_title)
# Each matrix must account for every sample of its split.
print(f"[Sanity] Val CM sum: {int(cm_val.sum())} | n_val: {len(y_true_val)}")
print(f"[Sanity] Test CM sum: {int(cm_te.sum())} | n_test: {len(y_true_te)}")

In [None]:
def plot_roc(y_true, y_prob, title):
    """Plot the ROC curve of ``y_prob`` against ``y_true`` with the AUC in the
    legend and the diagonal as a dashed reference line."""
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(5,4))
    ax.plot(fpr, tpr, label=f"AUC={roc_auc:.3f}")
    ax.plot([0,1], [0,1], "--")
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()


In [None]:
for roc_true, roc_prob, roc_title in (
    (y_true_val, y_prob_val, "ROC — Validation (TTA)"),
    (y_true_te, y_prob_te, "ROC — Test (TTA)"),
):
    plot_roc(roc_true, roc_prob, roc_title)

In [None]:
backbone = model.get_layer("efficientnetb0")
# `backbone.output` belongs to the backbone's own internal graph, whereas
# `model.output` was produced by calling the backbone inside the outer model,
# so a Model built directly from those two tensors is graph-disconnected.
# Instead, re-apply the trained layers that follow the backbone to a fresh
# Input of the backbone's feature shape (the layers and weights are shared).
feat_in = keras.Input(shape=tuple(backbone.output.shape[1:]), name="backbone_features")
head_out = feat_in
for head_layer in model.layers[model.layers.index(backbone) + 1:]:
    head_out = head_layer(head_out)
head = keras.Model(inputs=feat_in, outputs=head_out, name="classifier_head")


In [None]:
@tf.function
def gradcam(img_batch):
    """Grad-CAM heat map for the first image of ``img_batch``.

    The backbone feature maps are weighted by the spatially averaged gradient
    of the model's sigmoid output with respect to those features, summed over
    channels, clipped at zero and scaled so the maximum is 1. Uses the global
    ``backbone`` and ``head`` models; the augmentation layers are not applied.
    Returns a 2-D tensor at the backbone's spatial resolution.
    """
    pre = tf.keras.applications.efficientnet.preprocess_input(img_batch)
    feats = backbone(pre, training=False)      # (1, Hf, Wf, C)
    with tf.GradientTape() as tape:
        # Watch the features *before* they are used so the head's ops are
        # recorded regardless of which backbone weights are trainable.
        tape.watch(feats)
        preds = head(feats, training=False)    # (1, 1)
        score = preds[:, 0]
    # Take the gradient outside the context so the tape does not record it.
    grads = tape.gradient(score, feats)
    weights = tf.reduce_mean(grads, axis=(1,2), keepdims=False)   # (1,C)
    feats  = feats[0]
    w      = weights[0]
    cam    = tf.tensordot(feats, w, axes=[2,0])
    cam    = tf.maximum(cam, 0); cam /= (tf.reduce_max(cam)+1e-12)
    return cam

In [None]:
def overlay(img, heatmap, alpha=0.4):
    """Blend a jet-coloured ``heatmap`` over ``img`` and return a uint8 image.

    ``img`` is an H x W x 3 array; when its maximum is <= 1.0 it is first
    scaled to 0..255. ``heatmap`` is a 2-D array that is resized to the image
    size. ``alpha`` is the weight of the heat map in the blend.
    """
    if img.max() <= 1.0:
        img = (img*255.0).astype("uint8")
    target_hw = img.shape[:2]
    heat = tf.image.resize(heatmap[..., tf.newaxis], target_hw).numpy().squeeze()
    heat_rgb255 = (plt.cm.jet(heat)[:, :, :3] * 255).astype("uint8")
    blended = alpha*heat_rgb255 + (1-alpha)*img
    return blended.astype("uint8")


In [None]:
print("\nGrad-CAM examples:")
for idx, (img, lab) in enumerate(test_ds.unbatch().take(3), 1):
    img_np = img.numpy()
    # The pipeline yields float pixels already in 0..255, so only clip and
    # cast for display; multiplying by 255 again would wrap around in uint8.
    img_u8 = np.clip(img_np, 0, 255).astype("uint8")
    cam = gradcam(tf.expand_dims(img, 0)).numpy()
    blended = overlay(img_np, cam, 0.45)
    plt.figure(figsize=(8,3))
    plt.subplot(1,3,1); plt.imshow(img_u8); plt.title("Input"); plt.axis("off")
    plt.subplot(1,3,2); plt.imshow(cam, cmap="jet"); plt.title("Grad-CAM"); plt.axis("off")
    plt.subplot(1,3,3); plt.imshow(blended); plt.title("Overlay"); plt.axis("off")
    plt.suptitle(f"Example {idx} — True: {classes[int(lab.numpy())]}")
    plt.tight_layout(); plt.show()



In [None]:
def model_predict_for_lime(imgs_np):
    """Classifier function for LIME: return an (n, 2) array of class probabilities.

    ``imgs_np`` is a batch of images with pixel values in 0..255 (LIME sends
    uint8). Column 1 is the model's sigmoid output, column 0 its complement.
    """
    x = imgs_np.astype("float32")           # 0..255 expected
    # Run the network once and derive both columns from the same result.
    p_pos = model.predict(x, verbose=0).reshape(-1, 1)
    return np.hstack([1.0 - p_pos, p_pos])



In [None]:
from lime import lime_image
from skimage.segmentation import slic

# Explain the first test image with LIME.
sample = next(iter(test_ds.unbatch().take(1)))
img0, label0 = sample
# Pixels are already floats in 0..255: clip and cast only. Scaling by 255
# again would wrap around in uint8 and hand LIME a corrupted image.
img0_uint8 = np.clip(img0.numpy(), 0, 255).astype("uint8")
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(
    image=img0_uint8,
    classifier_fn=model_predict_for_lime,
    top_labels=2,
    hide_color=0,
    num_samples=1000,
    segmentation_fn=lambda x: slic(x, n_segments=80, compactness=10, sigma=1, start_label=0)
)
# Highlight the super-pixels that support the top predicted label.
pred_label = explanation.top_labels[0]
lime_img, lime_mask = explanation.get_image_and_mask(
    label=pred_label, positive_only=True, num_features=10, hide_rest=False
)
plt.figure(figsize=(10,4))
plt.subplot(1,2,1); plt.imshow(img0_uint8); plt.title(f"Original (True: {classes[int(label0.numpy())]})"); plt.axis("off")
plt.subplot(1,2,2); plt.imshow(lime_img); plt.title(f"LIME — regions for class {pred_label}"); plt.axis("off")
plt.tight_layout(); plt.show()

# Closing remark on why the threshold was tuned towards sensitivity.
for note_line in (
    "\nClinical note: We tuned for higher sensitivity (Fβ>1). In screening, missing tumors (FN) is worse than over-calling (FP),",
    "so we 1) optimized threshold on Validation for Fβ, 2) used class weights, 3) added TTA to stabilize predictions.",
):
    print(note_line)
