Step 1A — Check Kaggle input + find class folders

In [1]:
import os
from pathlib import Path

# Show which datasets are attached to this Kaggle session.
print("Kaggle inputs:", os.listdir("/kaggle/input"))

ROOT = Path("/kaggle/input/busi-dataset")  # change if your dataset name is different
print("Top-level:", list(ROOT.iterdir())[:10])

# Walk the whole tree and keep every directory named after one of the three
# classes, so the real class root can be read off the printed paths.
candidates = [
    entry
    for entry in ROOT.rglob("*")
    if entry.is_dir() and entry.name.lower() in {"benign", "malignant", "normal"}
]

print("Found class folders:")
for folder in candidates:
    print("-", folder)


Kaggle inputs: ['busi-dataset']
Top-level: [PosixPath('/kaggle/input/busi-dataset/Dataset BUSI')]
Found class folders:
- /kaggle/input/busi-dataset/Dataset BUSI/benign
- /kaggle/input/busi-dataset/Dataset BUSI/normal
- /kaggle/input/busi-dataset/Dataset BUSI/malignant


Step 1B — Quick sanity check: count images (excluding masks)

In [2]:
# File suffixes (lower-case) that are treated as images.
exts = {".png", ".jpg", ".jpeg"}

def count_images(folder: Path) -> int:
    """Count the non-mask image files found anywhere below ``folder``.

    A path is counted when it is a regular file, its suffix (case-insensitive)
    is in ``exts``, and its file name does not contain the substring "mask".

    Args:
        folder: Directory to search recursively.

    Returns:
        Number of matching files.
    """
    return sum(
        1
        for p in folder.rglob("*")
        # is_file() keeps directories that merely *look* like images
        # (e.g. a folder named "scans.png") out of the count.
        if p.is_file()
        and p.suffix.lower() in exts
        and "mask" not in p.name.lower()
    )

# Update BASE if your printed path is different (from Step 1A output)
BASE = Path("/kaggle/input/busi-dataset/Dataset BUSI")

# Report, for each expected class, whether its folder is present and how many
# non-mask images it holds.
for cls in ("normal", "benign", "malignant"):
    d = BASE / cls
    folder_present = d.exists()
    n_images = count_images(d)
    print(cls, "exists:", folder_present, "| images(no-mask):", n_images)


normal exists: True | images(no-mask): 133
benign exists: True | images(no-mask): 437
malignant exists: True | images(no-mask): 210


Step 2: Create output folders (3-class split)

In [3]:
from pathlib import Path

OUT_ROOT = Path("/kaggle/working/busi_processed_3class")

# One directory per (split, class) pair. Re-running is harmless because
# already-existing folders are accepted.
split_dirs = [
    OUT_ROOT / split / cls
    for split in ("train", "val", "test")
    for cls in ("normal", "benign", "malignant")
]
for split_dir in split_dirs:
    split_dir.mkdir(parents=True, exist_ok=True)

print("Created folder structure at:", OUT_ROOT)

# Quick check: list every directory that now exists below OUT_ROOT.
for p in sorted(OUT_ROOT.rglob("*")):
    if not p.is_dir():
        continue
    print(p)


Created folder structure at: /kaggle/working/busi_processed_3class
/kaggle/working/busi_processed_3class/test
/kaggle/working/busi_processed_3class/test/benign
/kaggle/working/busi_processed_3class/test/malignant
/kaggle/working/busi_processed_3class/test/normal
/kaggle/working/busi_processed_3class/train
/kaggle/working/busi_processed_3class/train/benign
/kaggle/working/busi_processed_3class/train/malignant
/kaggle/working/busi_processed_3class/train/normal
/kaggle/working/busi_processed_3class/val
/kaggle/working/busi_processed_3class/val/benign
/kaggle/working/busi_processed_3class/val/malignant
/kaggle/working/busi_processed_3class/val/normal


Step 3: Split BUSI into train/val/test (70/15/15) and copy images (3 classes)

In [4]:
import random, shutil
from pathlib import Path
import pandas as pd

# Seed Python's global RNG so the shuffles performed by split_copy() in this
# cell are repeatable from run to run.
SEED = 42
random.seed(SEED)

# Root folder holding the normal/benign/malignant class folders.
# Update BASE if your Step 1 output showed a different folder.
BASE = Path("/kaggle/input/busi-dataset/Dataset BUSI")

# Destination root; images are copied to OUT_ROOT/<split>/<class>/.
OUT_ROOT = Path("/kaggle/working/busi_processed_3class")

classes = ["normal", "benign", "malignant"]
# File suffixes (lower-case) that are treated as images.
exts = {".png", ".jpg", ".jpeg"}

def collect_non_mask_images(class_dir: Path):
    """Return the sorted non-mask image files found anywhere below ``class_dir``.

    A path is kept when it is a regular file, its suffix (case-insensitive) is
    in ``exts``, and its file name does not contain "mask" (BUSI mask images
    usually carry "_mask" in the file name).

    Args:
        class_dir: Directory to search recursively.

    Returns:
        Sorted list of ``Path`` objects.
    """
    return sorted(
        p
        for p in class_dir.rglob("*")
        # is_file() keeps directories with an image-like name out of the list;
        # they would otherwise make shutil.copy2 fail later on.
        if p.is_file()
        and p.suffix.lower() in exts
        and "mask" not in p.name.lower()
    )

def split_copy(paths, cls, out_root=None):
    """Shuffle ``paths``, split them 70/15/15 and copy them into split folders.

    The shuffle uses Python's global ``random`` state, so seed it beforehand
    for a repeatable split. The train and val sizes are the truncated 70% and
    15% of the input; whatever remains goes to test.

    Args:
        paths: Source image paths for one class. The input is not modified.
        cls: Class name; used as the destination sub-folder and as the label.
        out_root: Root of the output tree. Defaults to the notebook-level
            ``OUT_ROOT`` when omitted.

    Returns:
        A list of dicts with keys ``filepath`` (destination path as a string),
        ``label`` and ``split``, one per copied file.

    Raises:
        ValueError: If two sources would be written to the same destination
            (same file name inside the same split). Nothing is copied then.
    """
    out_root = OUT_ROOT if out_root is None else Path(out_root)

    shuffled = list(paths)
    random.shuffle(shuffled)
    n = len(shuffled)

    n_train = int(0.70 * n)
    n_val = int(0.15 * n)
    # remainder goes to test
    mapping = (
        [("train", p) for p in shuffled[:n_train]] +
        [("val", p) for p in shuffled[n_train:n_train + n_val]] +
        [("test", p) for p in shuffled[n_train + n_val:]]
    )

    # Plan every destination first. Sources are collected recursively, so two
    # files from different sub-folders can share a name; copying both would
    # silently overwrite one while still reporting two rows.
    planned = []
    seen = set()
    for split, src in mapping:
        dst = out_root / split / cls / src.name
        if dst in seen:
            raise ValueError(f"Duplicate destination for {src}: {dst}")
        seen.add(dst)
        planned.append((split, src, dst))

    rows = []
    for split, src, dst in planned:
        # Create the target folder on demand so the copy does not depend on a
        # separate folder-creation step having been run.
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)  # copy2 also copies file metadata
        rows.append({"filepath": str(dst), "label": cls, "split": split})
    return rows

all_rows = []

# Split and copy one class at a time, accumulating the manifest rows.
for cls in classes:
    class_dir = BASE / cls
    if not class_dir.exists():
        raise FileNotFoundError(f"Missing class folder: {class_dir}")

    paths = collect_non_mask_images(class_dir)
    print(f"{cls}: {len(paths)} images (non-mask)")
    all_rows.extend(split_copy(paths, cls))

# Persist the manifest so later cells can read the split assignment back.
csv_path = Path("/kaggle/working/splits_3class.csv")
df = pd.DataFrame(all_rows)
df.to_csv(csv_path, index=False)

print("\nSaved:", csv_path)
print("\nCounts per split/class:")
split_class_counts = df.groupby(["split", "label"]).size()
print(split_class_counts)


normal: 133 images (non-mask)
benign: 437 images (non-mask)
malignant: 210 images (non-mask)

Saved: /kaggle/working/splits_3class.csv

Counts per split/class:
split  label    
test   benign        67
       malignant     32
       normal        21
train  benign       305
       malignant    147
       normal        93
val    benign        65
       malignant     31
       normal        19
dtype: int64


Step 4: Load 3-class datasets (normal/benign/malignant)

In [5]:
import tensorflow as tf

DATA_ROOT = "/kaggle/working/busi_processed_3class"
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Fix class order so labels stay consistent everywhere (recommended)
CLASS_NAMES = ["normal", "benign", "malignant"]

# Arguments shared by all three loaders: integer labels inferred from the
# folder names, with CLASS_NAMES fixing the class -> index mapping.
common_loader_args = dict(
    labels="inferred",
    label_mode="int",
    class_names=CLASS_NAMES,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

# Only the training split is shuffled; val/test keep a fixed order.
train_ds = tf.keras.utils.image_dataset_from_directory(
    f"{DATA_ROOT}/train", shuffle=True, seed=42, **common_loader_args
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    f"{DATA_ROOT}/val", shuffle=False, **common_loader_args
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    f"{DATA_ROOT}/test", shuffle=False, **common_loader_args
)

print("Class names:", train_ds.class_names)


2026-01-28 18:09:03.515466: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1769623743.676692      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1769623743.724789      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1769623744.110449      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1769623744.110489      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1769623744.110492      55 computation_placer.cc:177] computation placer alr

Found 545 files belonging to 3 classes.


I0000 00:00:1769623756.352935      55 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1769623756.356870      55 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Found 115 files belonging to 3 classes.
Found 120 files belonging to 3 classes.
Class names: ['normal', 'benign', 'malignant']


Step 5: Speed up input pipeline (cache + prefetch)

In [7]:
AUTOTUNE = tf.data.AUTOTUNE

# `.cache()` stores the elements produced on the first pass and replays them
# afterwards, so any shuffling done upstream of the cache is frozen after the
# first epoch. Shuffling again *after* the cache restores per-epoch variation.
# The dataset is already batched here, so this reorders whole batches; the
# buffer is far larger than the number of batches, giving a full shuffle.
train_ds = (
    train_ds
    .cache()
    .shuffle(buffer_size=1000, seed=42, reshuffle_each_iteration=True)
    .prefetch(AUTOTUNE)
)
# Validation and test stay in a fixed order so predictions line up with labels.
val_ds   = val_ds.cache().prefetch(AUTOTUNE)
test_ds  = test_ds.cache().prefetch(AUTOTUNE)


Step 6: Compute 3-class weights

In [8]:
import pandas as pd

CLASS_NAMES = ["normal", "benign", "malignant"]  # must match your dataset loader order

df = pd.read_csv("/kaggle/working/splits_3class.csv")

# Training images per class, ordered like CLASS_NAMES.
train_rows = df[df["split"] == "train"]
train_counts = train_rows.groupby("label").size().reindex(CLASS_NAMES)

total = int(train_counts.sum())
n_classes = len(CLASS_NAMES)

# Balanced weighting: weight_i = total / (n_classes * count_i), keyed by the
# integer class index.
class_weight = {}
for idx in range(n_classes):
    class_weight[idx] = total / (n_classes * int(train_counts.iloc[idx]))

print("Train counts:\n", train_counts)
print("\nClass index mapping:", dict(zip(CLASS_NAMES, range(n_classes))))
print("\nclass_weight:", class_weight)


Train counts:
 label
normal        93
benign       305
malignant    147
dtype: int64

Class index mapping: {'normal': 0, 'benign': 1, 'malignant': 2}

class_weight: {0: 1.9534050179211468, 1: 0.5956284153005464, 2: 1.2358276643990929}


Step 7: Build 3-class EfficientNet model

In [9]:
import tensorflow as tf

IMG_SIZE = (224, 224)
NUM_CLASSES = 3

# Augmentation: horizontal flip, small rotation and zoom, added layer by layer.
data_aug = tf.keras.Sequential()
data_aug.add(tf.keras.layers.RandomFlip("horizontal"))
data_aug.add(tf.keras.layers.RandomRotation(0.05))
data_aug.add(tf.keras.layers.RandomZoom(0.1))

# Backbone: ImageNet-pretrained EfficientNetB0 without its classification top.
base = tf.keras.applications.EfficientNetB0(
    weights="imagenet",
    include_top=False,
    input_shape=IMG_SIZE + (3,),
)
base.trainable = False  # train head first (transfer learning)

# Model: augmentation -> preprocessing -> frozen backbone -> pooled head.
inputs = tf.keras.Input(shape=IMG_SIZE + (3,))
x = tf.keras.applications.efficientnet.preprocess_input(data_aug(inputs))
x = base(x, training=False)
for head_layer in (
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
):
    x = head_layer(x)
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = tf.keras.Model(inputs, outputs)

# Integer labels -> sparse categorical cross-entropy; accuracy logged as "acc".
head_optimizer = tf.keras.optimizers.Adam(1e-3)
model.compile(
    optimizer=head_optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
)

model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


Step 8: Train head + save best

In [10]:
import tensorflow as tf

# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)
# Keep only the checkpoint with the lowest val_loss on disk.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath="/kaggle/working/best_model_3class.keras",
    monitor="val_loss",
    save_best_only=True
)
callbacks = [early_stopping, best_checkpoint]

fit_options = dict(
    validation_data=val_ds,
    epochs=15,
    class_weight=class_weight,
    callbacks=callbacks,
)
history = model.fit(train_ds, **fit_options)


Epoch 1/15


E0000 00:00:1769624089.410910      55 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1_1/efficientnetb0_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
I0000 00:00:1769624092.671401     132 cuda_dnn.cc:529] Loaded cuDNN version 91002


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 364ms/step - acc: 0.2903 - loss: 1.2376 - val_acc: 0.5739 - val_loss: 0.9232
Epoch 2/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 122ms/step - acc: 0.4851 - loss: 0.9969 - val_acc: 0.5217 - val_loss: 0.9050
Epoch 3/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 122ms/step - acc: 0.6191 - loss: 0.8627 - val_acc: 0.5478 - val_loss: 0.8670
Epoch 4/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 122ms/step - acc: 0.6141 - loss: 0.8181 - val_acc: 0.5652 - val_loss: 0.8453
Epoch 5/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 123ms/step - acc: 0.6592 - loss: 0.7589 - val_acc: 0.6000 - val_loss: 0.8164
Epoch 6/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 124ms/step - acc: 0.6961 - loss: 0.7123 - val_acc: 0.5739 - val_loss: 0.8084
Epoch 7/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 123ms/step - acc: 0.681

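Step 9: Evaluate the fine-tuned model on test (metrics + confusion matrix) — requires the checkpoint saved by the fine-tuning cell below, so run that cell first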

In [34]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report

import os

CLASS_NAMES = ["normal", "benign", "malignant"]

# This checkpoint is written by the fine-tuning cell. Fail early with an
# actionable message if that cell has not been run yet.
FINETUNED_PATH = "/kaggle/working/best_model_3class_finetuned.keras"
if not os.path.exists(FINETUNED_PATH):
    raise FileNotFoundError(
        f"{FINETUNED_PATH} not found - run the fine-tuning cell before this evaluation."
    )

best = tf.keras.models.load_model(FINETUNED_PATH)


# 1) Keras evaluation
results = best.evaluate(test_ds, verbose=0, return_dict=True)
print("Test metrics:", results)

# 2) Collect predictions
# One predict() call over the whole dataset replaces a predict() call per
# batch inside a Python loop. test_ds is built with shuffle=False, so the
# labels gathered below are in the same order as the predictions.
probs = best.predict(test_ds, verbose=0)              # shape: (n_samples, 3)
y_pred = np.argmax(probs, axis=1).astype(int)         # predicted class index
y_true = np.concatenate([y_batch.numpy() for _, y_batch in test_ds]).astype(int)
assert len(y_true) == len(y_pred), "labels and predictions are misaligned"

# 3) Confusion matrix
label_ids = list(range(len(CLASS_NAMES)))
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
print("\nConfusion Matrix (rows=true, cols=pred):\n", cm)

# 4) Per-class metrics
print("\nClassification report:")
print(classification_report(
    y_true, y_pred,
    labels=label_ids,
    target_names=CLASS_NAMES,
    digits=4
))


Test metrics: {'acc': 0.8083333373069763, 'loss': 0.5081573128700256}

Confusion Matrix (rows=true, cols=pred):
 [[20  0  1]
 [ 5 53  9]
 [ 1  7 24]]

Classification report:
              precision    recall  f1-score   support

      normal     0.7692    0.9524    0.8511        21
      benign     0.8833    0.7910    0.8346        67
   malignant     0.7059    0.7500    0.7273        32

    accuracy                         0.8083       120
   macro avg     0.7861    0.8311    0.8043       120
weighted avg     0.8160    0.8083    0.8089       120



Step 10: Fine-tune the top EfficientNet layers (saves the checkpoint that Step 9 evaluates)

In [14]:
import tensorflow as tf
from tensorflow.keras import layers

CLASS_NAMES = ["normal", "benign", "malignant"]

# Start from the best head-only checkpoint.
model = tf.keras.models.load_model("/kaggle/working/best_model_3class.keras")

# Find the EfficientNet base inside the model (first nested Model whose name
# contains "efficientnet").
base_model = next(
    (
        layer
        for layer in model.layers
        if isinstance(layer, tf.keras.Model) and "efficientnet" in layer.name.lower()
    ),
    None,
)
if base_model is None:
    # Without this guard the next line fails with an opaque AttributeError.
    raise ValueError("No EfficientNet sub-model found in the loaded model; cannot fine-tune.")

print("Base model:", base_model.name, "layers:", len(base_model.layers))

# Unfreeze for fine-tuning (only top N layers)
base_model.trainable = True
N = 30  # try 20, 30, 50 if you want
for layer in base_model.layers[:-N]:
    layer.trainable = False

# Keep BatchNorm frozen
for layer in base_model.layers:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Recompile so the changed trainable flags take effect.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),  # small LR for fine-tuning
    loss="sparse_categorical_crossentropy",
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")]
)

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2, min_lr=1e-7, verbose=1),
    tf.keras.callbacks.ModelCheckpoint(
        "/kaggle/working/best_model_3class_finetuned.keras",
        monitor="val_loss",
        save_best_only=True
    ),
]

history_ft = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    class_weight=class_weight,
    callbacks=callbacks
)


Base model: efficientnetb0 layers: 238
Epoch 1/10


E0000 00:00:1769624506.939157      55 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1_1/efficientnetb0_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 293ms/step - acc: 0.7639 - loss: 0.5845 - val_acc: 0.6783 - val_loss: 0.7277 - learning_rate: 1.0000e-05
Epoch 2/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 135ms/step - acc: 0.7418 - loss: 0.6115 - val_acc: 0.6783 - val_loss: 0.7251 - learning_rate: 1.0000e-05
Epoch 3/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 166ms/step - acc: 0.7704 - loss: 0.5632 - val_acc: 0.6783 - val_loss: 0.7180 - learning_rate: 1.0000e-05
Epoch 4/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 94ms/step - acc: 0.7661 - loss: 0.5791 - val_acc: 0.6783 - val_loss: 0.7266 - learning_rate: 1.0000e-05
Epoch 5/10
[1m17/18[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 81ms/step - acc: 0.7720 - loss: 0.5423
Epoch 5: ReduceLROnPlateau reducing learning rate to 1.9999999494757505e-06.
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 93ms/step - acc: 0.7714 - loss: 0.54

In [16]:
import os, time

# Confirm the fine-tuned checkpoint was written, and show its size and
# modification time.
p = "/kaggle/working/best_model_3class_finetuned.keras"
checkpoint_found = os.path.exists(p)
print("Exists:", checkpoint_found)
if checkpoint_found:
    st = os.stat(p)
    size_mb = st.st_size / (1024**2)
    print("Size (MB):", size_mb)
    print("Modified:", time.ctime(st.st_mtime))


Exists: True
Size (MB): 27.627933502197266
Modified: Wed Jan 28 18:21:57 2026


In [18]:
import tensorflow as tf
from tensorflow.keras import layers

# Start from the best head-only checkpoint.
model = tf.keras.models.load_model("/kaggle/working/best_model_3class.keras")

# Find the EfficientNet base (first nested Model whose name contains
# "efficientnet").
base_model = next(
    (
        layer
        for layer in model.layers
        if isinstance(layer, tf.keras.Model) and "efficientnet" in layer.name.lower()
    ),
    None,
)
if base_model is None:
    # Without this guard the next line fails with an opaque AttributeError.
    raise ValueError("No EfficientNet sub-model found in the loaded model; cannot fine-tune.")

base_model.trainable = True

# Only the last N backbone layers are left trainable.
N = 100                 # try 80/100/120
for layer in base_model.layers[:-N]:
    layer.trainable = False

# keep BatchNorm frozen
for layer in base_model.layers:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Recompile so the changed trainable flags take effect.
model.compile(
    optimizer=tf.keras.optimizers.Adam(3e-5),  # try 3e-5 then 1e-5 if needed
    loss="sparse_categorical_crossentropy",
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")]
)

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2, min_lr=1e-7, verbose=1),
    tf.keras.callbacks.ModelCheckpoint(
        "/kaggle/working/best_model_3class_finetuned_v2.keras",
        monitor="val_loss",
        save_best_only=True
    )
]

history_ft2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    class_weight=class_weight,
    callbacks=callbacks
)


Epoch 1/20


E0000 00:00:1769624917.743656      55 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1_1/efficientnetb0_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 318ms/step - acc: 0.7288 - loss: 0.6125 - val_acc: 0.7043 - val_loss: 0.7138 - learning_rate: 3.0000e-05
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 167ms/step - acc: 0.7528 - loss: 0.5456 - val_acc: 0.7217 - val_loss: 0.6335 - learning_rate: 3.0000e-05
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 167ms/step - acc: 0.7912 - loss: 0.4943 - val_acc: 0.7478 - val_loss: 0.6241 - learning_rate: 3.0000e-05
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 167ms/step - acc: 0.7991 - loss: 0.4572 - val_acc: 0.7652 - val_loss: 0.6079 - learning_rate: 3.0000e-05
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 169ms/step - acc: 0.8016 - loss: 0.4191 - val_acc: 0.7565 - val_loss: 0.6001 - learning_rate: 3.0000e-05
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 171ms/step - acc: 0.8496 - loss: 0.

In [26]:
# 1) What keys were recorded?
recorded = history_ft2.history
print(recorded.keys())

# 2) Last 5 epochs (train/val) for each metric that was actually logged
for k in ("loss", "val_loss", "acc", "val_acc"):
    if k not in recorded:
        continue
    print(k, recorded[k][-5:])


dict_keys(['acc', 'loss', 'val_acc', 'val_loss', 'learning_rate'])
loss [0.3270880877971649, 0.3100455403327942, 0.28317391872406006, 0.25816091895103455, 0.26012152433395386]
val_loss [0.5875351428985596, 0.5945738554000854, 0.5769171714782715, 0.5760425329208374, 0.5755965113639832]
acc [0.853210985660553, 0.8605504631996155, 0.8935779929161072, 0.884403645992279, 0.8935779929161072]
val_acc [0.7652173638343811, 0.7652173638343811, 0.7739130258560181, 0.7739130258560181, 0.7739130258560181]


In [21]:
import numpy as np

# Epoch (1-based) with the lowest validation loss in the v2 fine-tuning run.
val_losses = np.array(history_ft2.history["val_loss"])
best_index = np.argmin(val_losses)
best_epoch = int(best_index) + 1
print("Best epoch:", best_epoch, "best val_loss:", float(val_losses[best_index]))


Best epoch: 7 best val_loss: 0.5698786377906799


In [22]:
import tensorflow as tf
# Load the checkpoint saved by the v2 fine-tuning run and report its metrics
# on the test split as a {metric_name: value} dict.
best = tf.keras.models.load_model("/kaggle/working/best_model_3class_finetuned_v2.keras")
results = best.evaluate(test_ds, verbose=0, return_dict=True)
print("Test metrics:", results)


Test metrics: {'acc': 0.8500000238418579, 'loss': 0.3743954598903656}


In [24]:
from tensorflow.keras import layers

# Start from the best head-only model again
model = tf.keras.models.load_model("/kaggle/working/best_model_3class.keras")

# Find the EfficientNet base (first nested Model whose name contains
# "efficientnet").
base_model = next(
    (
        layer
        for layer in model.layers
        if isinstance(layer, tf.keras.Model) and "efficientnet" in layer.name.lower()
    ),
    None,
)
if base_model is None:
    # Without this guard the next line fails with an opaque AttributeError.
    raise ValueError("No EfficientNet sub-model found in the loaded model; cannot fine-tune.")

base_model.trainable = True

# Only the last N backbone layers are left trainable.
N = 40  # safer: 20–50
for layer in base_model.layers[:-N]:
    layer.trainable = False

# keep BatchNorm frozen (important)
for layer in base_model.layers:
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

# Recompile so the changed trainable flags take effect.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),  # very low LR for fine-tuning
    loss="sparse_categorical_crossentropy",
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")]
)

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=2, min_lr=1e-7, verbose=1),
    tf.keras.callbacks.ModelCheckpoint(
        "/kaggle/working/best_model_3class_finetuned_v3.keras",
        monitor="val_loss",
        save_best_only=True
    )
]

history_ft3 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    class_weight=class_weight,
    callbacks=callbacks
)


Epoch 1/25


E0000 00:00:1769625333.205234      55 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1_1/efficientnetb0_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 300ms/step - acc: 0.7454 - loss: 0.5986 - val_acc: 0.6696 - val_loss: 0.7289 - learning_rate: 1.0000e-05
Epoch 2/25
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 141ms/step - acc: 0.7539 - loss: 0.5937 - val_acc: 0.6783 - val_loss: 0.7235 - learning_rate: 1.0000e-05
Epoch 3/25
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 140ms/step - acc: 0.7194 - loss: 0.5680 - val_acc: 0.6870 - val_loss: 0.7144 - learning_rate: 1.0000e-05
Epoch 4/25
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 141ms/step - acc: 0.7593 - loss: 0.5453 - val_acc: 0.7043 - val_loss: 0.7002 - learning_rate: 1.0000e-05
Epoch 5/25
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 98ms/step - acc: 0.7678 - loss: 0.5550 - val_acc: 0.6957 - val_loss: 0.7015 - learning_rate: 1.0000e-05
Epoch 6/25
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 141ms/step - acc: 0.7661 - loss: 0.5

Step 11: Regularization — label-smoothing loss (this cell does not change the augmentation)

In [28]:
import tensorflow as tf

NUM_CLASSES = 3
SMOOTH = 0.05

# Loss object kept for any code that still references it. Calling it returns
# an already reduced value, so it is no longer used inside the custom loss.
cce_ls = tf.keras.losses.CategoricalCrossentropy(label_smoothing=SMOOTH)

def scce_with_label_smoothing(y_true, y_pred):
    """Sparse categorical cross-entropy with label smoothing.

    Args:
        y_true: Integer class indices; any trailing unit dimension is
            flattened away before one-hot encoding.
        y_pred: Predicted class probabilities, one row of NUM_CLASSES values
            per sample.

    Returns:
        One loss value per sample. Returning per-sample losses (rather than
        the reduced value a Loss object produces) lets Keras apply
        ``class_weight`` / ``sample_weight`` to each example individually.
    """
    sparse_labels = tf.reshape(tf.cast(y_true, tf.int32), [-1])
    one_hot_labels = tf.one_hot(sparse_labels, depth=NUM_CLASSES)
    return tf.keras.losses.categorical_crossentropy(
        one_hot_labels, y_pred, label_smoothing=SMOOTH
    )

# compile using the custom loss
# NOTE(review): `model` here is whatever an earlier cell left behind, and the
# next step reloads `model` from disk, which discards this compile unless
# model.fit is called in between - confirm the intended flow.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=scce_with_label_smoothing,
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")]
)


Step 12A) Train with malignant upweight

In [29]:
import tensorflow as tf

# start from your best head-only model (the good baseline)
model = tf.keras.models.load_model("/kaggle/working/best_model_3class.keras")

# bump malignant weight (index 2); the other class weights are copied as-is
class_weight_m = {**class_weight, 2: class_weight[2] * 1.5}   # try 1.25, 1.5, 2.0

# Stop after 5 epochs without val_loss improvement; keep the best checkpoint.
stop_on_plateau = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)
save_best = tf.keras.callbacks.ModelCheckpoint(
    "/kaggle/working/best_model_3class_malignant_recall.keras",
    monitor="val_loss",
    save_best_only=True
)
callbacks = [stop_on_plateau, save_best]

history_m = model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    callbacks=callbacks,
    class_weight=class_weight_m
)


Epoch 1/15


E0000 00:00:1769625900.221770      55 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1_1/efficientnetb0_1/block2b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 281ms/step - acc: 0.7667 - loss: 0.6726 - val_acc: 0.6609 - val_loss: 0.7671
Epoch 2/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 124ms/step - acc: 0.7219 - loss: 0.6735 - val_acc: 0.6957 - val_loss: 0.7481
Epoch 3/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 125ms/step - acc: 0.7627 - loss: 0.6336 - val_acc: 0.6957 - val_loss: 0.7450
Epoch 4/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 88ms/step - acc: 0.7626 - loss: 0.6455 - val_acc: 0.6957 - val_loss: 0.7508
Epoch 5/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 88ms/step - acc: 0.7593 - loss: 0.6294 - val_acc: 0.6957 - val_loss: 0.7518
Epoch 6/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step - acc: 0.7490 - loss: 0.6626 - val_acc: 0.7043 - val_loss: 0.7463
Epoch 7/15
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 126ms/step - acc: 0.7594 -

Step 12B: Tune a malignant threshold (val set)

In [30]:
import numpy as np
from sklearn.metrics import precision_score, recall_score

best = tf.keras.models.load_model("/kaggle/working/best_model_3class_malignant_recall.keras")

# collect val probabilities + labels, one batch at a time
prob_chunks, label_chunks = [], []
for xb, yb in val_ds:
    prob_chunks.append(best.predict(xb, verbose=0))
    label_chunks.append(yb.numpy().astype(int))
p = np.concatenate(prob_chunks)
y = np.concatenate(label_chunks)

# Neither of these depends on the threshold, so compute them once.
argmax_pred = np.argmax(p, axis=1)
y_true_m = (y == 2).astype(int)

# For each threshold t: predict malignant (index 2) whenever its probability
# reaches t, otherwise fall back to the argmax class.
t_values = np.linspace(0.2, 0.9, 15)
for t in t_values:
    pred = np.where(p[:, 2] >= t, 2, argmax_pred)
    y_pred_m = (pred == 2).astype(int)

    prec = precision_score(y_true_m, y_pred_m, zero_division=0)
    rec  = recall_score(y_true_m, y_pred_m, zero_division=0)
    print(f"t={t:.2f} | malignant precision={prec:.3f} recall={rec:.3f}")


t=0.20 | malignant precision=0.409 recall=0.871
t=0.25 | malignant precision=0.473 recall=0.839
t=0.30 | malignant precision=0.500 recall=0.806
t=0.35 | malignant precision=0.533 recall=0.774
t=0.40 | malignant precision=0.548 recall=0.742
t=0.45 | malignant precision=0.595 recall=0.710
t=0.50 | malignant precision=0.629 recall=0.710
t=0.55 | malignant precision=0.629 recall=0.710
t=0.60 | malignant precision=0.629 recall=0.710
t=0.65 | malignant precision=0.629 recall=0.710
t=0.70 | malignant precision=0.629 recall=0.710
t=0.75 | malignant precision=0.629 recall=0.710
t=0.80 | malignant precision=0.629 recall=0.710
t=0.85 | malignant precision=0.629 recall=0.710
t=0.90 | malignant precision=0.629 recall=0.710


Final test evaluation (with threshold)

In [31]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score

CLASS_NAMES = ["normal", "benign", "malignant"]
T = 0.25  # choose from your val sweep

best = tf.keras.models.load_model("/kaggle/working/best_model_3class_malignant_recall.keras")

# collect test probs + labels, one batch at a time
prob_chunks, label_chunks = [], []
for xb, yb in test_ds:
    prob_chunks.append(best.predict(xb, verbose=0))
    label_chunks.append(yb.numpy().astype(int))
p = np.concatenate(prob_chunks)
y = np.concatenate(label_chunks)

# Argmax prediction, overridden to malignant (index 2) whenever the malignant
# probability is at least T.
pred = np.where(p[:, 2] >= T, 2, np.argmax(p, axis=1))

# overall report
cm = confusion_matrix(y, pred, labels=[0, 1, 2])
print("Confusion Matrix (rows=true, cols=pred):\n", cm)
print("\nClassification report:")
report = classification_report(y, pred, labels=[0, 1, 2], target_names=CLASS_NAMES, digits=4)
print(report)

# malignant-only precision/recall (malignant vs. everything else)
y_true_m = (y == 2).astype(int)
y_pred_m = (pred == 2).astype(int)
malignant_precision = precision_score(y_true_m, y_pred_m, zero_division=0)
malignant_recall = recall_score(y_true_m, y_pred_m, zero_division=0)
print("\nMalignant precision:", malignant_precision)
print("Malignant recall:", malignant_recall)


Confusion Matrix (rows=true, cols=pred):
 [[16  0  5]
 [ 3 44 20]
 [ 1  4 27]]

Classification report:
              precision    recall  f1-score   support

      normal     0.8000    0.7619    0.7805        21
      benign     0.9167    0.6567    0.7652        67
   malignant     0.5192    0.8438    0.6429        32

    accuracy                         0.7250       120
   macro avg     0.7453    0.7541    0.7295       120
weighted avg     0.7903    0.7250    0.7353       120


Malignant precision: 0.5192307692307693
Malignant recall: 0.84375


In [32]:
import tensorflow as tf
# Reload the head-only checkpoint and evaluate it on the test split; the
# returned metrics dict is the cell's displayed value.
best = tf.keras.models.load_model("/kaggle/working/best_model_3class.keras")
best.evaluate(test_ds, return_dict=True)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 85ms/step - acc: 0.8569 - loss: 0.4565


{'acc': 0.8166666626930237, 'loss': 0.51880943775177}

In [35]:
# Export the head-only checkpoint under the final model name.
# NOTE(review): the v2 fine-tuning run logged a lower validation loss than the
# head-only training in this notebook's outputs - confirm that the head-only
# model is the one intended as the final export.
best = tf.keras.models.load_model("/kaggle/working/best_model_3class.keras")
best.save("/kaggle/working/final_efficientnetb0_3class.keras")
