In [1]:
# === Cell 1: Setup & Data Config ===
import os, numpy as np, tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from google.colab import drive
drive.mount('/content/drive')
# Path to dataset with two subfolders
DATA_DIR = "/content/drive/MyDrive/Images/Images"

# Training configuration
IMG_SIZE = (320, 320)
BATCH    = 16
SEED     = 13
AUTOTUNE = tf.data.AUTOTUNE

# Quick sanity check
print("Exists?", os.path.isdir(DATA_DIR))
if os.path.isdir(DATA_DIR):
    print("First items:", os.listdir(DATA_DIR)[:10])
else:
    print("WARNING: DATA_DIR not found. Please update DATA_DIR.")


Mounted at /content/drive
Exists? True
First items: ['No_Appendicitis_Images', 'Appendicitis_Images']


In [2]:
# === Cell 2: Build Datasets (train/val) ===
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR, labels="inferred", label_mode="binary",
    validation_split=0.20, subset="training", seed=SEED,
    image_size=IMG_SIZE, batch_size=BATCH
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR, labels="inferred", label_mode="binary",
    validation_split=0.20, subset="validation", seed=SEED,
    image_size=IMG_SIZE, batch_size=BATCH
)

# If have a separate test set, load it similarly, Otherwise we reuse val_ds below
test_ds = val_ds

def prep(ds, training=False):
    # Shuffle only during training, cache+prefetch for performance
    if training:
        ds = ds.shuffle(1024, seed=SEED, reshuffle_each_iteration=True)
    return ds.cache().prefetch(AUTOTUNE)

train_ds = prep(train_ds, training=True)
val_ds   = prep(val_ds)
test_ds  = prep(test_ds)


Found 1721 files belonging to 2 classes.
Using 1377 files for training.
Found 1721 files belonging to 2 classes.
Using 344 files for validation.


In [3]:
# === Cell 3: Compute Class Weights (handles imbalance) ===
neg, pos = 0, 0
for _, yb in train_ds.unbatch().take(1_000_000):  # large limit to cover full dataset
    if int(yb.numpy()[0]) == 1:
        pos += 1
    else:
        neg += 1
total = max(1, pos+neg)
cw = {
    0: total/(2*max(1,neg)),
    1: total/(2*max(1,pos)),
}
print("Class counts:", {"neg":neg, "pos":pos})
print("Using class weights:", cw)


Class counts: {'neg': 1074, 'pos': 303}
Using class weights: {0: 0.6410614525139665, 1: 2.272277227722772}


In [4]:
# === Cell 4: Data Augmentations (light/medical-safe) ===
data_augment = keras.Sequential([
    layers.RandomFlip("horizontal"),   # remove if left/right matters clinically
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
], name="augment")


In [5]:
# === Cell 5: Build DenseNet201 (Stage 1: Frozen base) ===
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input

base = DenseNet201(include_top=False, weights="imagenet", input_shape=IMG_SIZE+(3,))
base.trainable = False

inp = layers.Input(shape=IMG_SIZE+(3,))
x   = data_augment(inp)
x   = layers.Lambda(preprocess_input)(x)
x   = base(x, training=False)
x   = layers.GlobalAveragePooling2D()(x)
x   = layers.Dropout(0.3)(x)  # adjust if over/underfitting
out = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inp, out)

opt = tf.keras.optimizers.Adam(learning_rate=1e-3)  # same as before unless you want to retune
model.compile(
    optimizer=opt,
    loss="binary_crossentropy",
    metrics=[
        keras.metrics.BinaryAccuracy(name="acc"),
        keras.metrics.AUC(name="auc"),
        keras.metrics.AUC(name="auprc", curve="PR"),
        keras.metrics.Precision(name="prec"),
        keras.metrics.Recall(name="rec"),
    ],
)

cbs = [
    keras.callbacks.ModelCheckpoint("densenet201_stage1.keras", save_best_only=True, monitor="val_auc", mode="max"),
    keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=3, restore_best_weights=True),
]

model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m74836368/74836368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [6]:
# === Cell 6: Train Stage 1 (Frozen) ===
history1 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,                # can increase slightly if still improving
    class_weight=cw,
    callbacks=cbs
)


Epoch 1/5
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 2s/step - acc: 0.4905 - auc: 0.5203 - auprc: 0.2598 - loss: 0.7368 - prec: 0.2338 - rec: 0.5203 - val_acc: 0.7471 - val_auc: 0.6371 - val_auprc: 0.3486 - val_loss: 0.5897 - val_prec: 0.3947 - val_rec: 0.1899
Epoch 2/5
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 307ms/step - acc: 0.5591 - auc: 0.6266 - auprc: 0.3777 - loss: 0.6770 - prec: 0.2901 - rec: 0.6166 - val_acc: 0.7500 - val_auc: 0.6698 - val_auprc: 0.3829 - val_loss: 0.5765 - val_prec: 0.4407 - val_rec: 0.3291
Epoch 3/5
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 308ms/step - acc: 0.6203 - auc: 0.6743 - auprc: 0.3881 - loss: 0.6625 - prec: 0.3419 - rec: 0.6863 - val_acc: 0.7355 - val_auc: 0.6820 - val_auprc: 0.4067 - val_loss: 0.5712 - val_prec: 0.4167 - val_rec: 0.3797
Epoch 4/5
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 305ms/step - acc: 0.5854 - auc: 0.6489 - auprc: 0.3780 - loss: 0.6805

In [7]:
# === Cell 7: Fine-tune Stage 2 (unfreeze deeper layers) ===
base.trainable = True

# Freeze a proportion (model-agnostic), e.g., first ~2/3
cut = int(len(base.layers) * (2/3))
for layer in base.layers[:cut]:
    layer.trainable = False

opt = tf.keras.optimizers.Adam(learning_rate=1e-4)

model.compile(
    optimizer=opt,
    loss="binary_crossentropy",
    metrics=[
        "accuracy",
        tf.keras.metrics.AUC(name="auc"),
        tf.keras.metrics.AUC(curve="PR", name="auprc"),
        tf.keras.metrics.Precision(name="prec"),
        tf.keras.metrics.Recall(name="rec"),
    ],
)

cbs_ft = [
    keras.callbacks.ModelCheckpoint("densenet201_finetuned.keras", save_best_only=True, monitor="val_auc", mode="max"),
    keras.callbacks.EarlyStopping(monitor="val_auc", mode="max", patience=3, restore_best_weights=True),
]

history2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    callbacks=cbs_ft,
    verbose=1,
)


Epoch 1/15
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 548ms/step - accuracy: 0.7472 - auc: 0.6757 - auprc: 0.4161 - loss: 0.5324 - prec: 0.4367 - rec: 0.2952 - val_accuracy: 0.7703 - val_auc: 0.7465 - val_auprc: 0.4979 - val_loss: 0.6306 - val_prec: 0.0000e+00 - val_rec: 0.0000e+00
Epoch 2/15
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 424ms/step - accuracy: 0.8121 - auc: 0.8449 - auprc: 0.6290 - loss: 0.3937 - prec: 0.6703 - rec: 0.3853 - val_accuracy: 0.7733 - val_auc: 0.7662 - val_auprc: 0.5120 - val_loss: 0.5316 - val_prec: 0.5294 - val_rec: 0.1139
Epoch 3/15
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 424ms/step - accuracy: 0.8345 - auc: 0.9015 - auprc: 0.7410 - loss: 0.3277 - prec: 0.7086 - rec: 0.4916 - val_accuracy: 0.7820 - val_auc: 0.7839 - val_auprc: 0.5369 - val_loss: 0.5670 - val_prec: 0.7000 - val_rec: 0.0886
Epoch 4/15
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 427ms/step - accuracy: 0

In [8]:
# === Cell 8: Threshold Search (Youden J) on Validation ===
import numpy as np
y_true, y_score = [], []
for Xb, yb in val_ds:
    y_true.append(yb.numpy().ravel())
    y_score.append(model.predict(Xb, verbose=0).ravel())
y_true  = np.concatenate(y_true)
y_score = np.concatenate(y_score)

from sklearn.metrics import roc_curve, auc
fpr, tpr, thr = roc_curve(y_true, y_score)
best_idx = np.argmax(tpr - fpr)  # Youden J statistic
best_thr = float(thr[best_idx])

print(f"Best threshold (Youden J): {best_thr:.3f}, Val AUC={auc(fpr,tpr):.3f}")


Best threshold (Youden J): 0.033, Val AUC=0.817


In [9]:
# === Cell 9: Optional Test-Time Augmentation (TTA) and Evaluation ===
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

def tta_predict(ds, n=5):
    preds = []
    for _ in range(n):
        batch_preds = []
        for Xb, _ in ds:
            batch_preds.append(model.predict(Xb, verbose=0))
        preds.append(np.concatenate(batch_preds).ravel())
    return np.mean(np.stack(preds, axis=0), axis=0)

use_tta = True
if use_tta:
    y_t = []
    for _, yb in test_ds:
        y_t.append(yb.numpy().ravel())
    y_t = np.concatenate(y_t)
    y_p = tta_predict(test_ds, n=5)
else:
    # if want to evaluate on val with no TTA, reuse the arrays from Cell 8
    y_t, y_p = y_true, y_score

y_pred = (y_p >= best_thr).astype(int)

cm = confusion_matrix(y_t, y_pred)
print("Confusion matrix:\n", cm)
print(classification_report(y_t, y_pred, digits=3))
print("Accuracy @best_thr:", accuracy_score(y_t, y_pred))


Confusion matrix:
 [[187  78]
 [ 14  65]]
              precision    recall  f1-score   support

         0.0      0.930     0.706     0.803       265
         1.0      0.455     0.823     0.586        79

    accuracy                          0.733       344
   macro avg      0.692     0.764     0.694       344
weighted avg      0.821     0.733     0.753       344

Accuracy @best_thr: 0.7325581395348837
