In [1]:
# Mount Drive and import libs
from google.colab import drive
drive.mount('/content/drive')

import os, math, numpy as np, tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Paths & basic settings
DATA_DIR = "/content/drive/MyDrive/Images/Images"   # <-- two subfolders (e.g., 'negative', 'positive')
IMG_SIZE = (224, 224)
BATCH    = 16
SEED     = 13

# Quick sanity check
print("Exists?", os.path.isdir(DATA_DIR))
print("Top-level items:", os.listdir(DATA_DIR)[:10])


Mounted at /content/drive
Exists? True
Top-level items: ['No_Appendicitis_Images', 'Appendicitis_Images']


In [3]:
# Build train/val from directory with a fixed split
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode="binary",           # binary labels {0,1}
    validation_split=0.20,         # 80/20 split
    subset="training",
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH
)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode="binary",
    validation_split=0.20,
    subset="validation",
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH
)

class_names = train_ds.class_names
print("Classes detected (alphabetical):", class_names)
print("Label mapping ->", {class_names[0]:0, class_names[1]:1})


Found 1721 files belonging to 2 classes.
Using 1377 files for training.
Found 1721 files belonging to 2 classes.
Using 344 files for validation.
Classes detected (alphabetical): ['Appendicitis_Images', 'No_Appendicitis_Images']
Label mapping -> {'Appendicitis_Images': 0, 'No_Appendicitis_Images': 1}


In [4]:
# Create a small test set from validation (quick & simple)
test_take = max(1, len(val_ds) // 5)  # 20% of the current val as test
test_ds   = val_ds.take(test_take)
val_ds    = val_ds.skip(test_take)

# Cache, prefetch, shuffle
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.shuffle(2048, seed=SEED).prefetch(AUTOTUNE)
val_ds   = val_ds.prefetch(AUTOTUNE)
test_ds  = test_ds.prefetch(AUTOTUNE)

print("Batches -> train:", len(train_ds), "val:", len(val_ds), "test:", len(test_ds))


Batches -> train: 87 val: 18 test: 4


In [6]:
# Estimate class counts by iterating once
def count_labels(dataset):
    zeros = ones = 0
    for _, y in dataset.unbatch():
        # y is shape (1,) with float {0.,1.}
        label = int(y.numpy().squeeze())
        if label == 0:
            zeros += 1
        else:
            ones += 1
    return zeros, ones

train_zeros, train_ones = count_labels(train_ds)
total = train_zeros + train_ones
print(f"Train counts -> class 0: {train_zeros}, class 1: {train_ones}, total: {total}")

# Balanced weighting (works well when one class is rarer):
# weight_c = total / (2 * count_c)
class_weight = {
    0: (total / (2.0 * max(1, train_zeros))),
    1: (total / (2.0 * max(1, train_ones))),
}
print("Class weights:", class_weight)


Train counts -> class 0: 1074, class 1: 303, total: 1377
Class weights: {0: 0.6410614525139665, 1: 2.272277227722772}


In [10]:
# Keep augmentations small; ultrasound textures are subtle
augment = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.08),
    layers.RandomContrast(0.08),
], name="augment")


In [11]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.densenet import preprocess_input

# IMPORTANT: Use preprocess_input for DenseNet (don't also use Rescaling(1/255))
base = DenseNet121(
    include_top=False,
    input_shape=IMG_SIZE + (3,),
    weights="imagenet"
)
base.trainable = False  # warmup: freeze backbone

inputs = keras.Input(shape=IMG_SIZE + (3,))
x = preprocess_input(inputs)     # scales to expected range for DenseNet
x = augment(x)
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs, outputs)
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="binary_crossentropy",
    metrics=[
        "accuracy",
        keras.metrics.AUC(name="auc", curve="ROC"),
        keras.metrics.AUC(name="auprc", curve="PR"),
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
    ],
)
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [12]:
# Callbacks
cbs = [
    keras.callbacks.EarlyStopping(monitor="val_auc", patience=5, mode="max", restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=2),
    keras.callbacks.ModelCheckpoint("/content/densenet121_best.keras", monitor="val_auc",
                                    mode="max", save_best_only=True)
]

# Phase 1: warmup with frozen DenseNet
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    class_weight=class_weight,      # helps if imbalanced
    callbacks=cbs,
    verbose=1
)

# Phase 2: fine-tune top ~30% (skip BatchNorms for stability)
for layer in base.layers[int(len(base.layers) * 0.7):]:
    if not isinstance(layer, layers.BatchNormalization):
        layer.trainable = True

model.compile(
    optimizer=keras.optimizers.Adam(1e-4),  # lower LR for fine-tuning
    loss="binary_crossentropy",
    metrics=[
        "accuracy",
        keras.metrics.AUC(name="auc", curve="ROC"),
        keras.metrics.AUC(name="auprc", curve="PR"),
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
    ],
)

history_ft = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=12,
    class_weight=class_weight,
    callbacks=cbs,
    verbose=1
)


Epoch 1/15
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m342s[0m 4s/step - accuracy: 0.4997 - auc: 0.4870 - auprc: 0.2303 - loss: 0.7968 - precision: 0.2176 - recall: 0.4781 - val_accuracy: 0.7750 - val_auc: 0.4572 - val_auprc: 0.1929 - val_loss: 0.5662 - val_precision: 0.3333 - val_recall: 0.0161 - learning_rate: 0.0010
Epoch 2/15
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 3s/step - accuracy: 0.5154 - auc: 0.5057 - auprc: 0.2446 - loss: 0.7416 - precision: 0.2277 - recall: 0.4927 - val_accuracy: 0.7714 - val_auc: 0.5024 - val_auprc: 0.2152 - val_loss: 0.6527 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 0.0010
Epoch 3/15
[1m87/87[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 3s/step - accuracy: 0.5445 - auc: 0.5138 - auprc: 0.2549 - loss: 0.7515 - precision: 0.2162 - recall: 0.4120 - val_accuracy: 0.7679 - val_auc: 0.4893 - val_auprc: 0.2147 - val_loss: 0.7170 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - l

In [13]:
# Evaluate
test_metrics = model.evaluate(test_ds, return_dict=True, verbose=0)
print("Test metrics:", test_metrics)

# Optional: choose a threshold based on validation ROC (Youden’s J)
# Compute predictions on val to pick a threshold, then apply to test
y_val_true, y_val_score = [], []
for Xb, yb in val_ds:
    y_val_true.append(yb.numpy().ravel())
    y_val_score.append(model.predict(Xb, verbose=0).ravel())
y_val_true  = np.concatenate(y_val_true)
y_val_score = np.concatenate(y_val_score)

# Find threshold that maximizes (tpr - fpr)
from sklearn.metrics import roc_curve, auc
fpr, tpr, thr = roc_curve(y_val_true, y_val_score)
best_idx = np.argmax(tpr - fpr)
best_thr = thr[best_idx]
print(f"Best validation threshold (Youden J): {best_thr:.3f} | Val AUC={auc(fpr, tpr):.3f}")

# Apply threshold to test for a human-readable confusion matrix
y_test_true, y_test_score = [], []
for Xb, yb in test_ds:
    y_test_true.append(yb.numpy().ravel())
    y_test_score.append(model.predict(Xb, verbose=0).ravel())
y_test_true  = np.concatenate(y_test_true)
y_test_score = np.concatenate(y_test_score)
y_test_pred  = (y_test_score >= best_thr).astype(int)

from sklearn.metrics import confusion_matrix, classification_report
cm = confusion_matrix(y_test_true, y_test_pred)
print("Confusion matrix @best_thr:\n", cm)
print(classification_report(y_test_true, y_test_pred, digits=3))

# Save model
model.save("/content/appendicitis_densenet121.keras")


Test metrics: {'accuracy': 0.734375, 'auc': 0.5550527572631836, 'auprc': 0.2183961570262909, 'loss': 0.6136813163757324, 'precision': 0.0, 'recall': 0.0}
Best validation threshold (Youden J): 0.073 | Val AUC=0.657
Confusion matrix @best_thr:
 [[17 32]
 [ 3 12]]
              precision    recall  f1-score   support

         0.0      0.850     0.347     0.493        49
         1.0      0.273     0.800     0.407        15

    accuracy                          0.453        64
   macro avg      0.561     0.573     0.450        64
weighted avg      0.715     0.453     0.473        64

