# Downloading data

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.applications.efficientnet import preprocess_input

# ==== Paths ====
train_dir = '/content/drive/MyDrive/VGG/train'
val_dir   = '/content/drive/MyDrive/VGG/valid'
test_dir  = '/content/drive/MyDrive/VGG/test'

# ==== Parameters ====
img_size    = (256, 256)
batch_size  = 16
seed        = 123
num_classes = 1   # binary classification


In [5]:
def prepare_ds(path):
    ds = tf.keras.utils.image_dataset_from_directory(
        path, seed=seed,
        image_size=img_size, batch_size=batch_size,
        label_mode='int'
    )
    ds = ds.map(lambda x, y: (preprocess_input(tf.cast(x, tf.float32)), y))
    return ds.prefetch(tf.data.AUTOTUNE)

train_ds = prepare_ds(train_dir)
val_ds   = prepare_ds(val_dir)
test_ds  = prepare_ds(test_dir)


Found 3200 files belonging to 2 classes.
Found 398 files belonging to 2 classes.
Found 402 files belonging to 2 classes.


In [6]:
def build_model(backbone_class):
    backbone = backbone_class(
        input_shape=img_size + (3,),
        include_top=False,
        weights='imagenet'
    )
    backbone.trainable = False

    inputs = layers.Input(shape=img_size + (3,))
    x = backbone(inputs, training=False)
    x = layers.GlobalAveragePooling2D(name='gap')(x)
    x = layers.Dropout(0.3, name='dropout1')(x)
    x = layers.Dense(
        128, activation='relu',
        kernel_regularizer=regularizers.l2(1e-4),
        name='dense128'
    )(x)
    x = layers.Dropout(0.3, name='dropout2')(x)
    outputs = layers.Dense(1, activation='sigmoid', name='output')(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer=tf.keras.optimizers.AdamW(learning_rate=2.5e-4, weight_decay=1e-4),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(name='auc'), 'accuracy']
    )
    return model


In [7]:
from tensorflow.keras.applications import VGG16
model = build_model(VGG16)
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
from tensorflow.keras.callbacks import (
    ModelCheckpoint, EarlyStopping,
    ReduceLROnPlateau, CSVLogger
)

callbacks = [
    ModelCheckpoint('best_model.h5', monitor='val_auc', save_best_only=True, verbose=1),
    EarlyStopping(monitor='val_auc', patience=10, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1),
    CSVLogger('training_log.csv', append=False)
]

history = model.fit(
    train_ds, validation_data=val_ds,
    epochs=50, callbacks=callbacks
)


Epoch 1/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6244 - auc: 0.6379 - loss: 1.5616
Epoch 1: val_auc improved from inf to 0.86866, saving model to best_model.h5




[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m500s[0m 2s/step - accuracy: 0.6245 - auc: 0.6381 - loss: 1.5601 - val_accuracy: 0.7638 - val_auc: 0.8687 - val_loss: 0.4954 - learning_rate: 2.5000e-04
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - accuracy: 0.6940 - auc: 0.7686 - loss: 0.8066
Epoch 2: val_auc did not improve from 0.86866
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 147ms/step - accuracy: 0.6941 - auc: 0.7686 - loss: 0.8064 - val_accuracy: 0.8191 - val_auc: 0.9307 - val_loss: 0.4167 - learning_rate: 2.5000e-04
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step - accuracy: 0.7520 - auc: 0.8335 - loss: 0.5923
Epoch 3: val_auc did not improve from 0.86866
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 159ms/step - accuracy: 0.7520 - auc: 0.8335 - loss: 0.5

In [11]:
import numpy as np
from sklearn.metrics import roc_curve, accuracy_score, roc_auc_score, f1_score
import tensorflow as tf
# Evaluate on test set using the best saved model
model.load_weights('best_model.h5')

# 1) Predict probabilities
y_true, y_prob = [], []
for x_batch, y_batch in test_ds:
    y_true.extend(y_batch.numpy())
    y_prob.extend(model.predict(x_batch).flatten())
y_true = np.array(y_true)
y_prob = np.array(y_prob)

# 2) Compute ROC curve and Youden’s J threshold
fpr, tpr, thresholds = roc_curve(y_true, y_prob)
j_idx = np.argmax(tpr - fpr)
best_threshold = thresholds[j_idx]
print(f'Optimal threshold (Youden J): {best_threshold:.3f}')

# 3) Compute binary predictions and metrics
y_pred = (y_prob >= best_threshold).astype(int)
test_acc = accuracy_score(y_true, y_pred)
test_auc = roc_auc_score(y_true, y_prob)
test_f1  = f1_score(y_true, y_pred)

print(f'Test AUC : {test_auc:.4f}')
print(f'Test Acc : {test_acc:.4f}')
print(f'Test F1  : {test_f1:.4f}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 380ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 320ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 354ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 251ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 245ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 263ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 