In [2]:
import os
import re
import tensorflow as tf
from tensorflow.keras.applications import VGG19, DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Concatenate, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
# Reset Keras' global state before anything is built (presumably so that
# re-running the notebook in a live kernel starts from a clean slate).
K.clear_session()


In [2]:
import os
import re
import tensorflow as tf
from tensorflow.keras.applications import VGG19, DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Concatenate, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import backend as K

# ——— GPU & MEMORY CONFIG —————————————————————————————————
# Pass the XLA-devices flag to TensorFlow via the environment and make
# 'mixed_float16' the global Keras dtype policy.
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Turn on memory growth for every visible GPU and report which ones were found.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. Using CPU instead.")
else:
    try:
        gpu_names = []
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            gpu_names.append(gpu.name)
        print(f"Using GPU: {gpu_names}")
    except RuntimeError as err:
        # Report the problem and keep going with whatever configuration is in effect.
        print(f"GPU setup error: {err}")

def clear_memory():
    """Reset Keras' global state and run a garbage-collection pass.

    Takes no arguments and returns nothing. Called between training phases
    to release whatever Keras and Python can free.
    """
    import gc  # imported locally: the notebook's import cell does not bring in gc

    # K is tensorflow.keras.backend, so K.clear_session() and
    # tf.keras.backend.clear_session() are the same function; one call suffices.
    K.clear_session()
    gc.collect()

# ——— USER SETTINGS —————————————————————————————————————
# Key handed to get_optimizer() whenever the model is compiled.
selected_optimizer = 'nadam'

# Image folders for the three dataset splits.
train_dir, validation_dir, test_dir = (
    './amla_images/train',
    './amla_images/val',
    './amla_images/test',
)

# Folder that receives the per-epoch checkpoints; created if it does not exist yet.
checkpoint_dir = "./checkpoints_amla_opt1"
os.makedirs(checkpoint_dir, exist_ok=True)

# The "{epoch:02d}" placeholder is left unformatted here; it is part of the
# template string given to the ModelCheckpoint callback.
checkpoint_path = os.path.join(
    checkpoint_dir,
    "model_checkpoint_amla_opt1_epoch_{epoch:02d}.keras",
)

# ——— DATA ————————————————————————————————————————————
# Both generators only multiply pixel values by 1/255; no augmentation is configured.
# NOTE(review): keras.applications ships a preprocess_input for VGG19 and for
# DenseNet121 — confirm that plain 1/255 rescaling is what the ImageNet weights
# should receive here.
train_datagen    = ImageDataGenerator(rescale=1./255)
val_test_datagen = ImageDataGenerator(rescale=1./255)

batch_size = 4  # ↓ Smaller batch to avoid OOM

# Arguments shared by all three directory iterators.
_flow_args = dict(target_size=(224, 224), batch_size=batch_size, class_mode='binary')

train_gen = train_datagen.flow_from_directory(train_dir, **_flow_args)
val_gen   = val_test_datagen.flow_from_directory(validation_dir, **_flow_args)
# The test iterator is the only one created with shuffling switched off.
test_gen  = val_test_datagen.flow_from_directory(test_dir, shuffle=False, **_flow_args)

# ——— MODEL ————————————————————————————————————————————
# A single 224x224x3 input feeds two ImageNet-initialised backbones, both
# created without their top classifier (include_top=False).
inp      = Input(shape=(224, 224, 3))
vgg_base = VGG19(input_tensor=inp, include_top=False, weights='imagenet')
dn_base  = DenseNet121(input_tensor=inp, include_top=False, weights='imagenet')

# Freeze every backbone layer to begin with (VGG19 first, then DenseNet121).
for _base in (vgg_base, dn_base):
    for l in _base.layers:
        l.trainable = False

# Global-average-pool each backbone output, concatenate the two vectors and
# classify through a 1024-unit ReLU layer followed by a single sigmoid unit.
x1 = GlobalAveragePooling2D()(vgg_base.output)
x2 = GlobalAveragePooling2D()(dn_base.output)
merged = Concatenate()([x1, x2])
x = Dense(1024, activation='relu')(merged)
# The output layer is pinned to float32 while the global policy is mixed_float16.
out = Dense(1, activation='sigmoid', dtype='float32')(x)

model = Model(inputs=inp, outputs=out)

# ——— CHECKPOINT UTIL ————————————————————————————————————
def get_latest_checkpoint(directory=None):
    """Locate the per-epoch checkpoint with the highest epoch number.

    Args:
        directory: Folder to scan. When omitted, the module-level
            ``checkpoint_dir`` is used.

    Returns:
        A ``(path, epoch)`` tuple for the newest checkpoint, or ``(None, 0)``
        when the folder does not exist or contains no checkpoint file.
    """
    if directory is None:
        directory = checkpoint_dir
    if not os.path.isdir(directory):
        return None, 0

    # Match the complete file name so that stray files sharing the prefix
    # (temporary files, "..._epoch_best.keras", other extensions) are skipped
    # rather than crashing on a failed regex search or being picked as latest.
    pattern = re.compile(r'model_checkpoint_amla_opt1_epoch_(\d+)\.keras')
    found = []
    for fname in os.listdir(directory):
        m = pattern.fullmatch(fname)
        if m:
            found.append((int(m.group(1)), fname))
    if not found:
        return None, 0

    # Use the file name as it exists on disk instead of re-deriving it from
    # the epoch number, so the returned path always points at a real file.
    e_max, fname = max(found)
    return os.path.join(directory, fname), e_max

# Resume from the newest checkpoint, if there is one.
latest_ckpt, last_epoch = get_latest_checkpoint()
if latest_ckpt and os.path.exists(latest_ckpt):
    print("Loading checkpoint:", latest_ckpt)
    # Restore the saved weights INTO the model built above rather than
    # replacing it with load_model(): vgg_base and dn_base hold references to
    # this model's layer objects, and the training phases unfreeze layers
    # through them. A model returned by load_model() has different layer
    # objects, so those `trainable = True` updates would never reach it.
    # The optimizer state stored in the file is not needed: every phase (and
    # the final evaluation) compiles the model with a fresh optimizer.
    model.load_weights(latest_ckpt)
else:
    print("No checkpoint found, starting fresh")
    # Nothing was restored, so training must begin at epoch 0.
    last_epoch = 0

# ——— OPTIMIZERS ————————————————————————————————————————
def get_optimizer(name, lr):
    """Create a new Keras optimizer for the given key.

    Args:
        name: 'adamw' selects AdamW (with weight_decay=1e-5), 'nadam' selects
            Nadam; any other value selects Adam.
        lr: Learning rate passed to the optimizer.

    Returns:
        A freshly constructed optimizer from ``tf.keras.optimizers``.
    """
    if name == 'adamw':
        return tf.keras.optimizers.AdamW(learning_rate=lr, weight_decay=1e-5)
    if name == 'nadam':
        return tf.keras.optimizers.Nadam(learning_rate=lr)
    # Every other key uses plain Adam.
    return tf.keras.optimizers.Adam(learning_rate=lr)

# ——— CALLBACK ————————————————————————————————————————
# save_best_only=False: the callback does not restrict saving to the best
# epoch; get_latest_checkpoint() resumes from the files it writes.
checkpoint_cb = ModelCheckpoint(checkpoint_path, save_best_only=False, verbose=1)

# ——— TRAINING PHASES ————————————————————————————————————
# Epoch budget per phase. Phase 1 trains with both backbones frozen, phase 2
# unfreezes the last 5 layers of each backbone, phase 3 unfreezes everything.
ph1, ph2, ph3 = 10, 5, 5


def _unfreeze(base_layers):
    """Mark the layers of the current `model` that correspond to base_layers as trainable.

    Layers are matched by name on `model` itself, so the change takes effect
    even if `model` was rebound to a different object (e.g. by load_model)
    after vgg_base / dn_base were created.
    """
    wanted = {layer.name for layer in base_layers}
    for layer in model.layers:
        if layer.name in wanted:
            layer.trainable = True


def _run_phase(learning_rate, start_epoch, end_epoch):
    """Compile with a fresh optimizer and train from start_epoch up to end_epoch."""
    model.compile(
        optimizer=get_optimizer(selected_optimizer, learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    # steps_per_epoch / validation_steps are left to Keras, which takes the
    # length from the generators. The previous `samples // batch_size` value
    # silently dropped the final partial batch of each pass.
    model.fit(
        train_gen,
        validation_data=val_gen,
        initial_epoch=start_epoch,
        epochs=end_epoch,
        callbacks=[checkpoint_cb]
    )
    clear_memory()


# Phase 1: backbones stay frozen.
if last_epoch < ph1:
    _run_phase(1e-4, last_epoch, ph1)
    last_epoch = ph1

# Phase 2: fine-tune the last 5 layers of each backbone at a lower learning rate.
if last_epoch < ph1 + ph2:
    _unfreeze(vgg_base.layers[-5:])
    _unfreeze(dn_base.layers[-5:])
    _run_phase(1e-5, last_epoch, ph1 + ph2)
    last_epoch = ph1 + ph2

# Phase 3: fine-tune every layer at the lowest learning rate.
if last_epoch < ph1 + ph2 + ph3:
    _unfreeze(vgg_base.layers)
    _unfreeze(dn_base.layers)
    _run_phase(1e-6, last_epoch, ph1 + ph2 + ph3)
    # Keep the epoch counter in step with what was actually trained.
    last_epoch = ph1 + ph2 + ph3

# Compile once more before evaluating, whichever phases ran above
# (clear_memory() is called at the end of each of them).
_eval_compile_args = dict(
    optimizer=get_optimizer(selected_optimizer, 1e-6),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.compile(**_eval_compile_args)

# ——— EVALUATE ————————————————————————————————————————
# Score the test split first, then the validation split, and print both.
test_loss, test_acc = model.evaluate(test_gen)
val_loss, val_acc = model.evaluate(val_gen)
print(
    f"Test   → loss={test_loss:.4f}, acc={test_acc:.4f}",
    f"Val    → loss={val_loss:.4f}, acc={val_acc:.4f}",
    sep="\n",
)


Using GPU: ['/physical_device:GPU:0']
Found 11124 images belonging to 2 classes.
Found 2386 images belonging to 2 classes.
Found 2386 images belonging to 2 classes.
Loading checkpoint: ./checkpoints_amla_opt1/model_checkpoint_amla_opt1_epoch_21.keras


  self._warn_if_super_not_called()
I0000 00:00:1745775985.407260    1887 service.cc:152] XLA service 0x7fe7b008fa00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1745775985.407330    1887 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-04-27 17:46:25.652273: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1745775987.069862    1887 cuda_dnn.cc:529] Loaded cuDNN version 90300

I0000 00:00:1745776011.416861    1887 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m596/597[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 75ms/step - accuracy: 0.9999 - loss: 1.5284e-04




[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 130ms/step - accuracy: 0.9999 - loss: 1.6238e-04
[1m597/597[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 123ms/step - accuracy: 0.9994 - loss: 0.0038
Test   → loss=0.0030, acc=0.9992
Val    → loss=0.0067, acc=0.9992


Using GPU: ['/physical_device:GPU:0']


KeyboardInterrupt: 