In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, EfficientNetB3
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.utils import class_weight
import shutil
from tqdm import tqdm
import random
import matplotlib.pyplot as plt

2025-05-06 23:00:02.104496: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746572402.300198      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746572402.355915      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Class singled out for the one-vs-rest split: prepare_binary_dataset keeps this
# class folder as-is and pools the images of every other class under 'OTHER'.
TARGET_CLASS = 'DRUSEN'

In [3]:
# Source dataset: one sub-folder per split, each containing one folder per class.
BASE_PATH = '/kaggle/input/labeled-optical-coherence-tomography-oct/Dataset - train+val+test'
TRAIN_PATH = os.path.join(BASE_PATH, 'train')
VAL_PATH = os.path.join(BASE_PATH, 'val')
TEST_PATH = os.path.join(BASE_PATH, 'test')

# Scratch location for the relabelled (TARGET_CLASS vs. OTHER) copy of the data.
# Derived from TARGET_CLASS so that switching the target never reuses a folder
# named after a different class.
TEMP_DIR = f'/kaggle/working/temp_data_{TARGET_CLASS}'


In [4]:
IMG_SIZE = (224, 224)   # spatial size every image is resized to before entering the backbone
BATCH_SIZE = 32
EPOCHS = 15             # upper bound; the EarlyStopping callback may stop training sooner
MODEL_NAME = 'vgg16'    # 'vgg16' selects VGG16; any other value selects EfficientNetB3

# Shuffling, augmentation, dropout and head initialisation are all random.
# Seeding Python, NumPy and TensorFlow in one call keeps Restart & Run All
# as repeatable as the GPU kernels allow.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

In [5]:
def prepare_binary_dataset(target_class):
    """Rebuild TEMP_DIR as a two-class copy (target_class vs. 'OTHER') of the dataset.

    For each of the train/val/test source folders, images found under the
    ``target_class`` folder are copied to ``TEMP_DIR/<split>/<target_class>``
    and images of every other class folder are copied to
    ``TEMP_DIR/<split>/OTHER``. Any existing TEMP_DIR is deleted first.

    Args:
        target_class: Name of the class folder to keep as its own label.

    Raises:
        ValueError: If any split has no ``target_class`` folder. The check runs
            before anything is deleted or copied.
    """
    splits = (('train', TRAIN_PATH), ('val', VAL_PATH), ('test', TEST_PATH))

    def list_class_dirs(source_dir):
        # Only directories are classes; stray files (e.g. metadata) are ignored
        # instead of crashing the later os.listdir call.
        return sorted(
            entry for entry in os.listdir(source_dir)
            if os.path.isdir(os.path.join(source_dir, entry))
        )

    # Validate up front: a misspelled target would otherwise wipe TEMP_DIR and
    # silently copy every image into OTHER.
    classes_per_split = {}
    for subset, source_dir in splits:
        class_names = list_class_dirs(source_dir)
        if target_class not in class_names:
            raise ValueError(
                f"Class '{target_class}' not found in {source_dir}; available: {class_names}"
            )
        classes_per_split[subset] = class_names

    # Start from a clean slate so files from an earlier run cannot leak in.
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)

    for subset, _ in splits:
        for label in (target_class, 'OTHER'):
            os.makedirs(os.path.join(TEMP_DIR, subset, label), exist_ok=True)

    def copy_images(source_dir, subset):
        for class_name in classes_per_split[subset]:
            src = os.path.join(source_dir, class_name)
            is_target = class_name == target_class
            dest = os.path.join(TEMP_DIR, subset, target_class if is_target else 'OTHER')
            for img_file in tqdm(os.listdir(src), desc=f'Processing {subset} - {class_name}'):
                # Several classes are pooled into OTHER; prefixing with the source
                # class keeps equal basenames from overwriting each other.
                dest_name = img_file if is_target else f'{class_name}_{img_file}'
                shutil.copy(os.path.join(src, img_file), os.path.join(dest, dest_name))

    for subset, source_dir in splits:
        copy_images(source_dir, subset)

prepare_binary_dataset(TARGET_CLASS)


Processing train - DRUSEN: 100%|██████████| 6206/6206 [01:12<00:00, 85.98it/s] 
Processing train - CNV: 100%|██████████| 26218/26218 [05:26<00:00, 80.29it/s]
Processing train - NORMAL: 100%|██████████| 35973/35973 [07:22<00:00, 81.33it/s]
Processing train - DME: 100%|██████████| 8118/8118 [01:43<00:00, 78.44it/s]
Processing val - DRUSEN: 100%|██████████| 1773/1773 [00:21<00:00, 82.37it/s]
Processing val - CNV: 100%|██████████| 7491/7491 [01:35<00:00, 78.07it/s]
Processing val - NORMAL: 100%|██████████| 10278/10278 [02:07<00:00, 80.69it/s]
Processing val - DME: 100%|██████████| 2319/2319 [00:28<00:00, 82.15it/s]
Processing test - DRUSEN: 100%|██████████| 887/887 [00:10<00:00, 82.06it/s]
Processing test - CNV: 100%|██████████| 3746/3746 [00:46<00:00, 80.12it/s]
Processing test - NORMAL: 100%|██████████| 5139/5139 [01:02<00:00, 82.73it/s]
Processing test - DME: 100%|██████████| 1161/1161 [00:14<00:00, 80.68it/s]


In [6]:
# Training images are randomly zoomed, flipped and brightness-shifted;
# validation and test images are only rescaled.
# NOTE(review): rescale=1/255 feeds [0, 1] pixels to ImageNet-pretrained backbones.
# The Keras application docs describe model-specific preprocessing (VGG16's
# preprocess_input; EfficientNet rescales internally) - confirm this is intended.
train_aug = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.7, 1.3],
)

val_aug = ImageDataGenerator(rescale=1./255)

# Without an explicit order, flow_from_directory numbers class folders
# alphabetically, which would make 'DRUSEN' label 0 and 'OTHER' label 1.
# Pinning the order makes the target class the positive label (1), so the
# sigmoid output is P(target) and index 0 is always 'OTHER'.
CLASS_ORDER = ['OTHER', TARGET_CLASS]


def make_binary_generator(datagen, subset, shuffle=True):
    """Return a directory iterator over TEMP_DIR/<subset> with labels 0=OTHER, 1=target.

    Args:
        datagen: ImageDataGenerator providing rescaling/augmentation.
        subset: Split folder name under TEMP_DIR ('train', 'val' or 'test').
        shuffle: Whether to shuffle the sample order; keep False when predictions
            must line up with the iterator's ``classes`` attribute.
    """
    return datagen.flow_from_directory(
        os.path.join(TEMP_DIR, subset),
        target_size=IMG_SIZE,
        classes=CLASS_ORDER,
        class_mode='binary',
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
    )


train_gen = make_binary_generator(train_aug, 'train')
val_gen = make_binary_generator(val_aug, 'val')
test_gen = make_binary_generator(val_aug, 'test', shuffle=False)


Found 76515 images belonging to 2 classes.
Found 21861 images belonging to 2 classes.
Found 10933 images belonging to 2 classes.


In [7]:
def build_model(name='vgg16'):
    """Assemble a frozen ImageNet backbone topped by a small binary classification head.

    Args:
        name: 'vgg16' selects VGG16; any other value selects EfficientNetB3.

    Returns:
        A compiled Sequential model ending in a single sigmoid unit, trained
        with binary cross-entropy and reporting accuracy.
    """
    backbone_cls = VGG16 if name == 'vgg16' else EfficientNetB3
    backbone = backbone_cls(
        include_top=False,
        weights='imagenet',
        input_shape=IMG_SIZE + (3,),
        pooling='avg',
    )
    # Only the new head is trained; the pretrained convolutional weights stay fixed.
    backbone.trainable = False

    classifier = models.Sequential()
    classifier.add(backbone)
    classifier.add(layers.BatchNormalization())
    classifier.add(layers.Dense(256, activation='relu'))
    classifier.add(layers.Dropout(0.4))
    classifier.add(layers.Dense(1, activation='sigmoid'))

    classifier.compile(
        optimizer=optimizers.Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

model = build_model(MODEL_NAME)
model.summary()


I0000 00:00:1746573769.719177      31 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


In [None]:
# The one-vs-rest split is imbalanced: the target class is a small minority of
# the training images. 'balanced' weights scale each class inversely to its
# frequency so the loss is not dominated by the majority class.
train_labels = train_gen.classes
label_ids = np.unique(train_labels)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced', classes=label_ids, y=train_labels
)
# Keras expects a plain {class_index: weight} mapping.
class_weights = {int(label): float(weight) for label, weight in zip(label_ids, balanced_weights)}

# Stop once validation accuracy has not improved for 5 epochs and roll back to the best weights.
early_stop = EarlyStopping(patience=5, monitor='val_accuracy', mode='max', restore_best_weights=True)
# Keep only the best-scoring epoch on disk; the file is named after the current target class.
checkpoint = ModelCheckpoint(
    f'best_model_{TARGET_CLASS.lower()}.keras',
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS,
    callbacks=[early_stop, checkpoint],
    class_weight=class_weights,
)


Epoch 1/15


  self._warn_if_super_not_called()
I0000 00:00:1746573782.031592     111 service.cc:148] XLA service 0x7f0da8010170 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746573782.032400     111 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1746573782.450894     111 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   2/2392[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:04[0m 77ms/step - accuracy: 0.5938 - loss: 0.6767  

I0000 00:00:1746573790.864767     111 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m 100/2392[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m15:50[0m 415ms/step - accuracy: 0.7632 - loss: 0.5080

In [None]:
# Score the trained network on the held-out test split.
loss, acc = model.evaluate(test_gen)
print(f'Test Accuracy: {acc * 100:.2f}%')

# Persist the final model under a name derived from the target class.
saved_model_name = f'{TARGET_CLASS.lower()}_model.keras'
model.save(f'/kaggle/working/{saved_model_name}')
print("✅ Saved model as:", saved_model_name)


In [None]:
 from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Get ground truth and predictions
y_true = test_gen.classes
y_pred_probs = model.predict(test_gen)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["OTHER", "Target Class"])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

# Classification Report (Precision, Recall, F1)
print("📊 Classification Report:")
print(classification_report(y_true, y_pred, target_names=["OTHER", "Target"]))

# AUC-ROC Score
auc = roc_auc_score(y_true, y_pred_probs)
print(f"🧠 AUC-ROC Score: {auc:.4f}")
