In [1]:
import tensorflow as tf
import numpy as np
import xgboost as xgb
import pickle
import os
from pathlib import Path
from sklearn.metrics import classification_report

# 1. HARDWARE AND MEMORY MANAGEMENT
# `gpu_name` starts as a CPU placeholder and is only overwritten once the
# details of the first visible GPU have been read successfully.
gpu_name = "CPU (No GPU found)"
gpus = tf.config.list_physical_devices('GPU')

if len(gpus) > 0:
    try:
        # Only the first GPU is used for the human-readable device name.
        gpu_details = tf.config.experimental.get_device_details(gpus[0])
        gpu_name = gpu_details.get('device_name', 'Generic GPU')
        # Memory growth is switched on for every visible GPU, not just the first.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        print(f"Hardware Detected: {gpu_name}")
        print("GPU Memory Growth Enabled")
    except RuntimeError as err:
        # Best effort: report the problem and carry on with the defaults.
        # NOTE(review): per the TensorFlow docs this is raised when memory
        # growth is changed after the GPUs were initialized - confirm.
        print(err)

# 2. LOAD FEATURE EXTRACTOR
# The saved Keras model is loaded without compiling it, then wrapped in a
# second model that shares its input but stops at the output of the layer
# named "feature_extraction_layer".
MODEL_PATH = "Final_OCT_EfficientNet.keras"
print(f"Loading Feature Extractor from {MODEL_PATH}...")
model = tf.keras.models.load_model(MODEL_PATH, compile=False)

feature_layer = model.get_layer("feature_extraction_layer")
feature_extractor = tf.keras.models.Model(
    inputs=model.input,
    outputs=feature_layer.output,
)
print("Feature Extractor Loaded Successfully")

# 3. DATA PREPARATION
# Dataset root. Defaults to the original local kagglehub cache location; set
# the OCT_DATASET_DIR environment variable to point somewhere else without
# editing this script. An unset or empty variable falls back to the default.
DEFAULT_DATASET_DIR = "C:/Users/adim/.cache/kagglehub/datasets/anirudhcv/labeled-optical-coherence-tomography-oct/versions/2"
base_path = Path(os.environ.get("OCT_DATASET_DIR") or DEFAULT_DATASET_DIR)
train_dir = base_path / "Dataset - train+val+test" / "train"
test_dir = base_path / "Dataset - train+val+test" / "test"

# Fail early, naming the missing directory, before any generator is built.
for split_dir in (train_dir, test_dir):
    if not split_dir.is_dir():
        raise FileNotFoundError(f"Dataset directory not found: {split_dir}")

# Every image goes through the EfficientNetV2 preprocessing function.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input
)

# Both splits use identical generator settings. shuffle=False matters: the
# feature-extraction step pairs the prediction rows with `<generator>.classes`.
flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False,
)
train_gen = datagen.flow_from_directory(train_dir, **flow_options)
test_gen = datagen.flow_from_directory(test_dir, **flow_options)

# 4. FEATURE EXTRACTION
print(f"Extracting features using {gpu_name} acceleration...")

# Integer labels are taken straight from the generators' `classes` attribute.
y_train, y_test = train_gen.classes, test_gen.classes

# Run every image through the truncated network: training split first, then
# the test split.
X_train_features = feature_extractor.predict(train_gen, verbose=1)
X_test_features = feature_extractor.predict(test_gen, verbose=1)

# 5. CONSOLIDATED HYBRID HEAD TRAINING
print("Training XGBoost Hybrid Head...")

# Hyperparameters for the XGBoost classifier trained on the extracted features.
xgb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 5,
    "tree_method": "hist",
    # Device choice follows TensorFlow's GPU detection from the hardware step.
    # NOTE(review): this assumes the installed XGBoost build supports CUDA
    # whenever TensorFlow sees a GPU - confirm.
    "device": "cuda" if gpus else "cpu",
    "eval_metric": "mlogloss",
    # Intended to suppress device mismatch warnings.
    "verbosity": 0,
}
xgb_model = xgb.XGBClassifier(**xgb_params)

# fit() is called exactly once, on the full training feature matrix.
xgb_model.fit(X_train_features, y_train)

# 6. SAVE ARTIFACTS
print("Saving Hybrid Assets...")
# The classifier has been fitted by this point, so it can be saved.
xgb_model.save_model("Final_XGBoost_Classifier.json")

# Save class labels for app.py.
# The list is ordered by the integer index each class was assigned, so that
# classes[i] is the name of label i. Sorting on the index makes this explicit
# instead of relying on the iteration order of the `class_indices` dict.
class_to_index = train_gen.class_indices
classes = sorted(class_to_index, key=class_to_index.get)

# The pickle format and file name are kept as-is because app.py reads this file.
with open('class_labels.pkl', 'wb') as f:
    pickle.dump(classes, f)

# 7. FINAL EVALUATION
print("\n--- Final Performance Report ---")
y_pred = xgb_model.predict(X_test_features)

# Pass the label ids explicitly. Without `labels`, classification_report infers
# them from the values present in y_test/y_pred and raises ValueError when that
# count differs from len(target_names), e.g. when a class is missing from the
# test split. Explicit ids also pin row i of the report to classes[i].
label_ids = list(range(len(classes)))
print(classification_report(y_test, y_pred, labels=label_ids, target_names=classes))
print("Hybrid Pipeline Training Complete")

Hardware Detected: NVIDIA GeForce RTX 4060 Laptop GPU
GPU Memory Growth Enabled
Loading Feature Extractor from Final_OCT_EfficientNet.keras...
Feature Extractor Loaded Successfully
Found 76515 images belonging to 4 classes.
Found 10933 images belonging to 4 classes.
Extracting features using NVIDIA GeForce RTX 4060 Laptop GPU acceleration...
Training XGBoost Hybrid Head...
Saving Hybrid Assets...

--- Final Performance Report ---
              precision    recall  f1-score   support

         CNV       0.95      0.96      0.95      3746
         DME       0.91      0.86      0.88      1161
      DRUSEN       0.81      0.65      0.72       887
      NORMAL       0.95      0.98      0.97      5139

    accuracy                           0.93     10933
   macro avg       0.90      0.86      0.88     10933
weighted avg       0.93      0.93      0.93     10933

Hybrid Pipeline Training Complete
