# In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.applications import ResNet50, VGG16
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
from utils.preprocessing import load_and_preprocess_images  # custom function

# ==== Google Drive project layout ====
# Every path below is derived from this single project root.
BASE_DIR = "/content/drive/MyDrive/projects/v"

# Output locations: model checkpoints, per-epoch training logs, evaluation reports.
MODELS_DIR = os.path.join(BASE_DIR, "models")
LOGS_DIR = os.path.join(BASE_DIR, "outputs/training_logs")
REPORTS_DIR = os.path.join(BASE_DIR, "outputs/classification_reports")

# Input locations: the two image collections, both stored under the dataset folder.
DATASET_DIR = os.path.join(BASE_DIR, "dataset")
SOURCE1_DIR, SOURCE2_DIR = (
    os.path.join(DATASET_DIR, folder)
    for folder in ("source1_lung_dataset", "source2_lung_dataset")
)

# Create each output folder up front; exist_ok makes re-running this cell harmless.
for _out_dir in (MODELS_DIR, LOGS_DIR, REPORTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# ==== Load Data ====
print("[INFO] Loading datasets from Google Drive...")
X1, y1 = load_and_preprocess_images(SOURCE1_DIR)
X2, y2 = load_and_preprocess_images(SOURCE2_DIR)

# Stack both sources along axis 0 into one image array and one label array.
X = np.concatenate((X1, X2))
y = np.concatenate((y1, y2))

# Hold out 20% of the merged data for testing. Stratifying on y keeps the class
# proportions alike in both parts; the fixed seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# ==== Model Builder ====
def build_model(base_model, num_classes=None):
    """Attach a global-average-pooling + softmax head to a backbone network.

    Args:
        base_model: Keras model whose ``.output`` is fed to
            ``GlobalAveragePooling2D`` and whose ``.input`` becomes the input
            of the returned model.
        num_classes: Number of units in the softmax layer. When omitted, it
            falls back to the number of distinct values in the module-level
            label array ``y`` (the original behaviour), so existing
            ``build_model(base)`` calls are unaffected.

    Returns:
        An uncompiled ``Model`` mapping ``base_model.input`` to the softmax
        output.
    """
    if num_classes is None:
        # Legacy path: infer the class count from the global labels.
        num_classes = len(np.unique(y))
    pooled = GlobalAveragePooling2D()(base_model.output)
    predictions = Dense(num_classes, activation="softmax")(pooled)
    return Model(inputs=base_model.input, outputs=predictions)

# ==== Train ResNet50 ====
print("[INFO] Training ResNet50...")
# ImageNet-initialised backbone without its original classifier; build_model adds the new head.
resnet_base = ResNet50(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))
resnet_model = build_model(resnet_base)

resnet_model.compile(optimizer=Adam(learning_rate=1e-4), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
csv_logger_resnet = CSVLogger(os.path.join(LOGS_DIR, "resnet50_training.log"))
# Only the epoch with the best validation accuracy is kept on disk.
checkpoint_resnet = ModelCheckpoint(os.path.join(MODELS_DIR, "resnet50_model.h5"), save_best_only=True, monitor="val_accuracy", mode="max")

# The checkpoint is selected on validation accuracy, so validation must NOT be
# the test set: otherwise the saved weights are picked using the very data the
# final report is computed on. Keras holds out the last 10% of the training
# arrays instead (X_train is already shuffled by train_test_split, so that tail
# is a random sample) and does not train on it.
resnet_model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=32,
    callbacks=[csv_logger_resnet, checkpoint_resnet]
)

# ==== Train VGG16 ====
print("[INFO] Training VGG16...")
# ImageNet-initialised backbone without its original classifier; build_model adds the new head.
vgg_base = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))
vgg_model = build_model(vgg_base)

vgg_model.compile(optimizer=Adam(learning_rate=1e-4), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
csv_logger_vgg = CSVLogger(os.path.join(LOGS_DIR, "vgg16_training.log"))
# Only the epoch with the best validation accuracy is kept on disk.
checkpoint_vgg = ModelCheckpoint(os.path.join(MODELS_DIR, "vgg16_model.h5"), save_best_only=True, monitor="val_accuracy", mode="max")

# The checkpoint is selected on validation accuracy, so validation must NOT be
# the test set: otherwise the saved weights are picked using the very data the
# final report is computed on. Keras holds out the last 10% of the training
# arrays instead (X_train is already shuffled by train_test_split, so that tail
# is a random sample) and does not train on it.
vgg_model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=32,
    callbacks=[csv_logger_vgg, checkpoint_vgg]
)

# ==== Ensemble Model ====
print("[INFO] Building Ensemble Model...")
# Reload the best checkpoints written by the two single-model training runs.
resnet_trained = load_model(os.path.join(MODELS_DIR, "resnet50_model.h5"))
vgg_trained = load_model(os.path.join(MODELS_DIR, "vgg16_model.h5"))

# Drop each softmax head: layers[-2] is the GlobalAveragePooling2D layer added
# by build_model, so both extractors emit one pooled feature vector per image.
resnet_feat = Model(inputs=resnet_trained.input, outputs=resnet_trained.layers[-2].output)
vgg_feat = Model(inputs=vgg_trained.input, outputs=vgg_trained.layers[-2].output)

# Feed the same image tensor through both extractors.
input_layer = Input(shape=(224, 224, 3))
resnet_output = resnet_feat(input_layer)
vgg_output = vgg_feat(input_layer)

# Concatenate the two feature vectors and classify them with a fresh softmax layer.
merged = Concatenate()([resnet_output, vgg_output])
final_output = Dense(len(np.unique(y)), activation="softmax")(merged)
ensemble_model = Model(inputs=input_layer, outputs=final_output)

ensemble_model.compile(optimizer=Adam(learning_rate=1e-4), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
csv_logger_ens = CSVLogger(os.path.join(LOGS_DIR, "ensemble_training.log"))
# Only the epoch with the best validation accuracy is kept on disk.
checkpoint_ens = ModelCheckpoint(os.path.join(MODELS_DIR, "final_ensemble_model.h5"), save_best_only=True, monitor="val_accuracy", mode="max")

# The backbones are not frozen, so this fit updates them together with the new
# Dense layer. The checkpoint is selected on validation accuracy, so validation
# must NOT be the test set: otherwise the saved weights are picked using the
# very data the final report is computed on. Keras holds out the last 10% of
# the training arrays instead (X_train is already shuffled by train_test_split,
# so that tail is a random sample) and does not train on it.
ensemble_model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=32,
    callbacks=[csv_logger_ens, checkpoint_ens]
)

# ==== Evaluation ====
print("[INFO] Evaluating Models...")
# Compute the class list once, from the full label array. Passing it as
# `labels=` pins the report rows to exactly these classes; without it
# classification_report infers the rows from y_test/y_pred, and a class missing
# from both would make `target_names` the wrong length.
class_labels = np.unique(y)
class_names = [str(c) for c in class_labels]

for model_name, model_path in [("ResNet50", "resnet50_model.h5"), ("VGG16", "vgg16_model.h5"), ("Ensemble", "final_ensemble_model.h5")]:
    # Score the best checkpoint saved during training, not the last-epoch weights.
    model = load_model(os.path.join(MODELS_DIR, model_path))
    # argmax over the softmax outputs gives the predicted class index per image.
    # NOTE(review): this assumes the labels in y are the integers 0..K-1 - confirm.
    y_pred = np.argmax(model.predict(X_test), axis=1)
    report = classification_report(y_test, y_pred, labels=class_labels, target_names=class_names)
    with open(os.path.join(REPORTS_DIR, f"{model_name}_report.txt"), "w", encoding="utf-8") as f:
        f.write(report)
    print(f"\n===== {model_name} Report =====\n", report)
