# In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# === CONFIG ===
# Dataset root; the script reads one sub-directory per quadrant name below
# (os.path.join(base_dir, <quadrant>)) via flow_from_directory.
base_dir = "uncentred_ternary_100_stratified4fold_1000per_seed3888 copy"
quadrants = [f"Q{number}" for number in range(1, 5)]

# Model / training hyper-parameters.
num_classes = 3        # width of the softmax output layer
epochs = 15            # upper bound; early stopping may end training sooner
batch_size = 32
img_size = (299, 299)  # (height, width) images are resized to on load

# Create InceptionV3 model
def create_inception_model(input_shape, num_classes):
    """Build a frozen InceptionV3 backbone with a small softmax classifier head.

    Args:
        input_shape: Shape passed to InceptionV3 as ``input_shape``
            (callers in this file pass ``(height, width, 3)``).
        num_classes: Number of units in the final softmax ``Dense`` layer.

    Returns:
        An uncompiled ``Model`` mapping the backbone input to class
        probabilities.
    """
    # ImageNet-pretrained convolutional stack without its original classifier.
    backbone = InceptionV3(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
    )
    # Freeze the backbone so only the new head below is trained.
    backbone.trainable = False
    # NOTE(review): Keras documents inception_v3.preprocess_input (scaling to
    # [-1, 1]) for these weights, while this file feeds 1/255-rescaled images
    # -- confirm the [0, 1] range is intended.

    pooled = GlobalAveragePooling2D()(backbone.output)
    regularised = Dropout(0.4)(pooled)
    class_probs = Dense(num_classes, activation='softmax')(regularised)

    return Model(inputs=backbone.input, outputs=class_probs)

# === 1. CROSS-VALIDATION ===
# Leave-one-quadrant-out: every fold holds one quadrant out for testing and
# trains on the remaining ones.
best_model = None
best_val_accuracy = 0  # Track the best validation accuracy

# Per-fold peak accuracy on the 10% validation split carved out by fit().
# NOTE(review): this is NOT the accuracy on the held-out quadrant; held-out
# performance is only shown in the per-fold classification report.
fold_metrics = []

for i, test_q in enumerate(quadrants):
    train_qs = [q for j, q in enumerate(quadrants) if j != i]

    print(f"\n==== Fold {i+1}: Test on {test_q}, Train on {train_qs} ====")

    # Data generators
    datagen = ImageDataGenerator(rescale=1./255)

    # Collect training data into memory, one quadrant at a time
    train_data = []
    train_labels = []
    for q in train_qs:
        train_gen = datagen.flow_from_directory(
            directory=os.path.join(base_dir, q),
            target_size=img_size,
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=True
        )

        # Keras directory iterators loop indefinitely when iterated with a
        # plain `for`, so draw exactly one epoch: len(train_gen) batches.
        for _ in range(len(train_gen)):
            x, y = next(train_gen)
            train_data.append(x)
            train_labels.append(y)

    X_train = np.vstack(train_data)
    y_train = np.vstack(train_labels)

    # Test data generator; shuffle=False keeps prediction order aligned with
    # test_gen.classes below.
    test_gen = datagen.flow_from_directory(
        directory=os.path.join(base_dir, test_q),
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )

    # Create model
    model = create_inception_model((*img_size, 3), num_classes)
    model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

    # Early stopping and checkpoint callback
    es = EarlyStopping(patience=3, restore_best_weights=True, monitor='val_loss')
    mc = ModelCheckpoint(f"best_model_fold_{i+1}.h5", save_best_only=True, monitor='val_loss')

    # Training the model.
    # NOTE: validation_split takes the LAST 10% of the stacked arrays (before
    # fit's own shuffling), i.e. samples from the last training quadrant.
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        validation_split=0.1,
        callbacks=[es, mc],
        batch_size=batch_size,
        verbose=1
    )

    # Evaluation on the held-out quadrant
    preds = model.predict(test_gen, verbose=0)
    y_true = test_gen.classes
    y_pred = np.argmax(preds, axis=1)

    print(f"Classification Report for Fold {i+1}:")
    print(classification_report(y_true, y_pred))

    # Track best model based on validation accuracy
    val_accuracy = max(history.history['val_accuracy'])
    fold_metrics.append(val_accuracy)

    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_model = model  # Keep the model with the best validation accuracy

# === 2. PRINT CROSS-VAL RESULTS ===
# fold_metrics holds one score per fold, in fold order.
print("\n==== Cross-Validation Accuracy Scores ====")
for fold_number, score in enumerate(fold_metrics, start=1):
    print(f"Fold {fold_number}: {score:.4f}")
print(f"Average Accuracy: {np.mean(fold_metrics):.4f}")

# === 3. SAVE THE BEST MODEL ===
# Explicit None check: whether a model was selected should not depend on the
# truthiness of the model object itself.
if best_model is not None:
    best_model.save("best_inception_model_across_folds.h5")
    print("\n✅ Best model across all folds saved as 'best_inception_model_across_folds.h5'.")

# === 4. FINAL TRAINING ON ALL DATA ===
print("\n==== Training Final Model on ALL Data ====")
train_datagen = ImageDataGenerator(rescale=1./255)

all_data = []
all_labels = []

# Collect all data for final training (every quadrant, loaded into memory)
for q in quadrants:
    temp_gen = train_datagen.flow_from_directory(
        directory=os.path.join(base_dir, q),
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True
    )
    # Keras directory iterators loop indefinitely when iterated with a plain
    # `for`, so draw exactly one epoch: len(temp_gen) batches.
    for _ in range(len(temp_gen)):
        x, y = next(temp_gen)
        all_data.append(x)
        all_labels.append(y)

X_final = np.vstack(all_data)
y_final = np.vstack(all_labels)

# Final model creation
final_model = create_inception_model((*img_size, 3), num_classes)
final_model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Train final model.
# NOTE: validation_split takes the LAST 10% of the stacked arrays (before
# fit's own shuffling), i.e. samples from the last quadrant loaded.
history = final_model.fit(
    X_final, y_final,
    epochs=epochs,
    validation_split=0.1,
    batch_size=batch_size,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True, monitor='val_loss')],
    verbose=1
)

# === 5. SAVE FINAL MODEL ===
final_model.save("final_model_inception.h5")
print("\n✅ Final model saved as 'final_model_inception.h5'.")

# === 6. PLOT TRAINING HISTORY ===
# Two side-by-side panels drawn from `history`: accuracy on the left, loss on
# the right, each with a training curve and a validation curve.
plt.figure(figsize=(12, 5))

panels = (
    ("Final Model Accuracy", (('accuracy', 'Train Acc'), ('val_accuracy', 'Val Acc'))),
    ("Final Model Loss", (('loss', 'Train Loss'), ('val_loss', 'Val Loss'))),
)

for position, (panel_title, curves) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for metric_key, curve_label in curves:
        plt.plot(history.history[metric_key], label=curve_label)
    plt.title(panel_title)
    plt.legend()

plt.tight_layout()
# Save before show() so the figure is written to disk regardless of backend.
plt.savefig("final_training_plot.png")
plt.show()
