In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from scipy import stats
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, roc_auc_score
)
from sklearn.model_selection import KFold

# Parameters
K = 5              # number of cross-validation folds
random_state = 42  # seed for the fold shuffle
batch_size = 32
epochs = 100       # upper bound per fold; early stopping may end training sooner
size = 256         # tile side length, used as the model input size (size, size, 3)

# Scale pixel values by 1/255 (presumably 8-bit images -> [0, 1]; confirm
# against the tile loader). float32 is requested explicitly: dividing an
# integer array by 255.0 would otherwise allocate a float64 copy, twice the
# memory for data that Keras casts to float32 anyway.
tiles_img_norm = np.asarray(tiles_img, dtype=np.float32) / 255.0

# Append the channel axis only when it is missing, so re-running this cell
# does not keep stacking extra trailing dimensions onto the masks.
# NOTE(review): assumes masks arrive as (n_tiles, height, width) - confirm.
if tiles_mask.ndim == 3:
    tiles_mask = tiles_mask[..., np.newaxis]

kf = KFold(n_splits=K, shuffle=True, random_state=random_state)

# Store metrics per fold: one list per metric, one entry appended per fold
metrics_per_fold = {m: [] for m in ["Accuracy", "Precision", "Recall", "F1", "mIoU", "ROC-AUC"]}
conf_matrices = []

def _paired_augmented_batches(image_flow, mask_flow, rng, brightness=(0.8, 1.2)):
    """Yield ``(images, masks)`` batches that share one geometric transform.

    ``image_flow`` and ``mask_flow`` must be ``ImageDataGenerator.flow``
    iterators built with identical geometric arguments, batch size and seed,
    so both draw the same shuffle order and the same random transform for
    every sample. A random brightness gain from ``brightness`` is then
    applied to the images only. It is drawn from ``rng`` rather than by the
    generators, because an extra random draw on the image side would
    desynchronise the two streams.
    """
    low, high = brightness
    for image_batch, mask_batch in zip(image_flow, mask_flow):
        gains = rng.uniform(low, high, size=(len(image_batch), 1, 1, 1))
        # Clipping to [0, 1] assumes the images were already scaled to that
        # range before being handed to the generator.
        brightened = np.clip(image_batch * gains, 0.0, 1.0).astype(np.float32)
        yield brightened, mask_batch


# Geometric augmentation shared by images and masks. Passing the masks as the
# `y` argument of a single `flow(X, y)` would leave them untransformed and
# therefore misaligned with the augmented images.
geometric_aug = dict(
    shear_range=0.2, zoom_range=0.2, horizontal_flip=True,
    rotation_range=20, width_shift_range=0.2, height_shift_range=0.2,
)
image_datagen_cv = tf.keras.preprocessing.image.ImageDataGenerator(**geometric_aug)
# Nearest-neighbour resampling keeps mask values at their original labels
# instead of blending them along object borders.
mask_datagen_cv = tf.keras.preprocessing.image.ImageDataGenerator(
    interpolation_order=0, **geometric_aug
)

for fold, (train_index, val_index) in enumerate(kf.split(tiles_img_norm), start=1):
    print(f"\n===== Fold {fold}/{K} =====")

    # Drop graph/layer state left over from the previous fold's model so
    # memory use does not grow with every fold.
    tf.keras.backend.clear_session()

    X_train_cv, X_val_cv = tiles_img_norm[train_index], tiles_img_norm[val_index]
    y_train_cv, y_val_cv = tiles_mask[train_index], tiles_mask[val_index]

    # Data augmentation: the same seed on both flows keeps every image and
    # its mask under an identical random transform.
    fold_seed = random_state + fold
    train_batches = _paired_augmented_batches(
        image_datagen_cv.flow(X_train_cv, batch_size=batch_size, seed=fold_seed),
        mask_datagen_cv.flow(y_train_cv, batch_size=batch_size, seed=fold_seed),
        np.random.default_rng(fold_seed),
    )
    # A plain Python generator has no length, so the epoch size is explicit.
    steps_per_epoch = int(np.ceil(len(X_train_cv) / batch_size))

    # Create model
    model_cv = vgg16_unet_model(input_size=(size, size, 3), freeze_encoder=True)

    # Callbacks
    lr_scheduler_cv = tf.keras.callbacks.LearningRateScheduler(lr_schedule)
    # restore_best_weights: evaluate the best epoch, not the weights from
    # `patience` epochs after it.
    early_stop_cv = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, mode='min', verbose=1,
        restore_best_weights=True
    )

    # Train
    # NOTE(review): the validation fold both drives early stopping and is
    # scored below, so the reported metrics are somewhat optimistic.
    model_cv.fit(
        train_batches,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs, validation_data=(X_val_cv, y_val_cv),
        callbacks=[lr_scheduler_cv, early_stop_cv], verbose=1
    )

    # Predict
    y_prob = model_cv.predict(X_val_cv, verbose=0)

    # Flatten for metrics (pixel-wise evaluation over the whole fold)
    # NOTE(review): assumes the masks hold exact 0/1 labels - confirm.
    y_true_flat = y_val_cv.astype(np.uint8).ravel()
    y_pred_flat = (y_prob > 0.5).astype(np.uint8).ravel()
    y_prob_flat = y_prob.ravel()

    # Confusion matrix; `labels` pins the shape to 2x2 even if a fold lacks
    # one class, so the per-fold matrices can always be averaged later.
    cm = confusion_matrix(y_true_flat, y_pred_flat, labels=[0, 1])
    conf_matrices.append(cm)

    # Metrics
    acc = accuracy_score(y_true_flat, y_pred_flat)
    prec = precision_score(y_true_flat, y_pred_flat, zero_division=0)
    rec = recall_score(y_true_flat, y_pred_flat, zero_division=0)
    f1 = f1_score(y_true_flat, y_pred_flat, zero_division=0)
    # Mean IoU over both classes: per class, IoU = TP / (TP + FP + FN).
    # Classes absent from both truth and prediction are left out of the mean.
    class_tp = np.diag(cm)
    class_union = cm.sum(axis=0) + cm.sum(axis=1) - class_tp
    seen = class_union > 0
    miou = float(np.mean(class_tp[seen] / class_union[seen])) if seen.any() else 0.0
    roc = roc_auc_score(y_true_flat, y_prob_flat)

    # Store metrics
    metrics_per_fold["Accuracy"].append(acc)
    metrics_per_fold["Precision"].append(prec)
    metrics_per_fold["Recall"].append(rec)
    metrics_per_fold["F1"].append(f1)
    metrics_per_fold["mIoU"].append(miou)
    metrics_per_fold["ROC-AUC"].append(roc)

    print(f"Fold {fold} — Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}, "
          f"F1: {f1:.4f}, mIoU: {miou:.4f}, ROC-AUC: {roc:.4f}")

# ===== Final results =====
# Report mean, sample standard deviation and a 95% confidence interval of the
# mean for every metric collected across the folds.
print("\n==== Cross-validation results ====")
for metric, values in metrics_per_fold.items():
    scores = np.asarray(values, dtype=float)
    n_scores = scores.size
    mean_val = scores.mean()
    if n_scores > 1:
        # Sample standard deviation (ddof=1) with a Student-t critical value:
        # with only a handful of folds, the population std and z = 1.96
        # produce an interval that is too narrow.
        std_val = scores.std(ddof=1)
        t_crit = stats.t.ppf(0.975, df=n_scores - 1)
        ci95 = t_crit * std_val / np.sqrt(n_scores)  # 95% CI half-width
    else:
        # Spread is undefined for fewer than two scores.
        std_val = ci95 = float("nan")
    print(f"{metric}: {mean_val:.4f} ± {std_val:.4f} (95% CI: {mean_val-ci95:.4f} – {mean_val+ci95:.4f})")

# ===== Average confusion matrix =====
# Element-wise mean of the per-fold confusion matrices (pixel counts), drawn
# as an annotated heatmap with true labels on rows and predictions on columns.
avg_cm = np.mean(conf_matrices, axis=0)
plt.figure(figsize=(5, 4))
sns.heatmap(avg_cm, annot=True, fmt=".0f", cmap="Blues",
            xticklabels=["Non-Forest", "Forest"],
            yticklabels=["Non-Forest", "Forest"])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
# The fold count comes from K so the title stays correct if K is changed.
plt.title(f"Average Confusion Matrix ({K}-Fold CV)")
plt.tight_layout()
plt.show()
