In [1]:
# ============================================================
# Independent training per file (pad3, pad5, pad9) with RF + GridSearchCV
# Aggregate testing performance & feature importance across the three runs
# ============================================================
import os, warnings, json, joblib
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# -------------------------------
# Config
# -------------------------------
# Predictor columns read from every CSV, and the column holding the label.
feature_cols = [
    'B', 'G', 'R', 'RE', 'NIR',
    'NDVI', 'NDRE', 'SAVI', 'VARI', 'ExG',
]
target_col = 'class'

# One CSV per independent run.
csv_files = ["split_pad3.csv", "split_pad5.csv", "split_pad9.csv"]

# Label order used for confusion-matrix axes and for the metric tables.
classes_order = ["grass", "legume"]

# Root directory for every artefact written by this script (created if absent).
out_dir = "./rf_paddock_hold-out_3_repetition_outputs"
os.makedirs(out_dir, exist_ok=True)

# Hyperparameter grid for the "rf" pipeline step. The search itself runs with
# 400 trees; the final model is refit afterwards with 1000 trees.
param_grid = {
    "rf__n_estimators": [400],
    "rf__max_depth": [None, 20, 30],
    "rf__min_samples_split": [2, 5, 10],
    "rf__min_samples_leaf": [1, 2, 4],
    "rf__max_features": ["sqrt", 0.5],
    "rf__bootstrap": [True],
}

# Seeded, shuffled 5-fold stratified splitter shared by all grid searches.
cv5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# -------------------------------
# Helpers
# -------------------------------
def plot_cm(cm, title, path_png, normalize=False):
    """Render a confusion matrix with the labels in `classes_order` and save it as a PNG.

    Parameters
    ----------
    cm : numpy.ndarray
        Confusion matrix of counts.
    title : str
        Figure title.
    path_png : str
        Destination file; written at 300 dpi.
    normalize : bool, default False
        When True, each row is divided by its row sum and cell values are
        shown with two decimals; otherwise the raw values are shown.
    """
    matrix, value_format = cm, None
    if normalize:
        # A row that sums to zero yields NaN/inf cells; the NumPy warnings
        # for that division are silenced rather than raised.
        with np.errstate(invalid="ignore", divide="ignore"):
            row_totals = cm.sum(axis=1, keepdims=True)
            matrix = cm.astype(float) / row_totals
        value_format = ".2f"

    display = ConfusionMatrixDisplay(
        confusion_matrix=matrix,
        display_labels=classes_order,
    )
    display.plot(cmap=plt.cm.Blues, values_format=value_format)
    plt.title(title)
    plt.yticks(rotation="vertical")
    plt.tight_layout()
    plt.savefig(path_png, dpi=300)
    plt.close()

def cm_to_metrics(cm, classes=None):
    """Compute one-vs-rest metrics per class and overall accuracy from a confusion matrix.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix of counts. Row ``i`` is treated as the true class
        ``i`` and column ``i`` as the predicted class ``i``.
    classes : sequence of str, optional
        Class labels in the same order as the rows/columns of ``cm``.
        Defaults to the module-level ``classes_order``.

    Returns
    -------
    metrics : dict
        ``{label: {"precision", "sensitivity", "specificity", "f1"}}``.
        Any ratio whose denominator is zero is reported as ``0.0``.
    OA : float
        Overall accuracy (trace divided by total count), ``0.0`` for an
        all-zero matrix.

    Raises
    ------
    ValueError
        If ``cm`` is not a square matrix with one row/column per class.
    """
    if classes is None:
        classes = classes_order
    cm = np.asarray(cm)
    n_classes = len(classes)
    if cm.shape != (n_classes, n_classes):
        raise ValueError(
            f"confusion matrix shape {cm.shape} does not match "
            f"{n_classes} classes"
        )

    total = cm.sum()
    row_totals = cm.sum(axis=1)   # number of true samples per class
    col_totals = cm.sum(axis=0)   # number of predictions per class

    metrics = {}
    for i, cls in enumerate(classes):
        TP = cm[i, i]
        FN = row_totals[i] - TP
        FP = col_totals[i] - TP
        TN = total - TP - FN - FP

        prec = TP / (TP + FP) if (TP + FP) > 0 else 0.0
        rec  = TP / (TP + FN) if (TP + FN) > 0 else 0.0
        spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0
        f1   = (2 * prec * rec) / (prec + rec) if (prec + rec) > 0 else 0.0
        metrics[cls] = dict(precision=prec, sensitivity=rec, specificity=spec, f1=f1)

    OA = np.trace(cm) / total if total > 0 else 0.0
    return metrics, OA

# -------------------------------
# One independent run (one file)
# -------------------------------
def run_one_file(csv_path):
    """Tune, train and evaluate a random forest on one CSV file.

    The CSV must contain the columns in ``feature_cols``, the label column
    ``target_col`` and a ``folder`` column whose values ``'training'`` and
    ``'testing'`` select the two splits. Hyperparameters are chosen with
    ``GridSearchCV`` over ``param_grid`` on the training rows; the winning
    configuration is then fitted on all training rows with 1000 trees and
    scored on the testing rows.

    Files written to ``<out_dir>/<tag>/`` (``tag`` = CSV file name without
    extension): best parameters (JSON), fitted pipeline (joblib), count and
    row-normalised confusion-matrix PNGs, per-class metrics CSV, and feature
    importances as CSV and PNG.

    Parameters
    ----------
    csv_path : str
        Path to the input CSV.

    Returns
    -------
    dict
        Keys: ``tag``, ``best_cv_score``, ``best_params`` (as selected by the
        search, i.e. before the tree count is raised), ``cm``, ``per_class``,
        ``OA``, ``fi`` (importances sorted descending), ``y_true``, ``y_pred``.

    Raises
    ------
    ValueError
        If the file has no training rows or no testing rows.
    """
    from sklearn.base import clone

    tag = os.path.splitext(os.path.basename(csv_path))[0]
    run_dir = os.path.join(out_dir, tag)
    os.makedirs(run_dir, exist_ok=True)

    df = pd.read_csv(csv_path)
    train_df = df[df['folder'] == 'training'].copy()
    test_df  = df[df['folder'] == 'testing'].copy()
    # Fail early with a readable message instead of an obscure error raised
    # from inside the cross-validation or prediction code.
    if train_df.empty or test_df.empty:
        raise ValueError(
            f"{csv_path}: need rows with folder == 'training' and 'testing'; "
            f"found {len(train_df)} training and {len(test_df)} testing rows"
        )

    X_tr, y_tr = train_df[feature_cols], train_df[target_col].astype(str)
    X_te, y_te = test_df[feature_cols],  test_df[target_col].astype(str)

    pipe = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("rf", RandomForestClassifier(
            random_state=42, n_jobs=-1, class_weight="balanced"
        ))
    ])
    # refit=False: the search's own refit would be thrown away, because the
    # final model is fitted below with a larger forest. best_params_ and
    # best_score_ remain available for single-metric scoring.
    grid = GridSearchCV(pipe, param_grid=param_grid, scoring="f1_weighted",
                        cv=cv5, n_jobs=-1, verbose=0, refit=False)
    grid.fit(X_tr, y_tr)

    # Save the hyperparameters exactly as selected by the search
    best_params = grid.best_params_.copy()
    with open(os.path.join(run_dir, f"{tag}_best_params.json"), "w", encoding="utf-8") as f:
        json.dump(best_params, f, indent=4)

    # Variance reduction: fit the selected configuration with 1000 trees
    best_params["rf__n_estimators"] = 1000
    final_model = clone(pipe)
    final_model.set_params(**best_params)
    final_model.fit(X_tr, y_tr)

    # Save model to .joblib
    model_path = os.path.join(run_dir, f"{tag}_best_model.joblib")
    joblib.dump(final_model, model_path)
    print(f"Best model saved → {model_path}")

    # Test predictions
    y_pred = final_model.predict(X_te)

    # Confusion matrices (rows/columns fixed to classes_order)
    cm = confusion_matrix(y_te, y_pred, labels=classes_order)
    plot_cm(cm, "Prediction number",
            os.path.join(run_dir, f"{tag}_cm_counts.png"), normalize=False)
    plot_cm(cm, "Prediction percentage",
            os.path.join(run_dir, f"{tag}_cm_norm.png"), normalize=True)

    # Metrics from CM, saved per class for this file
    per_class, OA = cm_to_metrics(cm)
    per_file_rows = [
        {
            "Dataset": tag, "Class": cls,
            "Precision": per_class[cls]["precision"],
            "Sensitivity": per_class[cls]["sensitivity"],
            "Specificity": per_class[cls]["specificity"],
            "F1": per_class[cls]["f1"],
        }
        for cls in classes_order
    ]
    pd.DataFrame(per_file_rows).to_csv(os.path.join(run_dir, f"{tag}_per_class_metrics.csv"), index=False)

    # Feature importances of the final forest, largest first
    rf = final_model.named_steps["rf"]
    fi = pd.Series(rf.feature_importances_, index=feature_cols).sort_values(ascending=False)
    fi.to_csv(os.path.join(run_dir, f"{tag}_feature_importances.csv"), header=["importance"])
    plt.figure(figsize=(8,4), dpi=300)
    fi.plot(kind='bar')
    plt.ylabel('Importance')
    plt.title('Feature Importances')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(os.path.join(run_dir, f"{tag}_feature_importances.png"), dpi=300)
    plt.close()

    return {
        "tag": tag,
        "best_cv_score": grid.best_score_,
        "best_params": grid.best_params_,
        "cm": cm,
        "per_class": per_class,
        "OA": OA,
        "fi": fi,
        "y_true": y_te.to_numpy(),
        "y_pred": y_pred
    }

# -------------------------------
# Run all three files independently
# -------------------------------
results = [run_one_file(p) for p in csv_files]

# -------------------------------
# Build the overall performance table (your format, in %)
# -------------------------------
# Display name -> key in the per-class metric dicts, in table row order.
metric_keys = [
    ("Precision", "precision"),
    ("Sensitivity", "sensitivity"),
    ("Specificity", "specificity"),
    ("F1-score", "f1"),
]

# Long-format table: each metric of each class, averaged over the runs.
rows = [
    {
        "Metric": metric_name,
        "Class": cls,
        "Value": np.mean([r["per_class"][cls][key] for r in results]),
    }
    for cls in classes_order
    for metric_name, key in metric_keys
]

OA_mean = np.mean([r["OA"] for r in results])

tbl = pd.DataFrame(rows)
print(tbl)
pivot = tbl.pivot(index="Metric", columns="Class", values="Value").reindex(
    [metric_name for metric_name, _ in metric_keys]
)
# "Average" is the unweighted mean over the class columns.
pivot["Average"] = pivot.mean(axis=1)
print(pivot)
final_table = (pivot * 100).round(2)

# Overall accuracy has no per-class value, so the class columns are NaN.
# Columns are derived from classes_order so the table follows the class list.
oa_row = pd.DataFrame(
    {**{cls: [np.nan] for cls in classes_order}, "Average": [round(100*OA_mean, 2)]},
    index=["Overall Accuracy"],
)
final_table = pd.concat([final_table, oa_row], axis=0)
final_table = final_table[[*classes_order, "Average"]]

print("\n=== Overall Testing Performance (3 independent runs) – Macro Metrics (%) ===")
print(final_table)
final_table.to_csv(os.path.join(out_dir, "overall_metrics_table.csv"))

# Also save per-file summary
# One flat record per dataset: CV score, overall accuracy, then the four
# per-class metrics for every class in classes_order.
per_file_summary = []
for res in results:
    record = {
        "Dataset": res["tag"],
        "Best_CV_F1_weighted": res["best_cv_score"],
        "Overall_Accuracy": res["OA"],
    }
    for label in classes_order:
        scores = res["per_class"][label]
        record.update({
            f"{label}_Precision": scores["precision"],
            f"{label}_Sensitivity": scores["sensitivity"],
            f"{label}_Specificity": scores["specificity"],
            f"{label}_F1": scores["f1"],
        })
    per_file_summary.append(record)

summary_csv = os.path.join(out_dir, "per_file_metrics_raw.csv")
pd.DataFrame(per_file_summary).to_csv(summary_csv, index=False)

# -------------------------------
# Overall pooled confusion matrix across all three test sets
# -------------------------------
# Element-wise sum of the per-run confusion matrices (pooled test counts).
overall_cm = sum(r["cm"] for r in results)

# Row-normalised copy, kept as a module-level name for later inspection.
# Rows that sum to zero produce NaN; the division warnings are silenced.
with np.errstate(invalid="ignore", divide="ignore"):
    overall_cm_norm = overall_cm.astype(float) / overall_cm.sum(axis=1, keepdims=True)

# Reuse the shared plotting helper so the pooled figures are drawn the same
# way as the per-file ones (plot_cm normalises internally when asked to).
plot_cm(overall_cm, "Prediction number",
        os.path.join(out_dir, "overall_cm_counts.png"), normalize=False)
plot_cm(overall_cm, "Prediction percentage",
        os.path.join(out_dir, "overall_cm_norm.png"), normalize=True)

# -------------------------------
# Aggregate feature importances (mean ± SD) across runs
# -------------------------------
# Rows = runs, columns = features; a feature missing from a run counts as 0.
fi_by_run = {r["tag"]: r["fi"] for r in results}
fi_df = pd.DataFrame(fi_by_run).T.fillna(0.0)
fi_df.to_csv(os.path.join(out_dir, "feature_importances_by_file.csv"))

# Mean and standard deviation per feature, ordered by decreasing mean.
fi_mean = fi_df.mean(axis=0).sort_values(ascending=False)
fi_std = fi_df.std(axis=0).reindex(fi_mean.index)

agg_fi = pd.DataFrame({"mean_importance": fi_mean, "std_importance": fi_std})
agg_fi.to_csv(os.path.join(out_dir, "feature_importances_mean_std.csv"))

# Bar chart of the means with the standard deviation as error bars.
bar_positions = range(len(fi_mean))
fig, ax = plt.subplots(figsize=(9,4.5), dpi=300)
ax.bar(bar_positions, fi_mean.values, yerr=fi_std.values, capsize=3)
ax.set_xticks(bar_positions)
ax.set_xticklabels(fi_mean.index, rotation=45, ha='right')
ax.set_ylabel('Mean Importance (± SD)')
ax.set_title('Feature Importances')
fig.tight_layout()
fig.savefig(os.path.join(out_dir, "feature_importances_mean_std.png"), dpi=300)
plt.close(fig)

print(f"\nAll outputs saved to: {os.path.abspath(out_dir)}")


Best model saved → ./rf_paddock_hold-out_3_repetition_outputs\split_pad3\split_pad3_best_model.joblib
Best model saved → ./rf_paddock_hold-out_3_repetition_outputs\split_pad5\split_pad5_best_model.joblib
Best model saved → ./rf_paddock_hold-out_3_repetition_outputs\split_pad9\split_pad9_best_model.joblib
        Metric   Class     Value
0    Precision   grass  0.767943
1  Sensitivity   grass  0.973118
2  Specificity   grass  0.606349
3     F1-score   grass  0.858251
4    Precision  legume  0.951804
5  Sensitivity  legume  0.606349
6  Specificity  legume  0.973118
7     F1-score  legume  0.738078
Class           grass    legume   Average
Metric                                   
Precision    0.767943  0.951804  0.859874
Sensitivity  0.973118  0.606349  0.789734
Specificity  0.606349  0.973118  0.789734
F1-score     0.858251  0.738078  0.798165

=== Overall Testing Performance (3 independent runs) – Macro Metrics (%) ===
                  grass  legume  Average
Precision         76.79   