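This notebook evaluates the trained segmentation models on the test tiles and visualizes the results: a metrics summary, per-model confusion matrices, and comparison plots (bar chart and radar chart).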

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, f1_score, jaccard_score,
    accuracy_score, precision_score, recall_score
)
from tensorflow.keras.models import load_model
import rasterio
from sklearn.model_selection import train_test_split
from math import pi
# Root directory of the experiment outputs; the saved models are read from
# MODEL_DIR, which is the same directory.
OUT_DIR = "../experiments_final"
MODEL_DIR = OUT_DIR
# Everything this notebook writes (metrics CSV, figures, cached test tiles)
# goes under SAVE_DIR, which is created here if it does not exist yet.
SAVE_DIR = os.path.join(OUT_DIR, "evaluation_results")
os.makedirs(SAVE_DIR, exist_ok=True)
# Source raster and its mask raster, used to rebuild the test tiles when no
# cached copy is available.
RAW_IMAGE = "../data/raw/20241110_053942_45_24f7_3B_AnalyticMS_SR_8b_clip.tif"
RAW_MASK  = "../data/raw/20241110_053942_45_24f7_3B_AnalyticMS_SR_8b_clip_Hybrid_mask.tif"
# Cache files for the test split (images and masks respectively).
TEST_SAVE_X = os.path.join(SAVE_DIR, "testX.npy")
TEST_SAVE_Y = os.path.join(SAVE_DIR, "testY.npy")
def create_tiles(image, mask, tile_size=256):
    """Cut an image and its mask into aligned, non-overlapping square tiles.

    Tiles are taken on a regular grid starting at the top-left corner, row by
    row. Only full ``tile_size`` x ``tile_size`` tiles are kept; partial tiles
    along the bottom and right edges are dropped.

    Args:
        image: Array whose first two axes are height and width (e.g.
            ``(H, W, C)``). Any trailing axes are carried through unchanged.
        mask: Array indexed the same way as ``image`` on its first two axes
            (expected to be 2-D ``(H, W)``).
        tile_size: Edge length of each tile in pixels. Must be positive.

    Returns:
        A ``(tiles_img, tiles_mask)`` pair. ``tiles_img`` has shape
        ``(N, tile_size, tile_size, ...)`` and ``tiles_mask`` has the mask
        tiles with one extra trailing axis of length 1. When no full tile
        fits, both arrays are empty (``N == 0``) but keep those shapes.

    Raises:
        ValueError: If ``tile_size`` is not positive.
    """
    if tile_size <= 0:
        raise ValueError(f"tile_size must be positive, got {tile_size}")

    height, width = image.shape[:2]
    # Stop early enough that every origin yields a full tile; this replaces a
    # per-tile shape check on the slices.
    row_starts = range(0, height - tile_size + 1, tile_size)
    col_starts = range(0, width - tile_size + 1, tile_size)

    tiles_img = [
        image[i:i + tile_size, j:j + tile_size]
        for i in row_starts
        for j in col_starts
    ]
    tiles_mask = [
        mask[i:i + tile_size, j:j + tile_size]
        for i in row_starts
        for j in col_starts
    ]

    if not tiles_img:
        # np.array([]) would collapse to shape (0,), which breaks callers that
        # reduce over the tile axes; return properly shaped empty arrays.
        img_shape = (0, tile_size, tile_size) + tuple(np.shape(image)[2:])
        mask_shape = (0, tile_size, tile_size) + tuple(np.shape(mask)[2:]) + (1,)
        return (
            np.empty(img_shape, dtype=np.asarray(image).dtype),
            np.empty(mask_shape, dtype=np.asarray(mask).dtype),
        )

    # The trailing axis gives the masks an explicit single channel.
    return np.array(tiles_img), np.array(tiles_mask)[..., np.newaxis]
def load_test_tiles(img_path, mask_path, seed=42):
    """Return the test tiles, reading them from the on-disk cache when possible.

    If both ``TEST_SAVE_X`` and ``TEST_SAVE_Y`` exist they are loaded and
    returned as-is. Note that the cache is not keyed on ``img_path``,
    ``mask_path`` or ``seed``: delete the cached files to force a rebuild
    with different arguments.

    Otherwise the image and mask rasters are read, the image is scaled by its
    global maximum, both are tiled with ``create_tiles``, tiles whose mask is
    entirely zero are discarded, and the test subset is selected by two
    successive seeded splits. The result is saved to the cache files.

    Args:
        img_path: Path to the multi-band image raster.
        mask_path: Path to the mask raster (band 1 is used).
        seed: ``random_state`` for both ``train_test_split`` calls.

    Returns:
        A ``(testX, testY)`` pair of image tiles and mask tiles.
    """
    if os.path.exists(TEST_SAVE_X) and os.path.exists(TEST_SAVE_Y):
        print("Loading pre-saved test tiles ...")
        testX = np.load(TEST_SAVE_X)
        testY = np.load(TEST_SAVE_Y)
        print(f" Loaded test data: {testX.shape}, {testY.shape}")
        return testX, testY

    print(" Rebuilding test tiles from image & mask ...")
    with rasterio.open(img_path) as s:
        # rasterio returns (bands, rows, cols); move bands to the last axis.
        img = np.moveaxis(s.read(), 0, 2).astype(np.float32)
        # Scale by the global maximum; the epsilon guards an all-zero image.
        img = img / (np.max(img) + 1e-8)
    with rasterio.open(mask_path) as s:
        mask = s.read(1).astype(np.uint8)

    X, Y = create_tiles(img, mask)
    # Keep only tiles that contain at least one non-zero mask pixel.
    non_empty = np.sum(Y, axis=(1, 2, 3)) > 0
    X, Y = X[non_empty], Y[non_empty]

    # 75% of the tiles are held out, then 2/3 of those become the test set,
    # i.e. about half of all non-empty tiles.
    # NOTE(review): presumably mirrors the split used at training time so the
    # test tiles are unseen by the models; confirm against the training code.
    _, tempX, _, tempY = train_test_split(X, Y, test_size=0.75, random_state=seed)
    _, testX, _, testY = train_test_split(tempX, tempY, test_size=2/3, random_state=seed)

    np.save(TEST_SAVE_X, testX)
    np.save(TEST_SAVE_Y, testY)
    print(f" Test tiles rebuilt and saved ({testX.shape}, {testY.shape})")
    return testX, testY
# Fetch the evaluation tiles (from the cache, or rebuilt from the rasters).
testX, testY = load_test_tiles(RAW_IMAGE, RAW_MASK)

# Each architecture is stored as "<name>.keras" in MODEL_DIR. The models are
# loaded with compile=False; this notebook only calls predict() on them.
model_names = ["unet", "resunet", "attnunet", "attnresunet", "asdms"]
models = {}
for arch in model_names:
    weights_path = os.path.join(MODEL_DIR, f"{arch}.keras")
    models[arch] = load_model(weights_path, compile=False)
def evaluate_model(model, X, Y):
    """Score a binary segmentation model pixel-wise against reference masks.

    The model output is thresholded at 0.5, then predictions and targets are
    flattened so every pixel counts as one sample.

    Args:
        model: Object exposing ``predict(X, verbose=0)`` whose output can be
            compared against 0.5 element-wise.
        X: Input tiles passed straight to ``model.predict``.
        Y: Ground-truth masks with values 0/1 and as many elements as the
            model output.

    Returns:
        A ``(metrics, cm)`` pair. ``metrics`` maps "IoU", "F1", "Accuracy",
        "Precision", "Recall", "TP", "FP", "FN" and "TN" to their values;
        ``cm`` is the 2x2 confusion matrix with rows = true class and
        columns = predicted class, in label order [0, 1].
    """
    preds = (model.predict(X, verbose=0) > 0.5).astype(np.uint8)
    y_true = Y.ravel()
    y_pred = preds.ravel()

    # Pin the label order so the matrix is always 2x2. Without it, data in
    # which only one class occurs yields a 1x1 matrix and the TP/FP/FN/TN
    # counts cannot be recovered.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    metrics = {
        "IoU": jaccard_score(y_true, y_pred, zero_division=0),
        "F1": f1_score(y_true, y_pred, zero_division=0),
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, zero_division=0),
        "Recall": recall_score(y_true, y_pred, zero_division=0),
        "TP": tp, "FP": fp, "FN": fn, "TN": tn
    }
    return metrics, cm
# Score every loaded model on the same test tiles, keeping the metric dict
# and the confusion matrix of each under the model's name.
all_metrics, conf_matrices = {}, {}
for name, model in models.items():
    print(f"\n Evaluating {name} ...")
    metrics, cm = evaluate_model(model, testX, testY)
    all_metrics[name], conf_matrices[name] = metrics, cm
    print(f" {name.upper():10s} — F1={metrics['F1']:.4f}, IoU={metrics['IoU']:.4f}, Acc={metrics['Accuracy']:.4f}")

# Transpose so that each row is a model and each column a metric, then
# persist the table next to the other evaluation artifacts.
metrics_csv_path = os.path.join(SAVE_DIR, "metrics_summary.csv")
metrics_df = pd.DataFrame(all_metrics).T
metrics_df.to_csv(metrics_csv_path, index=True)
print(f"\n Metrics saved to: {metrics_csv_path}")
# One annotated heatmap per model, written to SAVE_DIR as
# "<name>_confusion_matrix.png". Each figure is closed after saving so that
# figures do not accumulate across the loop.
for name, cm in conf_matrices.items():
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        cbar=False,
        xticklabels=["Pred 0", "Pred 1"],
        yticklabels=["True 0", "True 1"],
        ax=ax,
    )
    ax.set_title(f"Confusion Matrix — {name.upper()}")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    fig.tight_layout()
    fig.savefig(os.path.join(SAVE_DIR, f"{name}_confusion_matrix.png"), dpi=300)
    plt.close(fig)
    print(f" Saved: {name}_confusion_matrix.png")
# Grouped bar chart comparing the five score metrics across models.
# DataFrame.plot opens its own figure unless it is handed an Axes, so the
# size is passed to it directly. A separate plt.figure(figsize=...) call
# would be left behind empty and unclosed, and its size would not apply to
# the chart.
ax = metrics_df[["IoU", "F1", "Accuracy", "Precision", "Recall"]].plot(
    kind='bar', figsize=(10, 6), rot=0
)
ax.set_title(" Model Performance Comparison (Landslide Segmentation)")
ax.set_ylabel("Score")
# Anchor the legend outside the axes so it does not cover the bars.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, axis='y')
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(os.path.join(SAVE_DIR, "model_performance_comparison.png"), dpi=300)
plt.close(fig)
print("Saved bar chart comparison.")
# Radar chart: one closed polygon per model over the five score metrics.
categories = ["IoU", "F1", "Accuracy", "Precision", "Recall"]
N = len(categories)
# One spoke per metric, evenly spaced; the first angle is repeated at the end
# so each polygon closes on itself.
angles = [n / float(N) * 2 * np.pi for n in range(N)]
angles += angles[:1]

plt.figure(figsize=(7, 7))
ax = plt.subplot(111, polar=True)
for name, row in metrics_df.iterrows():
    values = row[categories].tolist()
    values += values[:1]
    line, = ax.plot(angles, values, label=name)
    # Reuse the outline colour so each filled area matches its legend entry.
    ax.fill(angles, values, alpha=0.1, color=line.get_color())

# Label each spoke with its metric name; otherwise the axes show the default
# degree ticks, which do not line up with the five metrics.
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories)

plt.title("Radar Chart — Model Metrics", size=14)
plt.legend(loc='upper right', bbox_to_anchor=(1.25, 1.15))
plt.tight_layout()
plt.savefig(os.path.join(SAVE_DIR, "model_metrics_radar.png"), dpi=300)
plt.close()
print("Saved radar chart of model metrics.")
# Closing console summary: the full metrics table rounded to four decimals.
print("\n Evaluation Complete! Final Results:", metrics_df.round(4), sep="\n")
