# Imports

In [None]:
import pickle
import json
import pandas as pd
from scipy.stats import wilcoxon

# Load data

In [2]:
# NOTE: unpickling can execute arbitrary code, so these .pkl files must come
# from a trusted source (here: this project's own model runs).
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _read_json(path):
    """Return the parsed contents of the JSON file at ``path``."""
    with open(path, "r") as handle:
        return json.load(handle)


# ANN results are stored as pickles.
ann_full_data = _read_pickle("outputs/model_results/ann_full.pkl")
ann_fe_data = _read_pickle("outputs/model_results/ann_fe.pkl")
ann_reduced_data = _read_pickle("outputs/model_results/ann_fe_reduced.pkl")

# SAR results are stored as JSON.
sar_fe_data = _read_json("outputs/model_results/sar_fe.json")
sar_reduced_data = _read_json("outputs/model_results/sar_fe_reduced.json")

# GWR results are stored as a pickle.
gwr_reduced_data = _read_pickle("outputs/model_results/gwr_fe_reduced.pkl")

# Util functions

In [3]:
def create_results_table(data):
    """Flatten per-split model results into a DataFrame, one row per entry.

    Parameters
    ----------
    data : iterable of dict
        Each entry must provide the keys ``"outer_split"``, ``"hps"`` (a
        mapping whose items become individual columns), ``"mae"``, ``"mse"``
        and ``"r2"``.

    Returns
    -------
    pandas.DataFrame
        Columns are ``outer_split``, the hyper-parameter columns, then
        ``mae``, ``mse`` and ``r2``. When both ``k`` and ``max_distance``
        are present they are merged into a single integer column named
        ``"Weighting param"`` (``k`` where set, ``max_distance`` otherwise).
        float64/float32 columns are rounded to 6 decimals.
    """
    rows = []
    for entry in data:
        row = {"outer_split": entry["outer_split"]}
        row.update(entry["hps"])
        for metric in ("mae", "mse", "r2"):
            row[metric] = entry[metric]
        rows.append(row)
    table = pd.DataFrame(rows)

    if {"k", "max_distance"}.issubset(table.columns):
        # Each row is expected to carry only one of the two parameters, so
        # fill the gaps in ``k`` with ``max_distance`` and keep one column.
        merged = table["k"].fillna(table["max_distance"])
        table.insert(2, "Weighting param", merged)
        table["Weighting param"] = merged.astype(int)
        table = table.drop(columns=["k", "max_distance"])

    float_columns = table.select_dtypes(include=["float64", "float32"]).columns
    table[float_columns] = table[float_columns].round(6)

    return table

In [4]:
def create_stats_table(df, model_name, dataset_name):
    """Summarise the per-split MAE, MSE and R2 scores of one model/dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"mae"``, ``"mse"`` and ``"r2"``; each row
        is one split.
    model_name, dataset_name :
        Values written to the ``"Model"`` and ``"Dataset"`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per metric. Columns: ``Model``, ``Dataset``, ``Metric``, one
        column per row label of ``df`` holding that split's score, then
        ``Best score``, ``Worst score``, ``Mean score``, ``Std dev``,
        ``Best split`` and ``Worst split``. "Best" means the maximum for
        ``r2`` and the minimum for the other metrics. The split columns
        report the row label of ``df``, not the ``outer_split`` value.
        float64/float32 columns are rounded to 6 decimals.
    """
    scores = df[["mae", "mse", "r2"]].copy().transpose()
    split_labels = scores.columns
    per_split = scores[split_labels]

    def _by_direction(for_r2, for_errors):
        # r2 is higher-is-better; every other row is lower-is-better.
        return per_split.apply(
            lambda row: for_r2(row) if row.name == "r2" else for_errors(row),
            axis=1,
        )

    stats = scores.assign(
        **{
            "Best score": _by_direction(pd.Series.max, pd.Series.min),
            "Worst score": _by_direction(pd.Series.min, pd.Series.max),
            "Mean score": per_split.mean(axis=1),
            "Std dev": per_split.std(axis=1),
            "Best split": _by_direction(pd.Series.idxmax, pd.Series.idxmin),
            "Worst split": _by_direction(pd.Series.idxmin, pd.Series.idxmax),
        }
    )

    # The metric names live in the index after the transpose; expose them
    # as a regular column.
    stats = stats.reset_index().rename(columns={"index": "Metric"})
    stats.insert(0, "Model", model_name)
    stats.insert(1, "Dataset", dataset_name)

    float_columns = stats.select_dtypes(include=["float64", "float32"]).columns
    stats[float_columns] = stats[float_columns].round(6)
    return stats

# Create model tables

In [5]:
# Per-split results tables.
# The ANN tables carry an extra "outer_loop_split" column that is dropped
# here; create_results_table itself only emits "outer_split", so it
# presumably arrives through the ANN hyper-parameter dict (TODO confirm).
ann_full_results = create_results_table(ann_full_data).drop(
    columns=["outer_loop_split"]
)
ann_fe_results = create_results_table(ann_fe_data).drop(
    columns=["outer_loop_split"]
)
ann_reduced_results = create_results_table(ann_reduced_data).drop(
    columns=["outer_loop_split"]
)
sar_fe_results = create_results_table(sar_fe_data)
sar_reduced_results = create_results_table(sar_reduced_data)
gwr_reduced_results = create_results_table(gwr_reduced_data)

# Summary statistics, one table per (model, dataset) pair.
ann_full_stats = create_stats_table(ann_full_results, "FNN", "Full")
ann_fe_stats = create_stats_table(ann_fe_results, "FNN", "FE")
ann_reduced_stats = create_stats_table(ann_reduced_results, "FNN", "Reduced")
sar_fe_stats = create_stats_table(sar_fe_results, "SAR", "FE")
sar_reduced_stats = create_stats_table(sar_reduced_results, "SAR", "Reduced")
gwr_reduced_stats = create_stats_table(gwr_reduced_results, "GWR", "Reduced")

# Summaries

In [6]:
def _dataset_view(table, dataset):
    """Rows of ``table`` for one dataset, without dataset/split-label columns."""
    rows = table.loc[table["Dataset"] == dataset]
    return rows.drop(columns=["Dataset", "Worst split", "Best split"])


def _metric_ranking(table, metric, ascending):
    """Rows of ``table`` for one metric, ordered by mean score."""
    rows = table.loc[table["Metric"] == metric]
    ranked = rows.sort_values("Mean score", ascending=ascending)
    return ranked.drop(columns=["Metric", "Worst split", "Best split"])


# Stack every stats table, then remove the per-split score columns.
# The labels 0-4 are hard-coded, i.e. this assumes five splits per model
# whose results rows are labelled 0..4.
summary = pd.concat(
    [
        ann_full_stats,
        ann_fe_stats,
        ann_reduced_stats,
        sar_fe_stats,
        sar_reduced_stats,
        gwr_reduced_stats,
    ]
).drop(columns=[0, 1, 2, 3, 4])

# One view per experiment (dataset).
exp_1_summary = _dataset_view(summary, "Full")
exp_2_summary = _dataset_view(summary, "FE")
exp_3_summary = _dataset_view(summary, "Reduced")

# One ranking per metric: errors ascending, R2 descending, so the best
# mean score comes first in each.
mae_summary = _metric_ranking(summary, "mae", ascending=True)
mse_summary = _metric_ranking(summary, "mse", ascending=True)
r2_summary = _metric_ranking(summary, "r2", ascending=False)

# Hypothesis testing

In [7]:
# (results column, label written to the table, one-sided alternative).
# The first sample passed to wilcoxon is always the ANN, so "less" is used
# for the error metrics and "greater" for R2.
_METRIC_TESTS = [
    ("mae", "MAE", "less"),
    ("mse", "MSE", "less"),
    ("r2", "R2", "greater"),
]

# (rival model label, dataset label, ANN results, rival results).
# Both models in a pair were evaluated on the same dataset.
_SAME_DATASET_PAIRS = [
    ("SAR", "FE", ann_fe_results, sar_fe_results),
    ("SAR", "Reduced", ann_reduced_results, sar_reduced_results),
    ("GWR", "Reduced", ann_reduced_results, gwr_reduced_results),
]

# NOTE(review): the stats tables label this model "FNN" while these rows use
# "ANN" - confirm which label is intended in the combined output.
comparisons = [
    (
        "ANN",
        rival,
        dataset,
        label,
        wilcoxon(ann_scores[column], rival_scores[column], alternative=alternative),
    )
    for column, label, alternative in _METRIC_TESTS
    for rival, dataset, ann_scores, rival_scores in _SAME_DATASET_PAIRS
]

# Build the dataframe column by column from the comparison tuples.
h_test_summary = pd.DataFrame(
    {
        "Metric": [entry[3] for entry in comparisons],
        "Model 1": [entry[0] for entry in comparisons],
        "Model 2": [entry[1] for entry in comparisons],
        "Dataset": [entry[2] for entry in comparisons],
        "Statistic": [entry[4].statistic for entry in comparisons],
        "P-value": [entry[4].pvalue for entry in comparisons],
    }
)

In [8]:
# (results column, label written to the table, one-sided alternative).
# The ANN trained on the full dataset is always the first sample, so "less"
# is used for the error metrics and "greater" for R2.
_FULL_METRIC_TESTS = [
    ("mae", "MAE", "less"),
    ("mse", "MSE", "less"),
    ("r2", "R2", "greater"),
]

# (rival model label, rival dataset label, rival results).
_FULL_RIVALS = [
    ("SAR", "FE", sar_fe_results),
    ("SAR", "Reduced", sar_reduced_results),
    ("GWR", "Reduced", gwr_reduced_results),
]

ann_full_comparisons = [
    (
        "ANN",
        "Full",
        rival,
        rival_dataset,
        label,
        wilcoxon(
            ann_full_results[column], rival_scores[column], alternative=alternative
        ),
    )
    for column, label, alternative in _FULL_METRIC_TESTS
    for rival, rival_dataset, rival_scores in _FULL_RIVALS
]

# Build the dataframe column by column from the comparison tuples.
ann_full_h_test_summary = pd.DataFrame(
    {
        "Metric": [entry[4] for entry in ann_full_comparisons],
        "Model 1": [entry[0] for entry in ann_full_comparisons],
        "Model 1 dataset": [entry[1] for entry in ann_full_comparisons],
        "Model 2": [entry[2] for entry in ann_full_comparisons],
        "Model 2 dataset": [entry[3] for entry in ann_full_comparisons],
        "Statistic": [entry[5].statistic for entry in ann_full_comparisons],
        "P-value": [entry[5].pvalue for entry in ann_full_comparisons],
    }
)

# Save output

In [9]:
# (section title, table) pairs, written to the output file in this order.
sections = [
    ("Summary", summary),
    ("Experiment 1", exp_1_summary),
    ("Experiment 2", exp_2_summary),
    ("Experiment 3", exp_3_summary),
    ("MAE summary", mae_summary),
    ("MSE summary", mse_summary),
    ("R2 summary", r2_summary),
    ("ANN full results", ann_full_results),
    ("ANN FE results", ann_fe_results),
    ("ANN reduced results", ann_reduced_results),
    ("SAR FE results", sar_fe_results),
    ("SAR reduced results", sar_reduced_results),
    ("GWR reduced results", gwr_reduced_results),
    ("Hypothesis testing results", h_test_summary),
    ("FNN hypothesis testing results", ann_full_h_test_summary),
]

# Raw metric keys -> display labels. Applied to cell values (e.g. the
# "Metric" column) and, via column_labels, to column headers.
metric_labels = {
    "mae": "MAE",
    "mse": "MSE",
    "r2": "R2",
}

# Raw column names -> human-readable headers. Names that a given table does
# not have are simply ignored by DataFrame.rename.
column_labels = {
    **metric_labels,
    "outer_split": "Outer CV split",
    "no_of_layers": "No. of layers",
    "no_of_nodes": "No. of nodes",
    "batch_size": "Batch size",
    "learning_rate": "Learning rate",
    "loss_function": "Loss function",
    "weighting_method": "Weighting method",
    "kernel": "Kernel",
    "criterion": "Criterion",
}

# All tables go into one file: a title line, the table as CSV, and a blank
# line between sections. newline="" stops the text layer from translating
# "\n" into the platform line ending, so the file uses "\n" throughout as
# requested via lineterminator (pandas asks for this when given a handle).
with open("outputs/data_analyses/combined_results.csv", "w", newline="") as f:
    for i, (title, result) in enumerate(sections):
        f.write(f"{title}\n")
        result = result.replace(metric_labels).rename(columns=column_labels)
        result.to_csv(f, index=False, lineterminator="\n")
        if i < len(sections) - 1:
            f.write("\n")
