# Imports

In [26]:
import pickle
import json
import pandas as pd

# Format decimals

In [27]:
# Render every float in displayed frames with exactly six decimal places.
pd.set_option("display.float_format", "{:.6f}".format)

# Load data

In [28]:
def _read_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _read_json(path):
    """Return the parsed content of the JSON text file at ``path``."""
    with open(path, "r") as handle:
        return json.load(handle)


# Stored results of each model / dataset combination, loaded in a fixed order.
ann_full_data = _read_pickle("outputs/model_results/ann_full.pkl")
ann_fe_data = _read_pickle("outputs/model_results/ann_fe.pkl")
ann_reduced_data = _read_pickle("outputs/model_results/ann_fe_reduced.pkl")

sar_fe_data = _read_json("outputs/model_results/sar_fe.json")
sar_reduced_data = _read_json("outputs/model_results/sar_fe_reduced.json")

gwr_reduced_data = _read_pickle("outputs/model_results/gwr_fe_reduced.pkl")

# Util functions

In [29]:
def create_results_table(data):
    """Build a one-row-per-record results table.

    Each record in ``data`` must provide the keys ``"outer_split"``,
    ``"hps"`` (a mapping that is expanded into one column per entry),
    ``"mae"``, ``"mse"`` and ``"r2"``. Columns appear in that order, with
    the expanded ``hps`` entries between ``outer_split`` and the metrics.

    When the table has both a ``k`` and a ``max_distance`` column, they are
    merged into one integer ``"Weighting param"`` column (``k`` where it is
    set, ``max_distance`` otherwise) placed at column position 2, and the two
    source columns are removed.

    Columns of dtype float64/float32 are rounded to 6 decimals.

    Returns:
        pandas.DataFrame: the assembled table.
    """
    rows = []
    for record in data:
        row = {"outer_split": record["outer_split"]}
        row.update(record["hps"])
        # Metric values are assigned last so they win over same-named hps keys.
        row["mae"] = record["mae"]
        row["mse"] = record["mse"]
        row["r2"] = record["r2"]
        rows.append(row)
    results = pd.DataFrame(rows)

    if all(name in results.columns for name in ("k", "max_distance")):
        merged = results["k"].fillna(results["max_distance"]).astype(int)
        results.insert(2, "Weighting param", merged)
        results = results.drop(columns=["k", "max_distance"])

    float_cols = results.select_dtypes(include=["float64", "float32"]).columns
    results[float_cols] = results[float_cols].round(6)

    return results

In [30]:
def create_stats_table(df, model_name, dataset_name):
    """Summarise the ``mae``, ``mse`` and ``r2`` columns of a results table.

    The three metric columns of ``df`` are transposed so that each metric is
    one row and each row of ``df`` is one column, labelled by that row's
    index label in ``df``. The following columns are then appended:

    * ``Best score`` / ``Worst score`` - max/min for ``r2``, min/max for the
      other metrics.
    * ``Mean score`` - mean over the per-split columns.
    * ``Best split`` / ``Worst split`` - the column label holding the best /
      worst score. This is the index label of the row in ``df``, not the
      value of any ``outer_split`` column.

    ``Model`` and ``Dataset`` columns holding ``model_name`` and
    ``dataset_name`` are put first, followed by ``Metric``. Columns of dtype
    float64/float32 are rounded to 6 decimals.

    Returns:
        pandas.DataFrame: one row per metric.
    """

    def higher_is_better(row):
        # r2 is the only metric where a larger value is the better one.
        return row.name == "r2"

    def best_score(row):
        return row.max() if higher_is_better(row) else row.min()

    def worst_score(row):
        return row.min() if higher_is_better(row) else row.max()

    def best_split(row):
        return row.idxmax() if higher_is_better(row) else row.idxmin()

    def worst_split(row):
        return row.idxmin() if higher_is_better(row) else row.idxmax()

    stats = df[["mae", "mse", "r2"]].copy().transpose()
    # Snapshot of the per-split scores, so the aggregate columns added below
    # never feed back into later aggregates.
    per_split = stats[stats.columns]

    stats["Best score"] = per_split.apply(best_score, axis=1)
    stats["Worst score"] = per_split.apply(worst_score, axis=1)
    stats["Mean score"] = per_split.mean(axis=1)
    stats["Best split"] = per_split.apply(best_split, axis=1)
    stats["Worst split"] = per_split.apply(worst_split, axis=1)

    stats = stats.reset_index().rename(columns={"index": "Metric"})
    stats.insert(0, "Model", model_name)
    stats.insert(1, "Dataset", dataset_name)

    float_cols = stats.select_dtypes(include=["float64", "float32"]).columns
    stats[float_cols] = stats[float_cols].round(6)
    return stats

# Create model tables

In [31]:
# FNN: the results tables contain an "outer_loop_split" column (presumably one
# of the stored hyperparameters - verify against the training code) that is
# dropped before the statistics are computed.
ann_full_results = create_results_table(ann_full_data).drop(
    columns=["outer_loop_split"]
)
ann_full_stats = create_stats_table(ann_full_results, "FNN", "Full")

ann_fe_results = create_results_table(ann_fe_data).drop(
    columns=["outer_loop_split"]
)
ann_fe_stats = create_stats_table(ann_fe_results, "FNN", "FE")

ann_reduced_results = create_results_table(ann_reduced_data).drop(
    columns=["outer_loop_split"]
)
ann_reduced_stats = create_stats_table(ann_reduced_results, "FNN", "Reduced")

# SAR: results tables are used exactly as built.
sar_fe_results = create_results_table(sar_fe_data)
sar_fe_stats = create_stats_table(sar_fe_results, "SAR", "FE")

sar_reduced_results = create_results_table(sar_reduced_data)
sar_reduced_stats = create_stats_table(sar_reduced_results, "SAR", "Reduced")

# GWR: results table is used exactly as built.
gwr_reduced_results = create_results_table(gwr_reduced_data)
gwr_reduced_stats = create_stats_table(gwr_reduced_results, "GWR", "Reduced")

# Summaries

In [32]:
def _dataset_summary(stats, dataset_name):
    """Rows of ``stats`` for one dataset, without the dataset/split columns."""
    rows = stats[stats["Dataset"] == dataset_name]
    return rows.drop(columns=["Dataset", "Worst split", "Best split"])


def _metric_ranking(stats, metric_name, ascending):
    """Rows of ``stats`` for one metric, ordered by ``Mean score``."""
    rows = stats[stats["Metric"] == metric_name]
    return rows.sort_values("Mean score", ascending=ascending).drop(
        columns=["Metric", "Worst split", "Best split"]
    )


# Stack the per-model statistics tables; each contributes one row per metric.
summary = pd.concat(
    [
        ann_full_stats,
        ann_fe_stats,
        ann_reduced_stats,
        sar_fe_stats,
        sar_reduced_stats,
        gwr_reduced_stats,
    ]
)

# Drop the per-split score columns. They are the only columns without a string
# label, so selecting them by type works for any number of outer splits
# instead of assuming exactly five (labels 0-4).
split_columns = [label for label in summary.columns if not isinstance(label, str)]
summary = summary.drop(columns=split_columns)

# One view per experiment (dataset variant).
exp_1_summary = _dataset_summary(summary, "Full")
exp_2_summary = _dataset_summary(summary, "FE")
exp_3_summary = _dataset_summary(summary, "Reduced")

# One ranking per metric, best mean score first: errors ascending, r2 descending.
mae_summary = _metric_ranking(summary, "mae", ascending=True)
mse_summary = _metric_ranking(summary, "mse", ascending=True)
r2_summary = _metric_ranking(summary, "r2", ascending=False)

In [33]:
exp_1_summary

Unnamed: 0,Model,Metric,Best score,Worst score,Mean score
0,FNN,mae,0.022492,0.042764,0.031006
1,FNN,mse,0.000822,0.00258,0.001606
2,FNN,r2,0.680459,-0.628751,0.3274


## All metrics

In [34]:
summary

Unnamed: 0,Model,Dataset,Metric,Best score,Worst score,Mean score,Best split,Worst split
0,FNN,Full,mae,0.022492,0.042764,0.031006,0,4
1,FNN,Full,mse,0.000822,0.00258,0.001606,0,4
2,FNN,Full,r2,0.680459,-0.628751,0.3274,1,4
0,FNN,FE,mae,0.025643,258.53549,51.733573,1,4
1,FNN,FE,mse,0.001384,34282500.964439,6856500.194506,1,4
2,FNN,FE,r2,0.553897,-21645689183.549255,-4329137836.448632,1,4
0,FNN,Reduced,mae,0.028205,0.042735,0.035207,0,4
1,FNN,Reduced,mse,0.001248,0.002707,0.001978,0,3
2,FNN,Reduced,r2,0.505411,-0.595766,0.206481,1,4
0,SAR,FE,mae,0.0277,0.0453,0.03414,0,3


## MAE - Lowest to highest

In [35]:
mae_summary

Unnamed: 0,Model,Dataset,Best score,Worst score,Mean score
0,FNN,Full,0.022492,0.042764,0.031006
0,SAR,FE,0.0277,0.0453,0.03414
0,FNN,Reduced,0.028205,0.042735,0.035207
0,SAR,Reduced,0.0308,0.0486,0.03672
0,GWR,Reduced,0.037145,0.057098,0.046388
0,FNN,FE,0.025643,258.53549,51.733573


## MSE - Lowest to highest

In [36]:
mse_summary

Unnamed: 0,Model,Dataset,Best score,Worst score,Mean score
1,FNN,Full,0.000822,0.00258,0.001606
1,FNN,Reduced,0.001248,0.002707,0.001978
1,SAR,FE,0.0012,0.0036,0.00202
1,SAR,Reduced,0.0015,0.0039,0.00228
1,GWR,Reduced,0.002275,0.005164,0.003661
1,FNN,FE,0.001384,34282500.964439,6856500.194506


## R2 - Highest to lowest

In [37]:
r2_summary

Unnamed: 0,Model,Dataset,Best score,Worst score,Mean score
2,FNN,Full,0.680459,-0.628751,0.3274
2,SAR,FE,0.4157,0.1173,0.2894
2,FNN,Reduced,0.505411,-0.595766,0.206481
2,SAR,Reduced,0.4,-0.1149,0.16852
2,GWR,Reduced,-0.087753,-0.58459,-0.331267
2,FNN,FE,0.553897,-21645689183.549255,-4329137836.448632


# Individual models

## ANN - Full dataset

In [38]:
ann_full_results

Unnamed: 0,outer_split,no_of_layers,no_of_nodes,batch_size,learning_rate,loss_function,mae,mse,r2
0,0,3,"[372, 208, 173]",30,0.015686,mae,0.022492,0.000822,0.56087
1,1,4,"[142, 258, 116, 402]",10,0.001662,mae,0.024638,0.000991,0.680459
2,2,2,"[235, 385]",37,0.021922,mse,0.030103,0.001595,0.455013
3,3,4,"[261, 197, 334, 281]",58,0.006583,mse,0.035033,0.002044,0.569407
4,4,4,"[245, 198, 302, 225]",15,0.069612,mae,0.042764,0.00258,-0.628751


In [39]:
ann_full_stats

Unnamed: 0,Model,Dataset,Metric,0,1,2,3,4,Best score,Worst score,Mean score,Best split,Worst split
0,FNN,Full,mae,0.022492,0.024638,0.030103,0.035033,0.042764,0.022492,0.042764,0.031006,0,4
1,FNN,Full,mse,0.000822,0.000991,0.001595,0.002044,0.00258,0.000822,0.00258,0.001606,0,4
2,FNN,Full,r2,0.56087,0.680459,0.455013,0.569407,-0.628751,0.680459,-0.628751,0.3274,1,4


## ANN - Feature engineering

In [40]:
ann_fe_results

Unnamed: 0,outer_split,no_of_layers,no_of_nodes,batch_size,learning_rate,loss_function,mae,mse,r2
0,0,2,"[26, 30]",20,0.045979,mae,0.02989,0.001395,0.25492
1,1,2,"[23, 22]",17,0.057087,mse,0.025643,0.001384,0.553897
2,2,3,"[19, 15, 29]",36,0.056168,mae,0.04239,0.002932,-0.001613
3,3,2,"[15, 16]",46,0.069274,mse,0.034452,0.002379,0.498895
4,4,4,"[14, 27, 16, 8]",49,0.038735,mse,258.53549,34282500.964439,-21645689183.549255


In [41]:
ann_fe_stats

Unnamed: 0,Model,Dataset,Metric,0,1,2,3,4,Best score,Worst score,Mean score,Best split,Worst split
0,FNN,FE,mae,0.02989,0.025643,0.04239,0.034452,258.53549,0.025643,258.53549,51.733573,1,4
1,FNN,FE,mse,0.001395,0.001384,0.002932,0.002379,34282500.964439,0.001384,34282500.964439,6856500.194506,1,4
2,FNN,FE,r2,0.25492,0.553897,-0.001613,0.498895,-21645689183.549255,0.553897,-21645689183.549255,-4329137836.448632,1,4


## ANN - Reduced dataset

In [42]:
ann_reduced_results

Unnamed: 0,outer_split,no_of_layers,no_of_nodes,batch_size,learning_rate,loss_function,mae,mse,r2
0,0,1,[5],34,0.029194,mae,0.028205,0.001248,0.333451
1,1,1,[3],35,0.04503,mse,0.030152,0.001535,0.505411
2,2,1,[6],35,0.023675,mse,0.033473,0.001875,0.359559
3,3,3,"[2, 7, 6]",42,0.012075,mse,0.041472,0.002707,0.42975
4,4,1,[5],35,0.098566,mse,0.042735,0.002527,-0.595766


In [43]:
ann_reduced_stats

Unnamed: 0,Model,Dataset,Metric,0,1,2,3,4,Best score,Worst score,Mean score,Best split,Worst split
0,FNN,Reduced,mae,0.028205,0.030152,0.033473,0.041472,0.042735,0.028205,0.042735,0.035207,0,4
1,FNN,Reduced,mse,0.001248,0.001535,0.001875,0.002707,0.002527,0.001248,0.002707,0.001978,0,3
2,FNN,Reduced,r2,0.333451,0.505411,0.359559,0.42975,-0.595766,0.505411,-0.595766,0.206481,1,4


## SAR - Feature engineering

In [44]:
sar_fe_results

Unnamed: 0,outer_split,weighting_method,Weighting param,mae,mse,r2
0,1,knn,5,0.0277,0.0012,0.3713
1,2,distance,1516,0.032,0.0018,0.4157
2,3,distance,1297,0.0358,0.0021,0.2968
3,4,knn,7,0.0453,0.0036,0.2459
4,5,knn,3,0.0299,0.0014,0.1173


In [45]:
sar_fe_stats

Unnamed: 0,Model,Dataset,Metric,0,1,2,3,4,Best score,Worst score,Mean score,Best split,Worst split
0,SAR,FE,mae,0.0277,0.032,0.0358,0.0453,0.0299,0.0277,0.0453,0.03414,0,3
1,SAR,FE,mse,0.0012,0.0018,0.0021,0.0036,0.0014,0.0012,0.0036,0.00202,0,3
2,SAR,FE,r2,0.3713,0.4157,0.2968,0.2459,0.1173,0.4157,0.1173,0.2894,1,4


## SAR - Reduced dataset

In [46]:
sar_reduced_results

Unnamed: 0,outer_split,weighting_method,Weighting param,mae,mse,r2
0,1,knn,5,0.0308,0.0015,0.1757
1,2,distance,1516,0.0324,0.0019,0.4
2,3,distance,1297,0.0378,0.0023,0.2099
3,4,knn,7,0.0486,0.0039,0.1719
4,5,knn,3,0.034,0.0018,-0.1149


In [47]:
sar_reduced_stats

Unnamed: 0,Model,Dataset,Metric,0,1,2,3,4,Best score,Worst score,Mean score,Best split,Worst split
0,SAR,Reduced,mae,0.0308,0.0324,0.0378,0.0486,0.034,0.0308,0.0486,0.03672,0,3
1,SAR,Reduced,mse,0.0015,0.0019,0.0023,0.0039,0.0018,0.0015,0.0039,0.00228,0,3
2,SAR,Reduced,r2,0.1757,0.4,0.2099,0.1719,-0.1149,0.4,-0.1149,0.16852,1,4


## GWR - Reduced dataset

In [48]:
gwr_reduced_results

Unnamed: 0,outer_split,kernel,criterion,mae,mse,r2
0,0,bisquare,AICc,0.037145,0.002275,-0.214966
1,1,bisquare,AICc,0.051282,0.004464,-0.438794
2,2,bisquare,BIC,0.047725,0.003894,-0.330233
3,3,bisquare,BIC,0.057098,0.005164,-0.087753
4,4,bisquare,CV,0.03869,0.00251,-0.58459


In [49]:
gwr_reduced_stats

Unnamed: 0,Model,Dataset,Metric,0,1,2,3,4,Best score,Worst score,Mean score,Best split,Worst split
0,GWR,Reduced,mae,0.037145,0.051282,0.047725,0.057098,0.03869,0.037145,0.057098,0.046388,0,3
1,GWR,Reduced,mse,0.002275,0.004464,0.003894,0.005164,0.00251,0.002275,0.005164,0.003661,0,3
2,GWR,Reduced,r2,-0.214966,-0.438794,-0.330233,-0.087753,-0.58459,-0.087753,-0.58459,-0.331267,3,4


# Save output

In [50]:
# Tables written to the combined CSV, in output order, each with its title line.
sections = [
    ("Summary", summary),
    ("Experiment 1", exp_1_summary),
    ("Experiment 2", exp_2_summary),
    ("Experiment 3", exp_3_summary),
    ("MAE Summary", mae_summary),
    ("MSE Summary", mse_summary),
    ("R2 Summary", r2_summary),
    ("ANN Full Results", ann_full_results),
    ("ANN FE Results", ann_fe_results),
    ("ANN Reduced Results", ann_reduced_results),
    ("SAR FE Results", sar_fe_results),
    ("SAR Reduced Results", sar_reduced_results),
    ("GWR Reduced Results", gwr_reduced_results),
]

# Presentation labels for metric names. Applied to cell values (any cell equal
# to one of the keys, in any column) and, through column_labels, to headers.
metric_labels = {
    "mae": "MAE",
    "mse": "MSE",
    "r2": "R2",
}

# Presentation labels for column headers.
column_labels = {
    **metric_labels,
    "outer_split": "Outer CV split",
    "no_of_layers": "No. of layers",
    "no_of_nodes": "No. of nodes",
    "batch_size": "Batch size",
    "learning_rate": "Learning rate",
    "loss_function": "Loss function",
    "weighting_method": "Weighting method",
    "kernel": "Kernel",
    "criterion": "Criterion",
}

# newline="" stops the text layer from translating "\n", so the explicit
# lineterminator below is what ends up in the file on every platform.
with open("outputs/data_analyses/combined_results.csv", "w", newline="") as f:
    for i, (title, result) in enumerate(sections):
        if i > 0:
            # A blank line separates consecutive sections; none after the last.
            f.write("\n")
        f.write(f"{title}\n")
        labelled = result.replace(metric_labels).rename(columns=column_labels)
        labelled.to_csv(f, index=False, lineterminator="\n")
