In [7]:
import pickle
import json
import pandas as pd

In [8]:
# Render every float in displayed frames with six decimal places.
pd.set_option("display.float_format", '{:.6f}'.format)

# Full dataset

## ANN

In [9]:
# Load the pickled ANN results for the full dataset (iterated per outer split below).
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open("outputs/model_results/ann_full.pkl", mode="rb") as results_file:
    ann_full_data = pickle.load(results_file)

In [10]:
# Flatten each outer-split record into one row: the selected hyperparameters
# (read from the nested "hps" dict) followed by the three evaluation metrics.
ann_full_results = pd.DataFrame(
    [
        {
            "outer_split": fold["outer_split"],
            "layers": hps["no_of_layers"],
            "nodes": hps["no_of_nodes"],
            "batch_size": hps["batch_size"],
            "learning_rate": hps["learning_rate"],
            "loss_function": hps["loss_function"],
            **{metric: fold[metric] for metric in ("mae", "mse", "r2")},
        }
        for fold in ann_full_data
        for hps in [fold["hps"]]  # bind the nested dict once per record
    ]
)

In [11]:
# Display the per-outer-split hyperparameters and metrics for the ANN on the full dataset.
ann_full_results

Unnamed: 0,outer_split,layers,nodes,batch_size,learning_rate,loss_function,mae,mse,r2
0,0,3,"[372, 208, 173]",30,0.015686,mae,0.022492,0.000822,0.56087
1,1,4,"[142, 258, 116, 402]",10,0.001662,mae,0.024638,0.000991,0.680459
2,2,2,"[235, 385]",37,0.021922,mse,0.030103,0.001595,0.455013
3,3,4,"[261, 197, 334, 281]",58,0.006583,mse,0.035033,0.002044,0.569407
4,4,4,"[245, 198, 302, 225]",15,0.069612,mae,0.042764,0.00258,-0.628751


# Dataset with feature engineering

## ANN

In [12]:
# Load the pickled ANN results for the feature-engineered dataset.
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open("outputs/model_results/ann_fe.pkl", mode="rb") as results_file:
    ann_fe_data = pickle.load(results_file)

In [13]:
# Flatten each outer-split record into one row: the selected hyperparameters
# (read from the nested "hps" dict) followed by the three evaluation metrics.
ann_fe_results = pd.DataFrame(
    [
        {
            "outer_split": fold["outer_split"],
            "layers": hps["no_of_layers"],
            "nodes": hps["no_of_nodes"],
            "batch_size": hps["batch_size"],
            "learning_rate": hps["learning_rate"],
            "loss_function": hps["loss_function"],
            **{metric: fold[metric] for metric in ("mae", "mse", "r2")},
        }
        for fold in ann_fe_data
        for hps in [fold["hps"]]  # bind the nested dict once per record
    ]
)

In [14]:
# Display the per-outer-split hyperparameters and metrics for the ANN on the
# feature-engineered dataset.
# NOTE(review): in the recorded output, outer split 4 has mae ~258.5 and
# r2 ~-2.2e10, orders of magnitude away from the other splits. It looks like
# training diverged for that split; confirm before averaging across splits.
ann_fe_results

Unnamed: 0,outer_split,layers,nodes,batch_size,learning_rate,loss_function,mae,mse,r2
0,0,2,"[26, 30]",20,0.045979,mae,0.02989,0.001395,0.25492
1,1,2,"[23, 22]",17,0.057087,mse,0.025643,0.001384,0.553897
2,2,3,"[19, 15, 29]",36,0.056168,mae,0.04239,0.002932,-0.001613
3,3,2,"[15, 16]",46,0.069274,mse,0.034452,0.002379,0.498895
4,4,4,"[14, 27, 16, 8]",49,0.038735,mse,258.53549,34282500.964439,-21645689183.549255


## SAR

In [33]:
# Load the SAR results for the feature-engineered dataset from JSON.
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's locale default (which differs between operating systems).
with open("outputs/model_results/sar_fe.json", "r", encoding="utf-8") as f:
    sar_fe_data = json.load(f)

In [34]:
# Tabulate the SAR results: one row per outer-split record with the chosen
# weighting method, its accompanying hyperparameter, and the three metrics.
#
# hp_param is looked up by key (the first hyperparameter that is not
# "weighting_method") rather than by position, so the column stays correct
# even if the key order inside "hps" changes in the JSON file. It is None
# when "weighting_method" is the only hyperparameter.
# NOTE(review): presumably hp_param is k for "knn" and a distance threshold
# for "distance"; confirm against the code that writes the results file.
sar_fe_results = pd.DataFrame(
    [
        {
            "outer_split": item["outer_split"],
            "weighting_method": item["hps"]["weighting_method"],
            "hp_param": next(
                (
                    value
                    for name, value in item["hps"].items()
                    if name != "weighting_method"
                ),
                None,
            ),
            "mae": item["mae"],
            "mse": item["mse"],
            "r2": item["r2"],
        }
        for item in sar_fe_data
    ]
)

In [35]:
# Display the per-outer-split hyperparameters and metrics for SAR on the
# feature-engineered dataset.
# NOTE(review): the recorded output numbers outer_split 1-5 here, while the
# ANN tables in this notebook use 0-4; align the indexing before comparing or
# joining results on outer_split.
sar_fe_results

Unnamed: 0,outer_split,weighting_method,hp_param,mae,mse,r2
0,1,knn,5,0.0277,0.0012,0.3713
1,2,distance,1516,0.032,0.0018,0.4157
2,3,distance,1297,0.0358,0.0021,0.2968
3,4,knn,7,0.0453,0.0036,0.2459
4,5,knn,3,0.0299,0.0014,0.1173


# Dataset with feature engineering, ethnicity dummy variables removed

## ANN

In [18]:
# Load the pickled ANN results for the feature-engineered dataset without
# the ethnicity dummy variables.
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open("outputs/model_results/ann_fe_reduced.pkl", mode="rb") as results_file:
    ann_fe_reduced_data = pickle.load(results_file)

In [19]:
# Flatten each outer-split record into one row: the selected hyperparameters
# (read from the nested "hps" dict) followed by the three evaluation metrics.
ann_fe_reduced_results = pd.DataFrame(
    [
        {
            "outer_split": fold["outer_split"],
            "layers": hps["no_of_layers"],
            "nodes": hps["no_of_nodes"],
            "batch_size": hps["batch_size"],
            "learning_rate": hps["learning_rate"],
            "loss_function": hps["loss_function"],
            **{metric: fold[metric] for metric in ("mae", "mse", "r2")},
        }
        for fold in ann_fe_reduced_data
        for hps in [fold["hps"]]  # bind the nested dict once per record
    ]
)

In [20]:
# Display the per-outer-split hyperparameters and metrics for the ANN on the
# reduced feature-engineered dataset.
ann_fe_reduced_results

Unnamed: 0,outer_split,layers,nodes,batch_size,learning_rate,loss_function,mae,mse,r2
0,0,1,[5],34,0.029194,mae,0.028205,0.001248,0.333451
1,1,1,[3],35,0.04503,mse,0.030152,0.001535,0.505411
2,2,1,[6],35,0.023675,mse,0.033473,0.001875,0.359559
3,3,3,"[2, 7, 6]",42,0.012075,mse,0.041472,0.002707,0.42975
4,4,1,[5],35,0.098566,mse,0.042735,0.002527,-0.595766


## SAR

In [21]:
# Load the SAR results for the reduced feature-engineered dataset from JSON.
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's locale default (which differs between operating systems).
with open("outputs/model_results/sar_fe_reduced.json", "r", encoding="utf-8") as f:
    sar_fe_reduced_data = json.load(f)

In [22]:
# Tabulate the SAR results: one row per outer-split record with the chosen
# weighting method, its accompanying hyperparameter, and the three metrics.
#
# hp_param is looked up by key (the first hyperparameter that is not
# "weighting_method") rather than by position, so the column stays correct
# even if the key order inside "hps" changes in the JSON file. It is None
# when "weighting_method" is the only hyperparameter.
# NOTE(review): presumably hp_param is k for "knn" and a distance threshold
# for "distance"; confirm against the code that writes the results file.
sar_fe_reduced_results = pd.DataFrame(
    [
        {
            "outer_split": item["outer_split"],
            "weighting_method": item["hps"]["weighting_method"],
            "hp_param": next(
                (
                    value
                    for name, value in item["hps"].items()
                    if name != "weighting_method"
                ),
                None,
            ),
            "mae": item["mae"],
            "mse": item["mse"],
            "r2": item["r2"],
        }
        for item in sar_fe_reduced_data
    ]
)

In [23]:
# Display the per-outer-split hyperparameters and metrics for SAR on the
# reduced feature-engineered dataset.
# NOTE(review): the recorded output numbers outer_split 1-5 here, while the
# ANN and GWR tables in this notebook use 0-4; align before comparing on it.
sar_fe_reduced_results

Unnamed: 0,outer_split,weighting_method,hp_param,mae,mse,r2
0,1,knn,5,0.0308,0.0015,0.1757
1,2,distance,1516,0.0324,0.0019,0.4
2,3,distance,1297,0.0378,0.0023,0.2099
3,4,knn,7,0.0486,0.0039,0.1719
4,5,knn,3,0.034,0.0018,-0.1149


## GWR

In [24]:
# Load the pickled GWR results for the reduced feature-engineered dataset.
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open("outputs/model_results/gwr_fe_reduced.pkl", mode="rb") as results_file:
    gwr_fe_reduced_data = pickle.load(results_file)

In [25]:
# Flatten each outer-split record into one row: the selected kernel and
# criterion (read from the nested "hps" dict) followed by the three metrics.
gwr_fe_reduced_results = pd.DataFrame(
    [
        {
            "outer_split": fold["outer_split"],
            "kernel": hps["kernel"],
            "criterion": hps["criterion"],
            **{metric: fold[metric] for metric in ("mae", "mse", "r2")},
        }
        for fold in gwr_fe_reduced_data
        for hps in [fold["hps"]]  # bind the nested dict once per record
    ]
)

In [26]:
# Display the per-outer-split hyperparameters and metrics for GWR on the
# reduced feature-engineered dataset.
# NOTE(review): r2 is negative for every outer split in the recorded output.
gwr_fe_reduced_results

Unnamed: 0,outer_split,kernel,criterion,mae,mse,r2
0,0,bisquare,AICc,0.037145,0.002275,-0.214966
1,1,bisquare,AICc,0.051282,0.004464,-0.438794
2,2,bisquare,BIC,0.047725,0.003894,-0.330233
3,3,bisquare,BIC,0.057098,0.005164,-0.087753
4,4,bisquare,CV,0.03869,0.00251,-0.58459
