# Imports

In [63]:
import numpy as np
import pandas as pd
from mgwr.gwr import GWR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load data

In [64]:
df = pd.read_csv("datasets/5_split/df_full.csv")

In [65]:
features = df.copy()

# Dependent variables
label = features.pop('very_good_health')

# CV fold ids
fold_ids = features.pop("fold_id_python")
folds = np.unique(fold_ids)

# Drop unneeded features
features = features.drop(columns = ["fold_id_r"])

In [66]:
# Initialise evaluation results array
fold_results = []

# Loop through folds
for fold in folds:
    print(f"\n --- Training on fold {fold} ---")

    # Separate data into training and validation sets
    is_in_validation_set = fold_ids == fold
    is_in_training_set = ~is_in_validation_set

    train_features = features.loc[is_in_training_set]
    train_labels = label.loc[is_in_training_set]

    validation_features = features.loc[is_in_validation_set]
    validation_labels = label.loc[is_in_validation_set]

    # Training variables
    train_coords = list(zip(train_features["x_coord"], train_features["y_coord"]))
    train_target = train_labels.values.reshape((-1, 1))
    train_greenspace = train_features["greenspace_proportion"].values.reshape((-1, 1))
    train_imd = train_features["imd"].values.reshape((-1, 1))
    train_predictors = np.hstack([train_greenspace, train_imd])

    # Validation variables
    val_coords = np.array(list(zip(validation_features["x_coord"], validation_features["y_coord"])))
    val_target = validation_labels.values.reshape((-1, 1))
    val_greenspace = validation_features["greenspace_proportion"].values.reshape((-1, 1))
    val_imd = validation_features["imd"].values.reshape((-1, 1))
    val_predictors = np.hstack([val_greenspace, val_imd])

    # Build model
    model = GWR(
        train_coords,
        train_target,
        train_predictors,
        bw = 90
    )

    # Fit model
    results = model.predict(
        val_coords, val_predictors
    )

    predictions = results.predy

        # Obtain accuracy scores
    mae = mean_absolute_error(validation_labels, predictions)
    mse = mean_squared_error(validation_labels, predictions)
    r2 = r2_score(validation_labels, predictions)

    # Add scores for current fold to results
    fold_results.append({
        "fold": fold,
        "MAE": mae,
        "MSE": mse,
        "R2": r2
    })


 --- Training on fold 0 ---

 --- Training on fold 1 ---

 --- Training on fold 2 ---

 --- Training on fold 3 ---

 --- Training on fold 4 ---

 --- Training on fold 5 ---

 --- Training on fold 6 ---

 --- Training on fold 7 ---

 --- Training on fold 8 ---

 --- Training on fold 9 ---


In [69]:
fold_results

[{'fold': np.int64(0),
  'MAE': 0.0543053963442819,
  'MSE': 0.004961383996704583,
  'R2': -0.7065945492104151},
 {'fold': np.int64(1),
  'MAE': 0.027123682047136128,
  'MSE': 0.0011920652751800636,
  'R2': -0.34312564570379656},
 {'fold': np.int64(2),
  'MAE': 0.034871033290712894,
  'MSE': 0.0019299599473334742,
  'R2': -0.3705247072147455},
 {'fold': np.int64(3),
  'MAE': 0.057497451691062164,
  'MSE': 0.005460269217379997,
  'R2': -0.9265210807682489},
 {'fold': np.int64(4),
  'MAE': 0.052492172523303855,
  'MSE': 0.004533063023619785,
  'R2': -3.5087251663133836},
 {'fold': np.int64(5),
  'MAE': 0.04853494395942475,
  'MSE': 0.0037537493249751677,
  'R2': -1.7841997683765083},
 {'fold': np.int64(6),
  'MAE': 0.045763562422018525,
  'MSE': 0.0035573390457153274,
  'R2': -1.0781617519949185},
 {'fold': np.int64(7),
  'MAE': 0.03549443606370346,
  'MSE': 0.002070647449677877,
  'R2': -0.5966284157456418},
 {'fold': np.int64(8),
  'MAE': 0.0651703715960313,
  'MSE': 0.0067402299025904