# Imports

In [14]:
import numpy as np
import pandas as pd
from mgwr.gwr import GWR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [15]:
# Seed NumPy's global random number generator with a fixed value (42).
np.random.seed(42)

# Load data

In [16]:
# Read the dataset into a DataFrame; the path is relative to the working directory.
# NOTE(review): file name suggests this is the feature-engineered table — confirm.
df = pd.read_csv("datasets/5_split/df_fe.csv")

# Separate features

In [17]:
# Work on a new frame (``df`` itself is left untouched) that already lacks
# the column which is not needed here.
features = df.drop(columns=["fold_id_r"])

# Dependent variable: removed from the feature table and kept on its own
labels = features.pop("very_good_health")

# Cross-validation fold assignment per row, plus the distinct fold values
fold_ids = features.pop("fold_id_python")
folds = np.unique(fold_ids)

# Define function to get inputs for GWR

In [18]:
def get_gwr_inputs(features, labels, predictor_cols=None):
    """Convert a feature table and its labels into the arrays handed to GWR.

    Parameters
    ----------
    features : pandas.DataFrame
        Must contain the columns "x_coord" and "y_coord" as well as every
        column named in ``predictor_cols``.
    labels : pandas.Series or array-like
        Dependent-variable values, one per row of ``features``.
    predictor_cols : sequence of str, optional
        Names of the columns to use as predictors, in the order they should
        appear in the output. When omitted, the four columns
        "greenspace_proportion", "imd", "f_m_ratio" and "mean_age" are used.

    Returns
    -------
    coords : numpy.ndarray, shape (n, 2)
        The x/y coordinate pair of every row.
    target : numpy.ndarray, shape (n, 1)
        ``labels`` as a column vector.
    predictors : numpy.ndarray, shape (n, len(predictor_cols))
        The selected predictor columns side by side.

    Raises
    ------
    KeyError
        If a required column is missing from ``features``.
    """
    if predictor_cols is None:
        predictor_cols = ["greenspace_proportion", "imd", "f_m_ratio", "mean_age"]
    else:
        # Materialise once so generators / tuples / Index objects all work
        predictor_cols = list(predictor_cols)

    # column_stack keeps the (n, 2) shape even when there are zero rows,
    # unlike building the array from a list of zipped tuples.
    coords = np.column_stack(
        [features["x_coord"].to_numpy(), features["y_coord"].to_numpy()]
    )

    # asarray lets callers pass a Series, a list or an ndarray
    target = np.asarray(labels).reshape((-1, 1))

    predictors = np.column_stack(
        [features[col].to_numpy() for col in predictor_cols]
    )
    return coords, target, predictors


# Build and evaluate model

In [19]:
# One metrics record per fold is collected here, in fold order
evaluation_results = list()

# Each fold takes one turn as the validation set while all other folds train
for fold in folds:
    print(f"\n --- Training on fold {fold} ---")

    # Boolean row masks: rows tagged with the current fold validate, the rest train
    is_in_validation_set = fold_ids == fold
    is_in_training_set = ~is_in_validation_set

    train_features, train_labels = (
        features.loc[is_in_training_set],
        labels.loc[is_in_training_set],
    )
    val_features, val_labels = (
        features.loc[is_in_validation_set],
        labels.loc[is_in_validation_set],
    )

    print("Getting inputs...")
    # Convert both splits into (coords, target, predictors) arrays
    train_coords, train_target, train_predictors = get_gwr_inputs(
        train_features, train_labels
    )
    val_coords, val_target, val_predictors = get_gwr_inputs(
        val_features, val_labels
    )

    print("Building model...")
    # The model is set up on the training split only; bw and kernel are fixed values
    model = GWR(train_coords, train_target, train_predictors, bw=200, kernel="Gaussian")

    print("Getting predictions...")
    # Predict at the validation locations; the predicted values are read from `predy`
    results = model.predict(val_coords, val_predictors)
    predictions = results.predy

    print("Calculating accuracy scores...")
    # Score the predictions against the held-out labels with all three metrics
    mae, mse, r2 = (
        metric(val_labels, predictions)
        for metric in (mean_absolute_error, mean_squared_error, r2_score)
    )

    print("Adding scores to results...")
    # Record this fold's scores
    evaluation_results.append(dict(fold=fold, MAE=mae, MSE=mse, R2=r2))


 --- Training on fold 0 ---
Getting inputs...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 1 ---
Getting inputs...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 2 ---
Getting inputs...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 3 ---
Getting inputs...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 4 ---
Getting inputs...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 5 ---
Getting inputs...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 6 ---
Getting inputs...
Building model...
Getting predictions...
Calculating accuracy scores...

# Print results

In [20]:
evaluation_results

[{'fold': np.int64(0),
  'MAE': 0.046119052591221135,
  'MSE': 0.00343419342452548,
  'R2': -0.1812784060097421},
 {'fold': np.int64(1),
  'MAE': 0.04267578564430943,
  'MSE': 0.0028495635446980495,
  'R2': -2.2106646805632715},
 {'fold': np.int64(2),
  'MAE': 0.03831059007868893,
  'MSE': 0.0024833665680395927,
  'R2': -0.7635159959002316},
 {'fold': np.int64(3),
  'MAE': 0.05174820047795516,
  'MSE': 0.004305438712032673,
  'R2': -0.5190676705619663},
 {'fold': np.int64(4),
  'MAE': 0.04282852276705129,
  'MSE': 0.002895896889812182,
  'R2': -1.880348920390389},
 {'fold': np.int64(5),
  'MAE': 0.048471797345910346,
  'MSE': 0.003800480391982562,
  'R2': -1.81886074722033},
 {'fold': np.int64(6),
  'MAE': 0.04340680547674356,
  'MSE': 0.0030885444112414345,
  'R2': -0.8042966336060753},
 {'fold': np.int64(7),
  'MAE': 0.04766384512183564,
  'MSE': 0.0034439940094915496,
  'R2': -1.6555842232183937},
 {'fold': np.int64(8),
  'MAE': 0.06622029721324862,
  'MSE': 0.006874284895029505,
  