# Imports

In [7]:
import numpy as np
import pandas as pd
from mgwr.gwr import GWR
from mgwr.sel_bw import Sel_BW
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [8]:
np.random.seed(42)

# Load data

In [9]:
df = pd.read_csv("datasets/5_split/df_fe.csv")

# Separate features

In [10]:
features = df.copy()

# Dependent variables
labels = features.pop('very_good_health')

# CV folds
fold_ids = features.pop("fold_id_python")
folds = np.unique(fold_ids)

# Drop unneeded features
features = features.drop(columns = ["fold_id_r"])

# Define function to get inputs for GWR

In [11]:
def get_gwr_inputs(features, labels, calculate_bandwidth = False):
    coords = np.array(list(zip(features["x_coord"], features["y_coord"])))
    target = labels.values.reshape((-1, 1))
    # predictor_cols = [col for col in features.columns if col not in ["x_coord", "y_coord"]]
    predictor_cols = ["greenspace_proportion", "imd", "f_m_ratio", "mean_age"]
    predictors = np.hstack(
        [features[col].values.reshape((-1, 1)) for col in predictor_cols]
    )
    bandwidth = None
    if calculate_bandwidth:
        print("Calculating bandwidth...")
        bandwidth = Sel_BW(coords, target, predictors).search()
    return coords, target, predictors, bandwidth


# Build and evaluate model

In [12]:
# Initialise evaluation results array
evaluation_results = []

# Cross-validation loop
for fold in folds:
    print(f"\n --- Training on fold {fold} ---")

    # Separate data into training and validation sets
    is_in_validation_set = fold_ids == fold
    is_in_training_set = ~is_in_validation_set

    train_features = features.loc[is_in_training_set]
    train_labels = labels.loc[is_in_training_set]

    val_features = features.loc[is_in_validation_set]
    val_labels = labels.loc[is_in_validation_set]

    print("Getting inputs...")
    # Get inputs
    train_coords, train_target, train_predictors, bandwidth = get_gwr_inputs(train_features, train_labels, calculate_bandwidth = True)
    val_coords, val_target, val_predictors, _ = get_gwr_inputs(val_features, val_labels)

    print("Building model...")
    # Build model
    model = GWR(
        train_coords,
        train_target,
        train_predictors,
        bw = bandwidth,
        kernel = "Gaussian"
    )

    print("Getting predictions...")
    # Get predictions
    results = model.predict(
        val_coords, val_predictors
    )
    predictions = results.predy

    print("Calculating accuracy scores...")
    # Get accuracy scores
    mae = mean_absolute_error(val_labels, predictions)
    mse = mean_squared_error(val_labels, predictions)
    r2 = r2_score(val_labels, predictions)

    print("Adding scores to results...")
    # Add scores for current fold to results
    evaluation_results.append({
        "fold": fold,
        "MAE": mae,
        "MSE": mse,
        "R2": r2
    })


 --- Training on fold 0 ---
Getting inputs...
Calculating bandwidth...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 1 ---
Getting inputs...
Calculating bandwidth...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 2 ---
Getting inputs...
Calculating bandwidth...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 3 ---
Getting inputs...
Calculating bandwidth...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 4 ---
Getting inputs...
Calculating bandwidth...
Building model...
Getting predictions...
Calculating accuracy scores...
Adding scores to results...

 --- Training on fold 5 ---
Getting inputs...
Calculating bandwidth...
Building model...
Getting predictions...
Calculating accuracy scores

# Print results

In [13]:
evaluation_results

[{'fold': np.int64(0),
  'MAE': 0.04631222084903485,
  'MSE': 0.0035334732870310145,
  'R2': -0.21542824652014025},
 {'fold': np.int64(1),
  'MAE': 0.03964742610238474,
  'MSE': 0.0025025157527455762,
  'R2': -1.81963845124388},
 {'fold': np.int64(2),
  'MAE': 0.037249271772628996,
  'MSE': 0.002359464872609838,
  'R2': -0.6755295404886086},
 {'fold': np.int64(3),
  'MAE': 0.05149424737092872,
  'MSE': 0.004294011832507372,
  'R2': -0.5150359784573317},
 {'fold': np.int64(4),
  'MAE': 0.040698334017276246,
  'MSE': 0.0026851909378744813,
  'R2': -1.670774241361348},
 {'fold': np.int64(5),
  'MAE': 0.04880765333335799,
  'MSE': 0.0038723628353104504,
  'R2': -1.8721767959858315},
 {'fold': np.int64(6),
  'MAE': 0.04437466505633912,
  'MSE': 0.0032474244503925805,
  'R2': -0.8971127571962778},
 {'fold': np.int64(7),
  'MAE': 0.046553640233379656,
  'MSE': 0.0033057863859062976,
  'R2': -1.549015517317629},
 {'fold': np.int64(8),
  'MAE': 0.06586482344476617,
  'MSE': 0.006775076700181589