# Imports

In [None]:
import numpy as np
import pandas as pd
from mgwr.gwr import MGWR
from mgwr.sel_bw import Sel_BW
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Fix the seed of NumPy's global random number generator for this session
np.random.seed(42)

# Load data

In [None]:
# Read the prepared dataset (path is relative to the working directory).
# Later cells read these columns from it: "very_good_health" (target),
# "fold_id_python" and "fold_id_r" (fold assignments), "x_coord"/"y_coord"
# and the predictors "greenspace_proportion", "imd", "f_m_ratio", "mean_age".
df = pd.read_csv("datasets/5_split/df_fe.csv")

# Separate features

In [None]:
# Dependent variable (copied so it is independent of `df`)
labels = df["very_good_health"].copy()

# Cross-validation fold assignment per row, plus the distinct fold ids
fold_ids = df["fold_id_python"].copy()
folds = np.unique(fold_ids)

# Everything that is neither the target nor a fold-id column stays a feature;
# `drop` returns a new frame, so `df` itself is left unchanged
features = df.drop(columns=["very_good_health", "fold_id_python", "fold_id_r"])

# Define function to get inputs for MGWR

In [None]:
def get_gwr_inputs(features, labels, calculate_bandwidths=False, predictor_cols=None):
    """Assemble the arrays (and optionally the bandwidth selector) for MGWR.

    Args:
        features: DataFrame containing ``x_coord``, ``y_coord`` and every
            column named in ``predictor_cols``.
        labels: Series holding the dependent variable, one value per row of
            ``features``.
        calculate_bandwidths: When true, run a multiscale bandwidth search
            (one bandwidth per covariate) on the assembled arrays.
        predictor_cols: Column names to use as predictors, in order. Defaults
            to ``greenspace_proportion``, ``imd``, ``f_m_ratio``, ``mean_age``.

    Returns:
        A tuple ``(coords, target, predictors, selector)`` where ``coords`` is
        an ``(n, 2)`` array, ``target`` an ``(n, 1)`` array, ``predictors`` an
        ``(n, k)`` array and ``selector`` is the searched ``Sel_BW`` object,
        or ``None`` when ``calculate_bandwidths`` is false.
    """
    if predictor_cols is None:
        predictor_cols = ["greenspace_proportion", "imd", "f_m_ratio", "mean_age"]

    coords = np.column_stack(
        (features["x_coord"].to_numpy(), features["y_coord"].to_numpy())
    )
    target = labels.to_numpy().reshape(-1, 1)
    predictors = np.column_stack(
        [features[col].to_numpy() for col in predictor_cols]
    )

    selector = None
    if calculate_bandwidths:
        print("Calculating bandwidths...")
        selector = Sel_BW(
            coords, target, predictors, kernel="Exponential", multi=True
        )
        # search() returns only the bandwidth values, whereas MGWR's
        # `selector` argument needs the searched Sel_BW object itself
        # (it reads the bandwidths and search history from it), so the
        # object is kept and returned instead of search()'s return value.
        selector.search()  # Calculate separate bandwidth for each feature
    return coords, target, predictors, selector

# Build and evaluate model

In [None]:
# Per-fold metric records, filled in by the cross-validation loop below
evaluation_results = []

# Cross-validation loop: `fold` is the fold held out for validation; the model
# is calibrated on the rows of all other folds
for fold in folds:
    print(f"\n --- Training on fold {fold} ---")

    # Boolean row masks for the held-out fold and its complement
    is_in_validation_set = fold_ids == fold
    is_in_training_set = ~is_in_validation_set

    train_features = features.loc[is_in_training_set]
    train_labels = labels.loc[is_in_training_set]

    val_features = features.loc[is_in_validation_set]
    val_labels = labels.loc[is_in_validation_set]

    # Get inputs (bandwidths are searched on the training rows only, so the
    # validation rows do not influence model calibration)
    print("Getting inputs...")
    train_coords, train_target, train_predictors, bandwidths = get_gwr_inputs(
        train_features, train_labels, calculate_bandwidths=True
    )
    val_coords, val_target, val_predictors, _ = get_gwr_inputs(val_features, val_labels)

    # Build model
    print("Building model...")
    model = MGWR(
        train_coords,
        train_target,
        train_predictors,
        selector=bandwidths,
        # Must be the same kernel the bandwidth search in get_gwr_inputs used:
        # a bandwidth is only meaningful for the kernel it was optimised under
        kernel="Exponential",
    )

    # Get predictions
    # NOTE(review): the upstream mgwr package documents MGWR.predict as not
    # implemented -- confirm the installed version provides
    # predict(points, P) and that `predy` holds the out-of-sample predictions
    # before relying on the scores below.
    print("Getting predictions...")
    results = model.predict(val_coords, val_predictors)
    predictions = results.predy

    # Get accuracy scores; `val_target` is the (n, 1) array form of
    # `val_labels`, matching the column-vector layout used for the model inputs
    print("Calculating accuracy scores...")
    mae = mean_absolute_error(val_target, predictions)
    mse = mean_squared_error(val_target, predictions)
    r2 = r2_score(val_target, predictions)

    # Add scores for current fold to results
    print("Adding scores to results...")
    evaluation_results.append({"fold": fold, "MAE": mae, "MSE": mse, "R2": r2})

# Print results

In [None]:
# Bare expression as the last line of the cell: displays the list of per-fold
# metric dicts (keys "fold", "MAE", "MSE", "R2")
evaluation_results