# Imports

In [1]:
import numpy as np
import geopandas as gpd
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Set random seeds

In [2]:
# Single seed shared by every random number generator used in this notebook
SEED = 42

# set_random_seed seeds Python's `random` module, NumPy and TensorFlow in one
# call, so weight initialisation and batch shuffling are repeatable after a
# kernel restart without having to seed each library separately.
tf.keras.utils.set_random_seed(SEED)

# Load data

In [3]:
# Read the pre-split training and test GeoPackages (train first, then test)
train_df, test_df = (
    gpd.read_file(split_path)
    for split_path in (
        "datasets/4_split/train_df.gpkg",
        "datasets/4_split/test_df.gpkg",
    )
)

In [4]:
# Column holding the dependent variable and column holding the CV fold ids
target_column = 'very_good_health'
fold_column = "fold_id_python"

# Columns removed from both splits before modelling
shared_excluded_columns = ["lsoa", "good_health", "fair_health", "bad_health", "very_bad_health", "total_area", "greenspace_area", "geometry"]

# Dependent variables
train_labels = train_df[target_column].copy()
test_labels = test_df[target_column].copy()

# CV fold ids (taken from the training split only)
train_fold_ids = train_df[fold_column].copy()

# Independent variables: everything that is left once the target, the fold id
# columns and the shared exclusions are removed. The source frames are left untouched.
train_features = train_df.drop(columns = [target_column, fold_column, "fold_id_r", *shared_excluded_columns])
# NOTE(review): no fold id columns are removed from the test split -- presumably
# test_df does not contain them; confirm so train and test feature columns match.
test_features = test_df.drop(columns = [target_column, *shared_excluded_columns])

# Define the model-building function

In [5]:
def build_model(normaliser, hidden_units=(64, 32), l1_penalty=0.0005, learning_rate=0.001):
    """Build and compile a feed-forward regression network.

    The network is: the supplied normalisation layer, one ReLU `Dense` layer
    per entry of `hidden_units` (each with its own L1 kernel regulariser), and
    a single-unit linear output layer. It is compiled with the Adam optimiser
    and mean squared error loss.

    Args:
        normaliser: Layer placed first in the model. The caller is expected to
            have adapted it to the training data already.
        hidden_units: Width of each hidden layer, in order. The default
            reproduces the original 64 -> 32 architecture.
        l1_penalty: L1 regularisation factor applied to every hidden layer's
            kernel.
        learning_rate: Learning rate passed to Adam.

    Returns:
        The compiled `tf.keras.Sequential` model (not yet fitted).
    """
    # A fresh regulariser instance is created per layer, as in the fixed
    # two-layer version of this function.
    hidden_layers = [
        layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.L1(l1_penalty),
        )
        for units in hidden_units
    ]

    model = tf.keras.Sequential([
        normaliser,
        *hidden_layers,
        layers.Dense(1)
    ])

    model.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse'
    )

    return model

# Train and evaluate model using cross-validation

In [6]:
# Get fold numbers
unique_folds = np.unique(train_fold_ids)

# Initialise evaluation results array (one dict of scores per fold)
fold_results = []


# Loop through folds: each fold is held out once for validation while the
# model is trained on all remaining folds
for fold in unique_folds:
    print(f"\n --- Training on fold {fold} ---")

    # Release the Keras global state left behind by the previous fold's model;
    # without this, state accumulates as a new model is built every iteration
    tf.keras.backend.clear_session()

    # Separate data into training and validation sets
    is_in_validation_set = train_fold_ids == fold
    is_in_training_set = ~is_in_validation_set

    fold_train_features = train_features.loc[is_in_training_set]
    fold_train_labels = train_labels.loc[is_in_training_set]

    fold_validation_features = train_features.loc[is_in_validation_set]
    fold_validation_labels = train_labels.loc[is_in_validation_set]

    # Define normalisation layer inside loop to avoid data leakage:
    # it is adapted on this fold's training rows only
    normaliser = tf.keras.layers.Normalization(axis = -1)
    normaliser.adapt(np.array(fold_train_features))

    # Build model
    model = build_model(normaliser)

    # Define early stopper inside loop as it stores state
    early_stop = EarlyStopping(
        monitor = "val_loss",
        patience = 10,
        restore_best_weights = True,
        verbose = 1
    )

    # Fit model
    # NOTE(review): the held-out fold drives early stopping (and the restored
    # best weights) and is also the data scored below, so the per-fold metrics
    # are likely somewhat optimistic -- confirm this is acceptable.
    model.fit(
        fold_train_features,
        fold_train_labels,
        epochs = 100,
        validation_data = (fold_validation_features, fold_validation_labels),
        callbacks = [early_stop]
    )

    # Make predictions using fitted model; flatten so the predictions are 1-D
    # when passed to the metric functions
    predictions = model.predict(fold_validation_features).flatten()

    # Obtain accuracy scores
    mae = mean_absolute_error(fold_validation_labels, predictions)
    mse = mean_squared_error(fold_validation_labels, predictions)
    r2 = r2_score(fold_validation_labels, predictions)

    # Add scores for current fold to results
    fold_results.append({
        "fold": fold,
        "MAE": mae,
        "MSE": mse,
        "R2": r2
    })


 --- Training on fold 0.0 ---
Epoch 1/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.7347 - val_loss: 0.6223
Epoch 2/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.5791 - val_loss: 0.5497
Epoch 3/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.5045 - val_loss: 0.4788
Epoch 4/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4346 - val_loss: 0.4077
Epoch 5/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.3658 - val_loss: 0.3372
Epoch 6/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.2989 - val_loss: 0.2695
Epoch 7/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.2357 - val_loss: 0.2076
Epoch 8/100
[1m107/107[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.1792 - val_loss: 0.1549
E

# Print results

In [7]:
# Show the per-fold scores: one dict per fold with keys "fold", "MAE", "MSE" and "R2"
fold_results

[{'fold': np.float64(0.0),
  'MAE': 0.02696391267506315,
  'MSE': 0.001080190800275806,
  'R2': 0.012387273078673045},
 {'fold': np.float64(1.0),
  'MAE': 0.02464305059069036,
  'MSE': 0.0009299375999901865,
  'R2': 0.5376177539353587},
 {'fold': np.float64(2.0),
  'MAE': 0.031989786772239286,
  'MSE': 0.0019227339629300357,
  'R2': 0.3826028368466535},
 {'fold': np.float64(3.0),
  'MAE': 0.02291154293839065,
  'MSE': 0.0008430355532936767,
  'R2': 0.05317509571133028},
 {'fold': np.float64(4.0),
  'MAE': 0.02172983180497423,
  'MSE': 0.0007680797642253032,
  'R2': 0.160161186086742},
 {'fold': np.float64(5.0),
  'MAE': 0.02354507439092407,
  'MSE': 0.0008535188776997366,
  'R2': 0.5115630101056532},
 {'fold': np.float64(6.0),
  'MAE': 0.02456099611529307,
  'MSE': 0.0009434449277424541,
  'R2': 0.6502037857771149},
 {'fold': np.float64(7.0),
  'MAE': 0.02004308203014212,
  'MSE': 0.0006366672593797174,
  'R2': 0.42127151493939663},
 {'fold': np.float64(8.0),
  'MAE': 0.030397515411979