# Imports

In [1]:
import numpy as np
import geopandas as gpd
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Set random seeds

In [2]:
# Seed every random number generator with a single call.
# tf.keras.utils.set_random_seed seeds Python's `random`, NumPy and TensorFlow,
# whereas seeding only NumPy and TensorFlow leaves Python's `random` unseeded.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load data

In [3]:
# Read the pre-split training and test sets from their .gpkg files
train_df, test_df = (
    gpd.read_file(split_path)
    for split_path in (
        "datasets/4_split/train_df.gpkg",
        "datasets/4_split/test_df.gpkg",
    )
)

In [None]:
# Column holding the value the model is trained to predict
TARGET_COLUMN = "very_good_health"

# Column holding the cross-validation fold assignment used by this notebook
FOLD_ID_COLUMN = "fold_id_python"

# Columns removed from BOTH splits before modelling. Keeping a single list guarantees
# that the training and test sets are stripped of exactly the same columns.
# NOTE(review): presumably identifiers, the other health-category outcomes, raw area
# columns and the geometry - confirm against the data dictionary.
NON_FEATURE_COLUMNS = [
    "lsoa",
    "good_health",
    "fair_health",
    "bad_health",
    "very_bad_health",
    "total_area",
    "greenspace_area",
    "geometry",
]

# Fold-assignment columns. They are required on the training set; on the test set
# they are dropped only if present, so they can never slip through as predictors.
FOLD_COLUMNS = ["fold_id_r", FOLD_ID_COLUMN]

# Work on copies so the loaded frames stay untouched and this cell can be re-run
train_features = train_df.copy()
test_features = test_df.copy()

# Dependent variables
train_labels = train_features.pop(TARGET_COLUMN)
test_labels = test_features.pop(TARGET_COLUMN)

# CV fold ids
train_fold_ids = train_features.pop(FOLD_ID_COLUMN)

# Independent variables
train_features = train_features.drop(columns=NON_FEATURE_COLUMNS + ["fold_id_r"])
test_features = (
    test_features
    .drop(columns=NON_FEATURE_COLUMNS)
    .drop(columns=FOLD_COLUMNS, errors="ignore")
)

# The model is fitted on the training columns, so the test set must expose the same
# predictors in the same order. Fail loudly here rather than at prediction time.
missing_from_test = train_features.columns.difference(test_features.columns)
if len(missing_from_test) > 0:
    raise ValueError(f"Test set is missing feature columns: {list(missing_from_test)}")
test_features = test_features[train_features.columns]

# Define build model function

In [6]:
def build_model(normaliser, hidden_units=(64, 32), l1_strength=0.0005, learning_rate=0.001):
    """Build and compile a feed-forward regression network.

    The model is ``normaliser`` followed by one ReLU ``Dense`` layer per entry of
    ``hidden_units`` (each with its own L1 kernel regulariser) and a final
    single-unit ``Dense`` layer with no activation. It is compiled with the Adam
    optimiser and mean-squared-error loss.

    Args:
        normaliser: Layer placed first in the model. It is used as given; this
            function never calls ``adapt`` on it, so the caller must do so.
        hidden_units: Width of each hidden layer, in order. The default
            ``(64, 32)`` reproduces the original fixed architecture.
        l1_strength: L1 penalty factor applied to every hidden layer's kernel.
        learning_rate: Learning rate passed to the Adam optimiser.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # A fresh regulariser instance is created for each hidden layer
    hidden_layers = [
        layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.L1(l1_strength),
        )
        for units in hidden_units
    ]

    model = tf.keras.Sequential([normaliser, *hidden_layers, layers.Dense(1)])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse'
    )

    return model

# Train and evaluate model using cross-validation

In [None]:
# Distinct fold identifiers; each one serves as the validation fold exactly once
unique_folds = np.unique(train_fold_ids)

# Collects one dict of validation metrics per fold
fold_results = []


for fold in unique_folds:
    print(f"\n --- Training on fold {fold} ---")

    # Rows of the current fold are held out; every other row is trained on
    held_out = train_fold_ids == fold
    kept_in = ~held_out

    fold_train_features, fold_train_labels = train_features[kept_in], train_labels[kept_in]
    fold_validation_features, fold_validation_labels = train_features[held_out], train_labels[held_out]

    # The normalisation statistics come from this fold's training rows only,
    # so nothing about the held-out rows leaks into the model
    normaliser = tf.keras.layers.Normalization(axis = -1)
    normaliser.adapt(np.array(fold_train_features))

    # Fresh, untrained model for every fold
    model = build_model(normaliser)

    # A new callback per fold, because the callback object carries state
    # NOTE(review): the held-out fold both drives early stopping (val_loss with
    # restore_best_weights) and is scored below, so the fold metrics may be optimistic.
    early_stop = EarlyStopping(
        monitor = "val_loss",
        patience = 10,
        restore_best_weights = True,
        verbose = 1
    )

    # Train for at most 100 epochs, validating on the held-out fold after each one
    validation_data = (fold_validation_features, fold_validation_labels)
    model.fit(
        fold_train_features,
        fold_train_labels,
        epochs = 100,
        validation_data = validation_data,
        callbacks = [early_stop]
    )

    # Predict the held-out rows and flatten the result to a 1-D array
    predictions = model.predict(fold_validation_features).flatten()

    # Score the predictions and record the metrics for this fold
    fold_results.append({
        "fold": fold,
        "MAE": mean_absolute_error(fold_validation_labels, predictions),
        "MSE": mean_squared_error(fold_validation_labels, predictions),
        "R2": r2_score(fold_validation_labels, predictions)
    })


 --- Training on fold 0.0 ---
Epoch 1/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.6990 - val_loss: 0.6109
Epoch 2/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.5582 - val_loss: 0.5247
Epoch 3/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.4756 - val_loss: 0.4417
Epoch 4/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.3947 - val_loss: 0.3592
Epoch 5/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.3157 - val_loss: 0.2805
Epoch 6/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.2418 - val_loss: 0.2087
Epoch 7/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1768 - val_loss: 0.1493
Epoch 8/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1250 - val_loss: 0.1047
E

# Print results

In [10]:
# Display the per-fold validation metrics (fold id, MAE, MSE, R2) collected by the cross-validation loop
fold_results

[{'fold': np.float64(0.0),
  'MAE': 0.029539798373056553,
  'MSE': 0.0016639818795474912,
  'R2': 0.383007946281528},
 {'fold': np.float64(1.0),
  'MAE': 0.024490011431371497,
  'MSE': 0.0009356802712935588,
  'R2': -0.005067144214509156},
 {'fold': np.float64(2.0),
  'MAE': 0.023288932291157076,
  'MSE': 0.0008592088827184198,
  'R2': 0.562619979413902},
 {'fold': np.float64(3.0),
  'MAE': 0.02464760760702782,
  'MSE': 0.0009222239181245162,
  'R2': 0.5414351311070215},
 {'fold': np.float64(4.0),
  'MAE': 0.024916927697479933,
  'MSE': 0.0009082308829704062,
  'R2': 0.3157905282898059},
 {'fold': np.float64(5.0),
  'MAE': 0.025878124612977352,
  'MSE': 0.001075389653357101,
  'R2': 0.3655032957309159},
 {'fold': np.float64(6.0),
  'MAE': 0.032774191812830686,
  'MSE': 0.0017352777410311237,
  'R2': 0.5709993493496637},
 {'fold': np.float64(7.0),
  'MAE': 0.022532375192462168,
  'MSE': 0.0007725489739096703,
  'R2': 0.5045435957167039},
 {'fold': np.float64(8.0),
  'MAE': 0.02151762912