# Imports

In [1]:
import numpy as np
import geopandas as gpd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import r2_score

# Load dataset

In [2]:
raw_df = gpd.read_file("datasets/combined/lsoa_greenspace.gpkg")

In [3]:
df = raw_df.copy()

In [4]:
df["x_coord"] = df["geometry"].centroid.x
df["y_coord"] = df["geometry"].centroid.y

In [5]:
df = df.drop(columns = ["lsoa", "geometry", "good_health", "fair_health", "bad_health", "very_bad_health", "total_area", "greenspace_area"])


# Split data

In [6]:
train_df = df.sample(frac=0.8, random_state=0)
test_df = df.drop(train_df.index)

train_features = train_df.copy()
test_features = test_df.copy()

train_labels = train_features.pop('very_good_health')
test_labels = test_features.pop('very_good_health')

# Normalise features

In [7]:
normaliser = tf.keras.layers.Normalization(axis=-1)
normaliser.adapt(np.array(train_features))

# Specify model architecture

In [8]:
model = tf.keras.Sequential([
    normaliser,
    layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.L1(0.0005)),
    layers.Dense(32, activation='relu', kernel_regularizer=tf.keras.regularizers.L1(0.0005)),
    layers.Dense(1)
])

# Set model hyperparameters

In [9]:
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error'
)

# Train model

In [10]:
%%time
history = model.fit(
    train_features,
    train_labels,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.8831 - val_loss: 0.7727
Epoch 2/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6936 - val_loss: 0.6838
Epoch 3/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6109 - val_loss: 0.6157
Epoch 4/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.5461 - val_loss: 0.5548
Epoch 5/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4916 - val_loss: 0.5016
Epoch 6/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4420 - val_loss: 0.4522
Epoch 7/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.3965 - val_loss: 0.4047
Epoch 8/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.3536 - val_loss: 0.3594
Epoch 9/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━

# Calculate performance metrics

In [11]:
final_validation_loss = history.history['val_loss'][-1]

test_loss = model.evaluate(
    test_features,
    test_labels
)

predictions = model.predict(test_features)
r2 = r2_score(test_labels, predictions)

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0274 
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


# Print performance metrics

In [12]:
print(f"Final validation loss: {final_validation_loss}")
print(f"MAE: {test_loss}")
print(f"R2: {r2}")

Final validation loss: 0.02884526364505291
MAE: 0.027417872101068497
R2: 0.7463890214768185
