# Imports

In [1]:
import numpy as np
import geopandas as gpd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import r2_score

# Load dataset

In [2]:
raw_df = gpd.read_file("datasets/combined/lsoa_greenspace.gpkg")

In [3]:
df = raw_df.copy()

In [4]:
df["x_coord"] = df["geometry"].centroid.x
df["y_coord"] = df["geometry"].centroid.y

In [5]:
df = df.drop(columns = ["lsoa", "geometry", "good_health", "fair_health", "bad_health", "very_bad_health", "total_area", "greenspace_area"])


# Split data

In [6]:
train_df = df.sample(frac=0.8, random_state=0)
test_df = df.drop(train_df.index)

train_features = train_df.copy()
test_features = test_df.copy()

train_labels = train_features.pop('very_good_health')
test_labels = test_features.pop('very_good_health')

# Normalise features

In [7]:
normaliser = tf.keras.layers.Normalization(axis=-1)

In [8]:
normaliser.adapt(np.array(train_features))

In [9]:
model = tf.keras.Sequential([
    normaliser,
    layers.Dense(units=1)
])

In [10]:
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error'
)

In [11]:
model.fit(
    train_features,
    train_labels,
    epochs=100,
    verbose=0,
    validation_split=0.2
)

<keras.src.callbacks.history.History at 0x27157c3d940>

In [12]:
test_loss = model.evaluate(
    test_features,
    test_labels
)

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.2904 


In [13]:
predictions = model.predict(test_features)
r2 = r2_score(test_labels, predictions)

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [14]:
print(f"MAE: {test_loss}")
print(f"R2: {r2}")

MAE: 1.2903989553451538
R2: -932.9156330948115
