# Imports

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import r2_score

# Load dataset

In [4]:
raw_train_df = pd.read_csv("datasets/split/train_df.csv")
raw_test_df = pd.read_csv("datasets/split/test_df.csv")

In [5]:
train_df = raw_train_df.copy()
test_df = raw_test_df.copy()

# Split data

In [6]:
train_features = train_df.copy()
test_features = test_df.copy()

train_labels = train_features.pop('very_good_health')
test_labels = test_features.pop('very_good_health')

# Normalise features

In [7]:
normaliser = tf.keras.layers.Normalization(axis=-1)
normaliser.adapt(np.array(train_features))

# Specify model architecture

In [8]:
model = tf.keras.Sequential([
    normaliser,
    layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.L1(0.0005)),
    layers.Dense(32, activation='relu', kernel_regularizer=tf.keras.regularizers.L1(0.0005)),
    layers.Dense(1)
])

# Set model hyperparameters

In [9]:
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error'
)

# Train model

In [10]:
%%time
history = model.fit(
    train_features,
    train_labels,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.9884 - val_loss: 0.7942
Epoch 2/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.7183 - val_loss: 0.6904
Epoch 3/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.6209 - val_loss: 0.6167
Epoch 4/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.5514 - val_loss: 0.5595
Epoch 5/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4942 - val_loss: 0.5058
Epoch 6/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4441 - val_loss: 0.4553
Epoch 7/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4006 - val_loss: 0.4082
Epoch 8/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3575 - val_loss: 0.3636
Epoch 9/200
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━

# Calculate performance metrics

In [11]:
final_validation_loss = history.history['val_loss'][-1]

test_loss = model.evaluate(
    test_features,
    test_labels
)

predictions = model.predict(test_features)
r2 = r2_score(test_labels, predictions)

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0269 
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


# Print performance metrics

In [12]:
print(f"Final validation loss: {final_validation_loss}")
print(f"MAE: {test_loss}")
print(f"R2: {r2}")

Final validation loss: 0.028177406638860703
MAE: 0.02686399593949318
R2: 0.7384080380610439
