# Imports

In [25]:
import numpy as np
import pandas as pd
import geopandas as gpd
import tensorflow as tf
import tensorflow.keras as keras
import libpysal.weights as weights
import pysal.explore as esda
from utils.db_utils import engine

# Set random seed

In [26]:
tf.random.set_seed(42)
np.random.seed(42)

# Load data

In [27]:
raw_df = gpd.read_postgis("SELECT * FROM engineered_dataset", engine, "geometry")

# Separate features

In [28]:
features = raw_df.copy()
features["x_coord"] = features["geometry"].centroid.x
features["y_coord"] = features["geometry"].centroid.y
features = features.drop(columns=["geometry"])
labels = features.pop("very_good_health")

# Build model

In [29]:
scores = []

In [30]:
for i in range(10):

    # Build model
    normaliser = keras.layers.Normalization(axis=-1)
    normaliser.adapt(np.array(features))

    model = keras.Sequential([
        normaliser,
        keras.layers.Dense(19, activation="relu"),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(24, activation="relu"),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1)
    ])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.061224), loss="mse"
    )

    # Build early stopper
    early_stopper = keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=20, restore_best_weights=True
    )

    # Fit model
    model.fit(
        features,
        labels,
        batch_size=36,
        epochs=200,
        validation_split=0.2,
        callbacks=[early_stopper],
        verbose=1,
    )

    # Calculate Moran's I of residuals and add to scores
    predictions = model.predict(features).flatten()
    residuals = labels - predictions
    features["residuals"] = residuals
    w = weights.KNN.from_dataframe(raw_df, k=8)
    moran = esda.esda.Moran(features["residuals"], w)
    scores.append(moran.I)

Epoch 1/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.0522 - val_loss: 0.0037
Epoch 2/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0028 - val_loss: 0.0039
Epoch 3/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0027 - val_loss: 0.0037
Epoch 4/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0027 - val_loss: 0.0040
Epoch 5/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0026 - val_loss: 0.0033
Epoch 6/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0024 - val_loss: 0.0033
Epoch 7/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0027 - val_loss: 0.0034
Epoch 8/200
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0024 - val_loss: 0.0030
Epoch 9/200
[1m104/104[0m [32

# Calculate Moran's I mean and standard deviation

In [31]:
scores_df = pd.DataFrame(scores, columns = ["Moran's I"])
scores_df.loc["Mean"] = np.mean(scores)
scores_df.loc["Std dev"] = np.std(scores)
scores_df["Moran's I"] = scores_df["Moran's I"].round(6)

In [32]:
scores_df

Unnamed: 0,Moran's I
0,0.548793
1,0.269844
2,0.232499
3,0.37769
4,0.14028
5,0.244089
6,0.229919
7,0.587092
8,0.346291
9,0.244404


# Save output

In [33]:
scores_df.to_csv("outputs/data_analyses/regularised_moran.csv")