## Spatial

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.dummy import DummyRegressor

from sklearn.metrics import mean_absolute_error

import os
import pandas as pd
maes_dummy = []
maes_gpr = []
data_folder = r'../../survey_processing/processed_data/'
for i in range(5):
    fold = i + 1
    print('Fold', fold)
    target='deprived_sev'
    train_df = pd.read_csv(f'{data_folder}train_fold_{fold}.csv', index_col=0).dropna(subset = [target])
    test_df = pd.read_csv(f'{data_folder}test_fold_{fold}.csv', index_col=0).dropna(subset = [target])


    X_test, y_test = test_df[['LATNUM', 'LONGNUM']], test_df[target]
    X_train, y_train = train_df[['LATNUM', 'LONGNUM']], train_df[target]
    dummy = DummyRegressor()
    gpr = make_pipeline(StandardScaler(), GaussianProcessRegressor())

    dummy.fit(X_train, y_train)
    gpr.fit(X_train, y_train)
    # Predict on test data
    y_pred_dummy = dummy.predict(X_test)
    y_pred_gpr = gpr.predict(X_test)

    # Evaluate the model using Mean Absolute Error (MAE)
    mae_dummy = mean_absolute_error(y_test, y_pred_dummy)
    mae_gpr = mean_absolute_error(y_test, y_pred_gpr)
    maes_dummy.append(mae_dummy)
    maes_gpr.append(mae_gpr)
    print("Mean Absolute Error on Test Set (Mean Prediction):", mae_dummy)
    print("Mean Absolute Error on Test Set (GPR):", mae_gpr)


Fold 1
Mean Absolute Error on Test Set (Mean Prediction): 0.28993896515870826
Mean Absolute Error on Test Set (GPR): 0.24367737636565168
Fold 2
Mean Absolute Error on Test Set (Mean Prediction): 0.2974678297958738
Mean Absolute Error on Test Set (GPR): 0.24340490709614693
Fold 3
Mean Absolute Error on Test Set (Mean Prediction): 0.29090025463197977
Mean Absolute Error on Test Set (GPR): 0.24351044205309838
Fold 4
Mean Absolute Error on Test Set (Mean Prediction): 0.298344487596565
Mean Absolute Error on Test Set (GPR): 0.24446421775968433
Fold 5
Mean Absolute Error on Test Set (Mean Prediction): 0.28851258984629424
Mean Absolute Error on Test Set (GPR): 0.24299212737466608


In [2]:
import numpy as np
print(np.mean(maes_dummy), np.std(maes_dummy)/np.sqrt(5))
print(np.mean(maes_gpr), np.std(maes_gpr)/np.sqrt(5))

0.29303282540588416 0.0018158786084546872
0.24360981412984944 0.0002161452148912077


## Temporal

In [1]:
from sklearn.svm import SVR
from sklearn.dummy import DummyRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import  RandomForestRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, RationalQuadratic
from sklearn.metrics import mean_absolute_error
import pandas as pd
target = 'deprived_sev'
data_folder = r'../../survey_processing/processed_data/'
# Define the kernel: A constant kernel multiplied by an RBF kernel, plus a Rational Quadratic kernel
kernel = ConstantKernel(1.0) * RBF(length_scale=1.0) + RationalQuadratic(length_scale=1.0, alpha=0.1)

train_df = pd.read_csv(f'{data_folder}before_2020.csv', index_col=0).dropna(subset = [target])
test_df = pd.read_csv(f'{data_folder}after_2020.csv', index_col=0).dropna(subset = [target])

X_train, y_train = train_df[['LATNUM', 'LONGNUM']], train_df[target]
X_test, y_test = test_df[['LATNUM', 'LONGNUM']], test_df[target]
dummy = DummyRegressor()
gpr = make_pipeline(StandardScaler(), GaussianProcessRegressor(kernel=kernel))

dummy.fit(X_train, y_train)
gpr.fit(X_train, y_train)
# Predict on test data
y_pred_dummy = dummy.predict(X_test)
y_pred_gpr = gpr.predict(X_test)

# Evaluate the model using Mean Absolute Error (MAE)
mae_dummy = mean_absolute_error(y_test, y_pred_dummy)
mae_gpr = mean_absolute_error(y_test, y_pred_gpr)
maes_dummy.append(mae_dummy)
maes_gpr.append(mae_gpr)
print("Mean Absolute Error on Test Set (Mean Prediction):", mae_dummy)
print("Mean Absolute Error on Test Set (GPR):", mae_gpr)