# Causal model of urban heat island (UHI) intensity using OLS

### Initialization

In [4]:
# import general packages
import numpy as np
import pandas as pd
import geopandas as gpd
import warnings
import os
import pickle
import yaml
import statsmodels.api as sm

from sklearn.preprocessing import PolynomialFeatures
from pysal.lib import weights
from scipy import stats
from scipy.stats import f
from scipy.spatial.distance import cdist

In [2]:
# ignore warnings & adjust location
# NOTE: this filter suppresses every warning for the rest of the session.
warnings.filterwarnings("ignore")
# home_directory = os.path.expanduser( '~' )
home_directory = 'C:/Users/stefan/OneDrive - bwedu/04_semester/DS_Project/'
os.chdir(home_directory + '/DS_Project/modules')
config_path = 'config.yml'
# The handle is deliberately not named `f`: that name is bound to the F
# distribution by `from scipy.stats import f` in the import cell, and reusing
# it here would replace the distribution with a closed file object.
# NOTE(review): yaml.FullLoader is only appropriate for trusted config files.
with open(config_path, 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)
# path = config['data']['data'] + '/uhi_model/'
path = home_directory
# sub-directories, all relative to `path`
path_raw = path + 'raw/'
path_visual = path + 'visual/'
path_grid = path + 'grid/'
path_model = path + 'model/'

In [3]:
from models.UHI_modeling.UHI import *

### Data loading and preparation

In [4]:
# define model parameters for final computation
mode = "poly"
# Named `include_all` rather than `all` so the builtin all() stays usable in
# later cells. NOTE(review): no visible cell reads this value; the
# marginal-effect and prediction cells further down run with all=True.
include_all = False

In [5]:
# load features and target
grid_size_meters = 250
# the pickle name encodes the grid size
final_file = f"{path}final_{grid_size_meters}_e.pkl"
with open(final_file, 'rb') as file:
    final = pd.read_pickle(file)
# restrict the frame to the columns listed here
final = final[[
    'geometry', 'id',
    'nLST', 'wLST',
    'impervious', 'building', 'low vegetation', 'water', 'trees', 'road',
    'avg_height',
]]
final.head()

Unnamed: 0,geometry,id,nLST,wLST,impervious,building,low vegetation,water,trees,road,avg_height
0,"POLYGON ((11.40587 48.04366, 11.40587 48.04591...",1000001,31.156486,31.127429,0.13127,0.0,0.074685,0.0,0.789707,0.004335,0.0
1,"POLYGON ((11.40587 48.04815, 11.40923 48.04815...",1000002,33.57891,33.665254,0.059593,0.0,0.42682,0.0,0.496387,0.017202,0.0
2,"POLYGON ((11.40587 48.05040, 11.40923 48.05040...",1000003,35.387467,35.400349,0.0,0.0,0.976624,0.0,0.0,0.023376,0.0
3,"POLYGON ((11.40587 48.05264, 11.40923 48.05264...",1000004,34.819586,34.901132,0.0,0.0,0.983499,0.0,0.0,0.016501,0.0
4,"POLYGON ((11.40587 48.05489, 11.40923 48.05489...",1000005,33.233571,33.182384,0.000732,9.9e-05,0.980496,0.0,0.0,0.018674,0.00876


In [6]:
# add constant and define features
features_interact = ['building', 'low vegetation', 'water', 'trees', 'road']
final = sm.add_constant(final)
final = add_feature_lags(final, features=features_interact)
# Regressors that are not interacted: the constant, the average height and one
# 'lag_<name>' column per interacted feature (presumably the columns that
# add_feature_lags creates - confirm against the helper).
features_no_interact = ['const', 'avg_height'] + ['lag_' + name for name in features_interact]
# name of the LST column used as dependent variable, in levels and in logs
lst = "wLST"
target = final[lst]
target_log = np.log(target)

In [7]:
# Build the design matrices for the three candidate specifications from the
# prepared frame and the two feature lists.
# NOTE(review): both helpers come from the star import of
# models.UHI_modeling.UHI. What the `all` flag toggles is not visible here -
# presumably whether every interaction term is included; confirm in the helper.
X_log_all = create_log_interactions(final, features_interact, features_no_interact)
X_log = create_log_interactions(final, features_interact, features_no_interact, all=False)
X_poly = create_polynomials(final, features_interact, features_no_interact)

### Modeling

In [8]:
# initialize and fit models
# Every fit requests the HC3 covariance estimator for its standard errors.

# polynomial specification on the LST in levels
model_init_poly = sm.OLS(target, X_poly)
model_poly = model_init_poly.fit(cov_type='HC3')

# log specification on the log LST
model_init_log = sm.OLS(target_log, X_log)
model_log = model_init_log.fit(cov_type='HC3')

# Log specification built without all=False. It is regressed on the log
# target like `model_log`; the previous version paired this log design matrix
# with the untransformed target.
model_init_log_all = sm.OLS(target_log, X_log_all)
model_log_all = model_init_log_all.fit(cov_type='HC3')

In [9]:
# display the regression table of the log model (fitted with HC3 covariance)
model_log.summary()

0,1,2,3
Dep. Variable:,wLST,R-squared:,0.825
Model:,OLS,Adj. R-squared:,0.824
Method:,Least Squares,F-statistic:,2254.0
Date:,"Tue, 11 Jul 2023",Prob (F-statistic):,0.0
Time:,14:04:19,Log-Likelihood:,15631.0
No. Observations:,8528,AIC:,-31220.0
Df Residuals:,8506,BIC:,-31060.0
Df Model:,21,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
building,0.0349,0.004,8.571,0.000,0.027,0.043
low vegetation,0.0017,0.003,0.520,0.603,-0.005,0.008
water,-0.0262,0.004,-7.440,0.000,-0.033,-0.019
trees,-0.0157,0.003,-4.561,0.000,-0.022,-0.009
road,-0.0064,0.003,-2.065,0.039,-0.012,-0.000
building low vegetation,-0.0032,0.001,-6.063,0.000,-0.004,-0.002
building water,0.0014,0.001,1.987,0.047,1.87e-05,0.003
building trees,-0.0021,0.001,-2.641,0.008,-0.004,-0.001
building road,-0.0039,0.001,-6.975,0.000,-0.005,-0.003

0,1,2,3
Omnibus:,570.422,Durbin-Watson:,0.476
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1481.609
Skew:,-0.385,Prob(JB):,0.0
Kurtosis:,4.891,Cond. No.,527.0


In [10]:
# compute average marginal effects for all features of interest
features = ['building', 'low vegetation', 'water', 'trees', 'road']

# Parameters of the evaluated model. They are passed to the helpers below so
# the declared values and the values actually used cannot drift apart.
model = model_poly
mode = "poly"
# Named `include_all` rather than `all` so the builtin all() is not shadowed.
# True is the value the helper calls were already being run with.
include_all = True

for feature in features:
    avg_marginal_effect = compute_avg_marginal_effect(
        model, final, feature, features_interact, features_no_interact,
        mode=mode, all=include_all, delta=0.001, step=1,
    )
    # the joint test is run with the current feature listed in features_exclude
    f_statistic, p_value = test_joint_significance(
        model, final, features_interact, features_no_interact, target,
        features_exclude=[feature], mode=mode, all=include_all,
    )
    print(f"Average marginal effect for '{feature}': {np.round(avg_marginal_effect,4)}")
    print(f"p-value of joint significance test for '{feature}': {np.round(p_value,10)}")

Average marginal effect for 'building': 3.7565
p-value of joint significance test for 'building': 0.0
Average marginal effect for 'low vegetation': -0.342
p-value of joint significance test for 'low vegetation': 0.0
Average marginal effect for 'water': -3.9831
p-value of joint significance test for 'water': 0.0
Average marginal effect for 'trees': -2.2626
p-value of joint significance test for 'trees': 0.0
Average marginal effect for 'road': 1.3139
p-value of joint significance test for 'road': 0.0


In [11]:
# exemplary prediction
id_example = 1000132
feature = 'trees'
# boolean mask selecting the example grid cell; reused for frame and target
is_example = final.id == id_example
# Work on an explicit copy: the row is modified in place further down, and
# mutating a filtered selection of `final` is chained assignment (the
# resulting SettingWithCopyWarning is hidden by the global warning filter).
example = final[is_example].copy()
print('Example temperature value: \n', np.round(target[is_example].item(),2))
print('OLS temperature prediction: \n', np.round(predict_LST_example(example, features_interact, features_no_interact, model, mode="poly", all=True),2))
# raise the chosen feature by 0.01 and predict again
example[feature] += 0.01
print('OLS temperature prediction after delta: \n', np.round(predict_LST_example(example, features_interact, features_no_interact, model, mode="poly", all=True),2))

Example temperature value: 
 36.73
OLS temperature prediction: 
 37.01
OLS temperature prediction after delta: 
 36.99


In [12]:
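# add predictions to final dataframe
# NOTE: currently disabled, so the frame pickled at the end of the notebook
# has no 'pred' column. `model_fit` is not defined anywhere in this notebook;
# presumably the fitted `model` was meant.
# final['pred'] = model.predict(X_poly)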

In [13]:
# persist the fitted model selected as `model`; the file name carries the grid size
model_file = f"{path_model}Causal_Model_{grid_size_meters}_e.pkl"
with open(model_file, 'wb') as file:
    pickle.dump(model, file)

In [14]:
# persist the prepared frame next to the input data; the file name carries the grid size
grid_file = f"{path}gpd_{grid_size_meters}_e.pkl"
with open(grid_file, 'wb') as file:
    pickle.dump(final, file)