# Causal model of urban heat island (UHI) intensity using OLS

### Initialization

In [16]:
# import general packages
import numpy as np
import pandas as pd
import geopandas as gpd
import warnings
import os
import pickle
import yaml
import statsmodels.api as sm

from sklearn.preprocessing import PolynomialFeatures
import pysal.lib
from pysal.model import spreg
from pysal.lib import weights
from scipy import stats
from scipy.stats import f
from scipy.spatial.distance import cdist

In [2]:
# Silence warnings for the session and set up the working locations.
# NOTE(review): a blanket "ignore" also hides pandas chained-assignment and
# deprecation warnings; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")
# home_directory = os.path.expanduser( '~' )
# NOTE(review): machine-specific absolute path; must be edited on other machines.
home_directory = 'C:/Users/stefan/OneDrive - bwedu/04_semester/DS_Project/'
# Presumably the `models` package imported further down is resolved relative
# to this working directory -- TODO confirm.
os.chdir(home_directory + '/DS_Project/modules')
config_path = 'config.yml'
# The handle is named `config_file` rather than `f` so that the `f` imported
# from scipy.stats is not overwritten by the file object.
with open(config_path, 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)
# path = config['data']['data'] + '/uhi_model/'
# All data folders hang off the home directory (trailing slashes matter because
# file names are appended by plain string concatenation below).
path = home_directory
path_raw = path + 'raw/'
path_visual = path + 'visual/'
path_grid = path + 'grid/'
path_model = path + 'model/'

In [3]:
# Import only the helpers this notebook uses instead of `import *`, so the
# origin of every name is explicit and no earlier import can be silently
# overwritten. This import stays below os.chdir(...): presumably the `models`
# package is found relative to that working directory -- TODO confirm.
from models.UHI_modeling.UHI import (
    add_feature_lags,
    compute_marginal_effect_at_avg,
    create_polynomials,
    predict_LST_example,
    test_joint_significance,
)

### Data loading and preparation

In [4]:
# Load the prepared grid for the chosen cell size and keep only the geometry,
# identifier, temperature targets and land-cover / height columns.
grid_size_meters = 250
model_columns = [
    'geometry', 'id', 'nLST', 'wLST',
    'impervious', 'building', 'low vegetation', 'water', 'trees', 'road',
    'avg_height',
]
with open(f'{path}final_{grid_size_meters}_d.pkl', 'rb') as file:
    final = pd.read_pickle(file)
final = final[model_columns]
final.head()

Unnamed: 0,geometry,id,nLST,wLST,impervious,building,low vegetation,water,trees,road,avg_height
0,"POLYGON ((11.40587 48.04366, 11.40587 48.04591...",1000001,31.156486,31.127429,0.13127,0.0,0.074685,0.0,0.789707,0.004335,0.0
1,"POLYGON ((11.40587 48.04815, 11.40923 48.04815...",1000002,33.57891,33.665254,0.059593,0.0,0.42682,0.0,0.496387,0.017202,0.0
2,"POLYGON ((11.40587 48.05040, 11.40923 48.05040...",1000003,35.387467,35.400349,0.0,0.0,0.976624,0.0,0.0,0.023376,0.0
3,"POLYGON ((11.40587 48.05264, 11.40923 48.05264...",1000004,34.819586,34.901132,0.0,0.0,0.983499,0.0,0.0,0.016501,0.0
4,"POLYGON ((11.40587 48.05489, 11.40923 48.05489...",1000005,33.233571,33.182384,0.000732,9.9e-05,0.980496,0.0,0.0,0.018674,0.00876


In [5]:
# Add the intercept column ('const') to the grid, then declare the feature groups.
final = sm.add_constant(final)

# Land-cover shares that are passed to the polynomial expansion further down.
features_interact = ['building', 'low vegetation', 'water', 'trees', 'road']
features = ['const', *features_interact, 'avg_height']

# The lag columns referenced below follow the 'lag_<feature>' naming; they are
# presumably created by add_feature_lags -- TODO confirm in the UHI module.
final = add_feature_lags(final, features=features_interact)

# Columns passed separately from the interacting features: intercept,
# average height and the lag columns.
features_no_interact = ['const', 'avg_height'] + ['lag_' + name for name in features_interact]
target = "wLST"

In [6]:
# create polynomials
# Builds the design matrix used for fitting. Judging by the displayed columns
# it holds the features_interact terms up to degree 2 (squares and pairwise
# products) followed by the features_no_interact columns.
X_poly = create_polynomials(final, features_interact, features_no_interact)
X_poly.head()

Unnamed: 0,building,low vegetation,water,trees,road,building^2,building low vegetation,building water,building trees,building road,...,trees^2,trees road,road^2,const,avg_height,lag_building,lag_low vegetation,lag_water,lag_trees,lag_road
0,0.0,0.074685,0.0,0.789707,0.004335,0.0,0.0,0.0,0.0,0.0,...,0.623637,0.003423,1.9e-05,1.0,0.0,0.0,0.424976,0.0,0.46649,0.011462
1,0.0,0.42682,0.0,0.496387,0.017202,0.0,0.0,0.0,0.0,0.0,...,0.2464,0.008539,0.000296,1.0,0.0,0.0,0.503348,0.0,0.390225,0.009505
2,0.0,0.976624,0.0,0.0,0.023376,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000546,1.0,0.0,1.2e-05,0.553788,0.0,0.332636,0.011046
3,0.0,0.983499,0.0,0.0,0.016501,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000272,1.0,0.0,0.007417,0.753419,0.0,0.173842,0.013784
4,9.9e-05,0.980496,0.0,0.0,0.018674,9.827883e-09,9.7e-05,0.0,0.0,2e-06,...,0.0,0.0,0.000349,1.0,0.00876,0.01324,0.903188,1.2e-05,0.046311,0.015133


### Modeling

In [7]:
# Set up the OLS regression of the target column on the design matrix and
# estimate it with an HC3 robust covariance matrix.
model_init = sm.OLS(endog=final[target], exog=X_poly)
model_fit = model_init.fit(cov_type='HC3')

In [8]:
# print model summary
# Rendered as the cell's last expression; reports the HC3-based standard errors
# requested when the model was fitted.
model_fit.summary()

0,1,2,3
Dep. Variable:,wLST,R-squared:,0.819
Model:,OLS,Adj. R-squared:,0.818
Method:,Least Squares,F-statistic:,2158.0
Date:,"Sat, 08 Jul 2023",Prob (F-statistic):,0.0
Time:,16:35:13,Log-Likelihood:,-14728.0
No. Observations:,8528,AIC:,29510.0
Df Residuals:,8501,BIC:,29700.0
Df Model:,26,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
building,22.3318,3.087,7.233,0.000,16.281,28.383
low vegetation,13.2374,2.164,6.118,0.000,8.997,17.478
water,1.7010,4.725,0.360,0.719,-7.560,10.962
trees,-4.1602,1.813,-2.295,0.022,-7.714,-0.607
road,9.9824,2.782,3.588,0.000,4.530,15.435
building^2,-20.4026,3.426,-5.954,0.000,-27.118,-13.687
building low vegetation,-41.4164,5.807,-7.133,0.000,-52.797,-30.036
building water,-12.9424,9.134,-1.417,0.157,-30.845,4.960
building trees,-7.5712,3.965,-1.910,0.056,-15.342,0.200

0,1,2,3
Omnibus:,431.387,Durbin-Watson:,0.476
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1303.234
Skew:,-0.214,Prob(JB):,1.01e-283
Kurtosis:,4.867,Cond. No.,2150.0


In [9]:
# Re-estimate the same specification with PySAL's spreg.OLS as a cross-check of
# the statsmodels fit (R-squared, log-likelihood and information criteria).
# name_y receives the response column name stored in `target`, so the report is
# labelled with the real variable rather than the literal word 'target'.
m1 = spreg.OLS(
    final[target].values,
    X_poly.values,
    name_y=target,
    name_x=X_poly.columns.tolist(),
)
print(m1.summary)

REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :     unknown
Weights matrix      :        None
Dependent Variable  :      target                Number of Observations:        8528
Mean dependent var  :     35.9476                Number of Variables   :          27
S.D. dependent var  :      3.1984                Degrees of Freedom    :        8501
R-squared           :      0.8189
Adjusted R-squared  :      0.8184
Sum squared residual:   15793.749                F-statistic           :   1478.7981
Sigma-square        :       1.858                Prob(F-statistic)     :           0
S.E. of regression  :       1.363                Log likelihood        :  -14728.438
Sigma-square ML     :       1.852                Akaike info criterion :   29510.875
S.E of regression ML:      1.3609                Schwarz criterion     :   29701.255

-----------------------------------------------------------------------------

In [15]:
# Compute the average marginal effect for every feature of interest and run the
# joint significance test with that feature passed as the one to exclude.
features = ['building', 'low vegetation', 'water', 'trees', 'road']

for feature in features:
    avg_marginal_effect = compute_marginal_effect_at_avg(
        model_fit,
        final,
        feature,
        features_interact,
        features_no_interact,
        delta=0.001,
        step=0.05,
    )
    f_statistic, p_value = test_joint_significance(
        model_unrestricted=model_fit,
        final=final,
        features_interact=features_interact,
        features_no_interact=features_no_interact,
        target=target,
        features_exclude=[feature],
    )
    print(f"Average marginal effect for '{feature}': {np.round(avg_marginal_effect,4)}")
    print(f"p-value of joint significance test for '{feature}': {np.round(p_value,10)}")

Average marginal effect for 'building': 0.0102
Average marginal effect for 'low vegetation': -0.094
Average marginal effect for 'water': -0.2846
Average marginal effect for 'trees': -0.1914
Average marginal effect for 'road': -0.026


In [11]:
# Exemplary prediction: compare the observed temperature of one grid cell with
# the model prediction before and after raising one feature by 0.2.
id_example = 1000132
feature = 'trees'
# Take an explicit copy of the selected row: the in-place `+=` below would
# otherwise be a chained assignment on a filtered slice of `final`.
example = final[final.id == id_example].copy()
print('Example temperature value: \n', np.round(example[target].item(),2))
print('OLS temperature prediction: \n', np.round(predict_LST_example(example, features_interact, features_no_interact, model_fit),2))
# Shift only the chosen feature; all other columns of the example stay as observed.
example[feature] += 0.2
print('OLS temperature prediction after delta: \n', np.round(predict_LST_example(example, features_interact, features_no_interact, model_fit),2))

Example temperature value: 
 36.73
OLS temperature prediction: 
 37.07
OLS temperature prediction after delta: 
 36.87


In [12]:
# add predictions to final dataframe
# In-sample fitted values: X_poly is the same design matrix the model was
# fitted on. Note that this adds a column to `final` in place.
final['pred'] = model_fit.predict(X_poly)

In [13]:
# Persist the fitted OLS results object in the model folder.
model_file = f'{path_model}Causal_Model_{grid_size_meters}_e.pkl'
with open(model_file, 'wb') as file:
    pickle.dump(model_fit, file)

In [14]:
# Persist the grid dataframe (now including the 'pred' column) next to the input data.
grid_file = f'{path}gpd_{grid_size_meters}_c.pkl'
with open(grid_file, 'wb') as file:
    pickle.dump(final, file)