# Causal model of urban heat island (UHI) intensity using OLS

### Initialization

In [1]:
# import general packages
import numpy as np
import pandas as pd
import geopandas as gpd
import warnings
import os
import pickle
import yaml
import folium
import statsmodels.api as sm

from sklearn.preprocessing import PolynomialFeatures
from pysal.lib import weights
from scipy import stats
from scipy.stats import f
from scipy.spatial.distance import cdist

In [2]:
# ignore warnings & adjust location
# NOTE: this silences every warning for the rest of the session, including
# pandas/statsmodels diagnostics.
warnings.filterwarnings("ignore")
# home_directory = os.path.expanduser( '~' )
home_directory = 'C:/Users/stefan/Documents/10_DS_Project/'
# switch into the project's modules folder so that 'config.yml' and the
# project-local imports used afterwards resolve relative to it
os.chdir(home_directory + '/DS_Project/modules')
config_path = 'config.yml'
# The handle is deliberately NOT called `f`: that name is already bound by
# `from scipy.stats import f` at the top of the file and would be overwritten
# with a closed file object.
with open(config_path, 'r') as config_file:
    # NOTE(review): FullLoader is acceptable for a trusted, project-owned
    # config; prefer yaml.safe_load if this file can ever come from outside.
    config = yaml.load(config_file, Loader=yaml.FullLoader)
# path = config['data']['data'] + '/uhi_model/'
# all data folders are sub-folders of the home directory
path = home_directory
path_raw = path + 'raw/'
path_visual = path + 'visual/'
path_grid = path + 'grid/'
path_model = path + 'model/'

In [3]:
from models.UHI_modeling.UHI import *

### Data loading and preparation

In [4]:
# model parameters for the final computation; both values are handed on
# unchanged to the UHI helper calls in the cells below
# NOTE(review): `all` shadows the Python builtin of the same name; renaming it
# means updating every later `all=all` / positional `all` argument as well.
mode, all = "log", True

In [5]:
# read the pickled grid (features + LST targets) for the chosen cell size
grid_size_meters = 250
keep_columns = [
    'geometry', 'id', 'nLST', 'wLST', 'impervious', 'building',
    'low vegetation', 'water', 'trees', 'road', 'avg_height',
]
features_file = path + 'final_' + str(grid_size_meters) + '_e.pkl'
with open(features_file, 'rb') as handle:
    final = pd.read_pickle(handle)
# keep only the columns that are used from here on
final = final[keep_columns]
final.head()

Unnamed: 0,geometry,id,nLST,wLST,impervious,building,low vegetation,water,trees,road,avg_height
0,"POLYGON ((11.40587 48.04366, 11.40587 48.04591...",1000001,31.156486,31.127429,0.13127,0.0,0.074685,0.0,0.789707,0.004335,0.0
1,"POLYGON ((11.40587 48.04815, 11.40923 48.04815...",1000002,33.57891,33.665254,0.059593,0.0,0.42682,0.0,0.496387,0.017202,0.0
2,"POLYGON ((11.40587 48.05040, 11.40923 48.05040...",1000003,35.387467,35.400349,0.0,0.0,0.976624,0.0,0.0,0.023376,0.0
3,"POLYGON ((11.40587 48.05264, 11.40923 48.05264...",1000004,34.819586,34.901132,0.0,0.0,0.983499,0.0,0.0,0.016501,0.0
4,"POLYGON ((11.40587 48.05489, 11.40923 48.05489...",1000005,33.233571,33.182384,0.000732,9.9e-05,0.980496,0.0,0.0,0.018674,0.00876


In [6]:
# feature groups: the first list is passed to the helpers as `features_interact`,
# the second one as `features_no_interact`
features_interact = ['building','low vegetation','water','trees','road']
features_no_interact = ['const', 'avg_height'] + ['lag_' + name for name in features_interact]
# name of the LST column used as regression target
lst = "wLST"

# extend the frame with the intercept column and the lag_* columns
final = sm.add_constant(final)
final = add_feature_lags(final, features=features_interact)
target = final[lst]

In [7]:
# build the regressor matrix via the project helper and preview its first rows
X_log = create_log_interactions(
    final,
    features_interact,
    features_no_interact,
    all=all,
)
X_log.head()

Unnamed: 0,building,low vegetation,water,trees,road,building low vegetation,building water,building trees,building road,low vegetation water,...,water trees,water road,trees road,const,avg_height,lag_building,lag_low vegetation,lag_water,lag_trees,lag_road
0,0.0,2.136359,0.0,4.38166,0.360091,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.577798,1.0,0.0,0.0,3.772705,0.0,3.863861,0.763716
1,0.0,3.776936,0.0,3.924716,1.000707,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,3.927492,1.0,0.0,0.0,3.938368,0.0,3.689442,0.668062
2,0.0,4.591704,0.0,0.0,1.205243,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.001238,4.032092,0.0,3.534082,0.744106
3,0.0,4.598648,0.0,0.0,0.974604,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.554888,4.335223,0.0,2.911494,0.866448
4,0.009865,4.59562,0.0,0.0,1.053397,0.045335,0.0,0.0,0.010392,0.0,...,0.0,0.0,0.0,1.0,0.00876,0.84328,4.514357,0.001237,1.728307,0.921586


### Modeling

In [8]:
# set up the OLS regression of the LST target on the X_log regressors
model_init_log = sm.OLS(endog=target, exog=X_log)
# estimate it, requesting the HC3 covariance type for the standard errors
model_log = model_init_log.fit(cov_type='HC3')

In [9]:
# display the regression table of the fitted model (estimated with cov_type='HC3')
model_log.summary()

0,1,2,3
Dep. Variable:,wLST,R-squared:,0.806
Model:,OLS,Adj. R-squared:,0.806
Method:,Least Squares,F-statistic:,2082.0
Date:,"Thu, 13 Jul 2023",Prob (F-statistic):,0.0
Time:,17:13:59,Log-Likelihood:,-15016.0
No. Observations:,8528,AIC:,30080.0
Df Residuals:,8506,BIC:,30230.0
Df Model:,21,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
building,1.0562,0.143,7.380,0.000,0.776,1.337
low vegetation,-0.5311,0.100,-5.298,0.000,-0.728,-0.335
water,-2.0571,0.110,-18.669,0.000,-2.273,-1.841
trees,-1.2352,0.107,-11.581,0.000,-1.444,-1.026
road,-0.5575,0.109,-5.130,0.000,-0.771,-0.345
building low vegetation,-0.1339,0.018,-7.559,0.000,-0.169,-0.099
building water,0.1182,0.026,4.614,0.000,0.068,0.168
building trees,-0.1142,0.027,-4.173,0.000,-0.168,-0.061
building road,-0.0563,0.021,-2.709,0.007,-0.097,-0.016

0,1,2,3
Omnibus:,315.899,Durbin-Watson:,0.518
Prob(Omnibus):,0.0,Jarque-Bera (JB):,922.53
Skew:,-0.077,Prob(JB):,4.73e-201
Kurtosis:,4.604,Cond. No.,316.0


In [10]:
# per feature of interest: average marginal effect and the p-value of the
# joint significance test, both computed by helpers from the UHI module
features = ['building', 'low vegetation', 'water', 'trees', 'road']

model = model_log

for feature in features:
    avg_marginal_effect = compute_avg_marginal_effect_adv(
        model, final, feature, features_interact, features_no_interact,
        mode, all, delta=0.001, step=0.01,
    )
    # the current feature is handed over as the only entry of `features_exclude`
    f_statistic, p_value = test_joint_significance(
        model, final, features_interact, features_no_interact, target,
        features_exclude=[feature], mode=mode, all=all,
    )
    effect_line = f"Average marginal effect for '{feature}': {np.round(avg_marginal_effect,4)}"
    p_value_line = f"p-value of joint significance test for '{feature}': {np.round(p_value,10)}"
    print(effect_line, p_value_line, sep="\n")

Average marginal effect for 'building': 0.1635
p-value of joint significance test for 'building': 0.0
Average marginal effect for 'low vegetation': -0.0108
p-value of joint significance test for 'low vegetation': 0.0
Average marginal effect for 'water': -0.4135
p-value of joint significance test for 'water': 0.0
Average marginal effect for 'trees': -0.0643
p-value of joint significance test for 'trees': 0.0
Average marginal effect for 'road': 0.0328
p-value of joint significance test for 'road': 0.0


In [11]:
# exemplary prediction: compare the observed value, the model prediction and the
# prediction after a what-if change for a single grid cell
id_example = 1000132
feature = 'trees'
delta = 0.05
# Take an explicit copy: the what-if edits below modify `example` in place and
# must neither write through to `final` nor rely on chained assignment on a
# filtered slice (whose warning is suppressed globally in this notebook).
example = final[final.id == id_example].copy()
print('Example temperature value: \n', np.round(target[final.id == id_example].item(),2))
print('OLS temperature prediction: \n', np.round(predict_LST_example(example, features_interact, features_no_interact, model, mode, all),2))
# what-if scenario: raise the chosen feature by `delta` and lower every other
# interacting feature by an equal part, never going below zero
# NOTE(review): the part is delta / len(features_interact) although only
# len(features_interact) - 1 listed features are lowered — presumably the
# remaining part belongs to the unlisted 'impervious' column; confirm.
example[feature] += delta
for i in features_interact:
    if i != feature:
        example[i] = (example[i] - (delta / len(features_interact))).clip(lower=0)
print('OLS temperature prediction after delta: \n', np.round(predict_LST_example(example, features_interact, features_no_interact, model, mode, all),2))

Example temperature value: 
 36.73
OLS temperature prediction: 
 37.45
OLS temperature prediction after delta: 
 37.33


In [12]:
# add predictions to final dataframe
# (predictions for X_log, i.e. the same regressor matrix the model was fitted on)
final['pred'] = model.predict(X_log)

In [13]:
# persist the fitted model object in the model folder
model_file = path_model + 'Causal_Model_' + str(grid_size_meters) + '_e.pkl'
with open(model_file, 'wb') as handle:
    pickle.dump(model, handle)

In [14]:
# preview the first rows of the frame, which now also carries the 'pred' column
final.head()

Unnamed: 0,const,geometry,id,nLST,wLST,impervious,building,low vegetation,water,trees,road,avg_height,lag_building,lag_low vegetation,lag_water,lag_trees,lag_road,pred
0,1.0,"POLYGON ((11.40587 48.04366, 11.40587 48.04591...",1000001,31.156486,31.127429,0.13127,0.0,0.074685,0.0,0.789707,0.004335,0.0,0.0,0.424976,0.0,0.46649,0.011462,32.431425
1,1.0,"POLYGON ((11.40587 48.04815, 11.40923 48.04815...",1000002,33.57891,33.665254,0.059593,0.0,0.42682,0.0,0.496387,0.017202,0.0,0.0,0.503348,0.0,0.390225,0.009505,33.381792
2,1.0,"POLYGON ((11.40587 48.05040, 11.40923 48.05040...",1000003,35.387467,35.400349,0.0,0.0,0.976624,0.0,0.0,0.023376,0.0,1.2e-05,0.553788,0.0,0.332636,0.011046,35.288913
3,1.0,"POLYGON ((11.40587 48.05264, 11.40923 48.05264...",1000004,34.819586,34.901132,0.0,0.0,0.983499,0.0,0.0,0.016501,0.0,0.007417,0.753419,0.0,0.173842,0.013784,36.610997
4,1.0,"POLYGON ((11.40587 48.05489, 11.40923 48.05489...",1000005,33.233571,33.182384,0.000732,9.9e-05,0.980496,0.0,0.0,0.018674,0.00876,0.01324,0.903188,1.2e-05,0.046311,0.015133,38.060432


In [15]:
# persist the enriched grid (features, lags and predictions) next to the input data
grid_file = path + 'gpd_' + str(grid_size_meters) + '_e.pkl'
with open(grid_file, 'wb') as handle:
    pickle.dump(final, handle)

In [16]:
# visualize all information: one clickable polygon per grid cell on a satellite
# base map, with the cell's values shown in a popup
cols = ['id','nLST','wLST','impervious'] + features_interact + features_no_interact
coordinates = config['bboxes']['munich_grid']
# map centre = midpoint of the bounding box
# NOTE(review): assumes the bbox is ordered (lon_min, lat_min, lon_max, lat_max)
# so that indices 1/3 give latitude and 0/2 longitude — confirm against config.yml
center = [(coordinates[1] + coordinates[3]) / 2, (coordinates[0] + coordinates[2]) / 2]
grid_map = folium.Map(location=center, zoom_start=13, control = True)


def cell_outline_style(_feature):
    """Return the shared outline style for every grid cell polygon."""
    # 'opacity' is the Leaflet option for stroke opacity; the previously used
    # key 'colorOpacity' is not a Leaflet path option and was silently ignored.
    return {'fillColor': 'transparent', 'color': 'black', 'opacity': 0.7}


for idx, grids in final.iterrows():
    # popup text: one "<column>: <rounded value>" line per reported column
    values = [f"{column}: {np.round(grids[column], 2)}" for column in cols]
    tooltip_text = "<br>".join(values)
    folium.GeoJson(
        grids.geometry,
        style_function=cell_outline_style,
        popup=folium.Popup(tooltip_text, max_width=300),
        name=grids['id'],
        control=False,  # keep the individual cells out of the layer control
    ).add_to(grid_map)

# satellite imagery as selectable base layer
tile = folium.TileLayer(
    tiles = 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    attr = 'Esri', name = 'Esri Satellite', overlay = False, control = True).add_to(grid_map)

folium.LayerControl().add_to(grid_map)

grid_map.save(path_visual + 'all_features_model_grid.html')