In [16]:
import numpy as np
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gp
import multiprocessing as mp
from mgwr.gwr import GWR,MGWR
from mgwr.sel_bw import Sel_BW

In [17]:
# Read the Funda listings sample (GeoPackage) into a GeoDataFrame.
sample_path = "data/test/test_sample(100p).gpkg"
funda_data = gp.read_file(sample_path)

In [18]:
# Preview the first five listings to check the columns that were loaded.
funda_data.head(n=5)

Unnamed: 0,...1,house_id,house_type,building_type,price,price_m2,room,bedroom,bathroom,living_area,...,ym_list,year_list,descrip,zip,letters,city,addressline_city,addressline_zip,addresszip,geometry
0,0.0,42037381.0,huis,Resale property,495000.0,3960.0,4.0,3.0,1.0,125.0,...,2023-03-01,2023.0,\nUnique and spacious living in a popular r...,1944,KL,Beverwijk\r\n,Beverwijk\r\n,1944 KL,"Wildemanskruid 37, 1944 KL, Beverwijk\r\n",POINT (104832.002 501219.996)
1,1.0,42032180.0,huis,Resale property,450000.0,3982.3,5.0,4.0,1.0,113.0,...,2023-03-01,2023.0,\n***This property is listed bij a MVA Cert...,2133,CD,Hoofddorp\r\n,Hoofddorp\r\n,2133 CD,"Birkholm 185, 2133 CD, Hoofddorp\r\n",POINT (106101.113 480140.256)
2,2.0,88477708.0,huis,Resale property,749000.0,5761.5,4.0,3.0,1.0,130.0,...,2023-02-01,2023.0,\nBloemendaal makelaars biedt aan: Zuiderdij...,1606,ME,Venhuizen\r\n,Venhuizen\r\n,1606 ME,"Zuiderdijk 3, 1606 ME, Venhuizen\r\n",POINT (145231.999 521329.999)
3,3.0,42037325.0,huis,Resale property,550000.0,4198.5,5.0,4.0,1.0,131.0,...,2023-03-01,2023.0,\nThis text has been automatically translate...,1216,HP,Hilversum\r\n,Hilversum\r\n,1216 HP,"Gomarushof 112, 1216 HP, Hilversum\r\n",POINT (137976.519 470153.606)
4,4.0,42017210.0,huis,Resale property,440000.0,3728.8,5.0,4.0,1.0,118.0,...,2023-02-01,2023.0,\nIn de buurt Getsewoud-Zuid te Nieuw-Vennep...,2151,HH,Nieuw-Vennep\r\n,Nieuw-Vennep\r\n,2151 HH,"Swaenstein 24, 2151 HH, Nieuw-Vennep\r\n",POINT (102036.839 476175.131)


In [19]:
# Dependent variable: the listing price as an (n, 1) column vector.
b_y = funda_data['price'].to_numpy().reshape(-1, 1)

In [20]:
# Independent variables, one array column per covariate.
# The column order matters: the fitted params are read back by position later
# (column 0 is the intercept, columns 1..3 follow this list).
covariate_columns = ['room', 'living_area', 'house_age']
b_X = funda_data[covariate_columns].to_numpy()

In [21]:
# Build the list of (x, y) coordinate pairs, one per listing, for the model.
points = funda_data['geometry']
u, v = points.x, points.y
b_coords = [(x_coord, y_coord) for x_coord, y_coord in zip(u, v)]

In [22]:
# Parallelization pays off mostly when the data are large and/or the machine
# has many cores.
# mgwr has a soft dependency on numba; install it for better performance
# (pip install numba).
n_proc = 3  # number of worker processes in the pool
pool = mp.Pool(processes=n_proc)

In [23]:
%%time
# Basic GWR in parallel mode: search a single bandwidth, print it, then fit.
gwr_selector = Sel_BW(b_coords, b_y, b_X)
gwr_bw = gwr_selector.search(pool=pool)  # the pool parallelises the bandwidth search
print(gwr_bw)
gwr_model = GWR(b_coords, b_y, b_X, gwr_bw)
gwr_results = gwr_model.fit(pool=pool)  # the same pool is reused for the fit

54.0
CPU times: total: 375 ms
Wall time: 9.26 s


In [25]:
# Print the text summary of the fitted GWR model. The report starts with the
# model type, the number of observations and covariates, and the global
# regression results (RSS, log-likelihood, AIC/AICc/BIC, R2).
gwr_results.summary()

Model type                                                         Gaussian
Number of observations:                                                1354
Number of covariates:                                                     4

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                       59897539272782.562
Log-likelihood:                                                  -18516.434
AIC:                                                              37040.868
AICc:                                                             37042.913
BIC:                                                           59897539263047.961
R2:                                                                   0.622
Adj. R2:                                                              0.621

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- -----

In [29]:
%%time
# MGWR in parallel mode: one bandwidth is searched per covariate (intercept
# included), so max_iter_multi has to be given to cap the backfitting iterations.
# NOTE(review): the mgwr example notebooks standardise y and X before MGWR; the
# inputs here are in raw units - confirm this is intended.
mgwr_selector = Sel_BW(b_coords, b_y, b_X, multi=True)
mgwr_bw = mgwr_selector.search(criterion="AICc", max_iter_multi=10, pool=pool)
print(mgwr_bw)
mgwr_model = MGWR(b_coords, b_y, b_X, selector=mgwr_selector)
mgwr_results = mgwr_model.fit(pool=pool)

Backfitting:   0%|          | 0/10 [00:00<?, ?it/s]

[93. 48. 46. 70.]


Inference:   0%|          | 0/3 [00:00<?, ?it/s]

CPU times: total: 15.4 s
Wall time: 2min 2s


In [31]:
# Print the text summary of the fitted MGWR model. The report starts with the
# model type, the number of observations and covariates, and the global
# regression results (RSS, log-likelihood, AIC/AICc/BIC, R2).
mgwr_results.summary()

Model type                                                         Gaussian
Number of observations:                                                1354
Number of covariates:                                                     4

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                       59897539272782.562
Log-likelihood:                                                  -18516.434
AIC:                                                              37040.868
AICc:                                                             37042.913
BIC:                                                           59897539263047.961
R2:                                                                   0.622
Adj. R2:                                                              0.621

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- -----

In [42]:
# Recreate the R-style result table: one row per observation holding the local
# coefficients, the fitted value, the residual, the local standard errors, the
# local t-values and the observation geometry.
#
# term_names must follow the column order of mgwr_results.params: column 0 is
# the intercept, then the covariates in the order they were stacked into b_X.
term_names = ['Intercept', 'room', 'living_area', 'house_age']

# predy and y are column vectors; flatten them so every column is plain 1-D.
predicted = mgwr_results.predy.reshape(-1)
observed = mgwr_results.y.reshape(-1)

df = gp.GeoDataFrame()

# Local coefficient estimates.
for k, term in enumerate(term_names):
    df[term] = mgwr_results.params[:, k]

df['yhat'] = predicted
df['residual'] = observed - predicted

# Local standard errors. These come from `bse`; the previous version copied
# `params` here, so the *_SE columns merely duplicated the coefficients.
for k, term in enumerate(term_names):
    df[term.lower() + '_SE'] = mgwr_results.bse[:, k]

# Local t-values.
for k, term in enumerate(term_names):
    df[term.lower() + '_TV'] = mgwr_results.tvalues[:, k]

# Attach the geometry last; it is aligned with the rows on the index.
df['geometry'] = funda_data['geometry']

In [40]:
# Write the result table to a GeoPackage, then show its first rows.
# The preview has to be the last expression of the cell: a bare `df.head()`
# placed before another statement is evaluated but never displayed.
df.to_file('data/test/test_mgwr.gpkg', driver='GPKG')
df.head()