In [1]:
import os
import sys
import time
import argparse
import geopandas as gpd
import fiona
import rasterio as rio
from rasterstats import zonal_stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mgwr.gwr import MGWR
from mgwr.sel_bw import Sel_BW
from mgwr.utils import shift_colormap, truncate_colormap
from datetime import datetime
import glob
import pathlib


In [2]:
folder_root = os.getcwd()
print(folder_root)

folder_results =  os.path.join(folder_root, 'output')
print(folder_results)

e:\citarum\GeoAnalisisGEE
e:\citarum\GeoAnalisisGEE\output


In [3]:
kecamatan_polygon_file = os.path.join(folder_root, 'input', 'vektor', 'batas_kecamatan_citarum_48S.shp')
print(kecamatan_polygon_file)
gdf = gpd.read_file(kecamatan_polygon_file)

crs = gdf.crs
gdf['x_utm'] = gdf.centroid.x
gdf['y_utm'] = gdf.centroid.y

e:\citarum\GeoAnalisisGEE\input\vektor\batas_kecamatan_citarum_48S.shp


In [4]:
list_raster_file = glob.glob(os.path.join(folder_root, 'input', 'raster', '*.tif'))
df_raster_file = pd.DataFrame(list_raster_file)
df_raster_file = df_raster_file.astype('string')
print(df_raster_file)
print(len(df_raster_file))
list(df_raster_file)
df_raster_file.rename(columns = {0:'filenamelong'}, inplace = True)

df_raster_file['filename'] = df_raster_file["filenamelong"].str.split("\\").str[-1]
df_raster_file['parameter'] = df_raster_file["filename"].str.split(".").str[0]
print(df_raster_file)


                                                   0
0  e:\citarum\GeoAnalisisGEE\input\raster\2020_de...
1  e:\citarum\GeoAnalisisGEE\input\raster\2020_de...
2  e:\citarum\GeoAnalisisGEE\input\raster\2020_et...
3  e:\citarum\GeoAnalisisGEE\input\raster\2020_fr...
4  e:\citarum\GeoAnalisisGEE\input\raster\2020_pa...
5  e:\citarum\GeoAnalisisGEE\input\raster\2020_pr...
6
                                        filenamelong                filename  \
0  e:\citarum\GeoAnalisisGEE\input\raster\2020_de...            2020_dem.tif   
1  e:\citarum\GeoAnalisisGEE\input\raster\2020_de...  2020_depth_to_root.tif   
2  e:\citarum\GeoAnalisisGEE\input\raster\2020_et...            2020_eto.tif   
3  e:\citarum\GeoAnalisisGEE\input\raster\2020_fr...         2020_fractp.tif   
4  e:\citarum\GeoAnalisisGEE\input\raster\2020_pa...           2020_pawc.tif   
5  e:\citarum\GeoAnalisisGEE\input\raster\2020_pr...  2020_precipitation.tif   

            parameter  
0            2020_dem  
1  2020_depth_to_r

In [5]:
vars = []
zstats_merged = []

for ind in df_raster_file.index:
    raster_data = rio.open(df_raster_file['filenamelong'][ind], "r")  
    parameter = df_raster_file["parameter"][ind]
    print(parameter)
    profile = raster_data.profile
    transform = profile['transform']
    nodata = raster_data.nodata

    img = raster_data.read(1)
    img = img.astype('float32') 
    img[img==nodata] = np.nan
    zstats = zonal_stats(gdf, img, affine=transform, prefix= f'{parameter}_', nodata=nodata, stats='mean')
    zstats_merged.append(zstats) # zstats_merged is now a 2D list of lists [bands, shapes]
    vars.append(f'{parameter}_mean')
 

2020_dem
2020_depth_to_root
2020_eto
2020_fractp
2020_pawc
2020_precipitation


In [6]:
# Flip the dimensions using zip
zstats_merged_list = list(zip(*zstats_merged))
# Aggregate into a single list (dimension: shapes) containing bands
final_zstats = [{k: v for d in s for k, v in d.items()} for s in zstats_merged_list]
# Convert zonal statistic results to pandas dataframe
df_zstats = pd.DataFrame(final_zstats)

In [7]:
print(df_zstats)
pd.DataFrame(df_zstats).to_csv('sample.csv')    

     2020_dem_mean  2020_depth_to_root_mean  2020_eto_mean  2020_fractp_mean  \
0      1837.818220               710.461660     876.374197          0.242982   
1      1629.247037              1500.000000    1523.854819          0.490222   
2       665.892403              1412.489325    1788.269518          0.454936   
3       129.179042               920.715664    1977.316302          0.335631   
4       338.112009               842.813500    1891.811341          0.428654   
..             ...                      ...            ...               ...   
170     891.696451               500.000000    1833.100000          0.465885   
171     720.570509               536.442208    1886.590887          0.472731   
172     743.960604               502.740981    1894.872615          0.473634   
173     820.127722               500.000000    1858.025568          0.540017   
174     216.835217               947.868718    1992.714877          0.839729   

     2020_pawc_mean  2020_precipitation

In [8]:
 # Merge zones and zonal statistics data frame
gdf = gdf.join(df_zstats)

# Clean the data
gdf = gdf.dropna()
print('Final input data shape: {}'.format(gdf.shape))

# Prepare datasets input
y = gdf[vars[0]].values.reshape((-1,1))
X = gdf[list(vars[1:])].values
coords = list(zip(gdf['x_utm'],gdf['y_utm']))

print('Independent variables: {}'.format(X.shape))
print('Dependent variable: {}'.format(y.shape))

X = (X - X.mean(axis=0)) / X.std(axis=0)
y = (y - y.mean(axis=0)) / y.std(axis=0)

# Calibrate MGWR model
print('Start MGWR ....')
start_time = time.time()
mgwr_selector = Sel_BW(coords, y, X, multi=True)
mgwr_bw = mgwr_selector.search(multi_bw_min=[2])
print('MGWR bandwidth: {}'.format(mgwr_bw))
mgwr_results = MGWR(coords, y, X, mgwr_selector).fit()
mgwr_results.summary()
elapsed_time = (time.time() - start_time)/60
print('MGWR complete. Elapsed time (minutes): '+str(elapsed_time))

print('Prepare MGWR results for mapping ...')
# Obtain t-vals filtered based on multiple testing correction
mgwr_filtered_t = mgwr_results.filter_tvals()



Final input data shape: (169, 17)
Independent variables: (169, 5)
Dependent variable: (169, 1)
Start MGWR ....


Backfitting:   0%|          | 0/200 [00:00<?, ?it/s]

MGWR bandwidth: [ 56. 164.  71.  51.  71. 168.]


Inference:   0%|          | 0/1 [00:00<?, ?it/s]

Model type                                                         Gaussian
Number of observations:                                                 169
Number of covariates:                                                     6

Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares:                                             22.897
Log-likelihood:                                                     -70.895
AIC:                                                                153.789
AICc:                                                               156.485
BIC:                                                               -813.276
R2:                                                                   0.865
Adj. R2:                                                              0.860

Variable                              Est.         SE  t(Est/SE)    p-value
------------------------------- ---------- ---------- ------

In [9]:
print('Prepare MGWR results for mapping ...')
    # Obtain t-vals filtered based on multiple testing correction
mgwr_filtered_t = mgwr_results.filter_tvals()

    # Plot MGWR parameters
for i in range(1, 6):
    if i-1==0:
        param = 'mgwr_intercept'
    else:
        param = f'mgwr_b{i}'
    
    # Add MGWR parameters to GeoDataframe
    gdf[param] = mgwr_results.params[:,i-1]
    
    # Set color map
    vmin = gdf[param].min()
    vmax = gdf[param].max()
    cmap = plt.cm.seismic

    # Create scalar mappable for colorbar and stretch colormap across range of data values
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))

    # Plot
    fig, ax = plt.subplots(nrows=1, ncols=1)
    fig.set_size_inches(8, 6)
    ax.set_title(param, fontsize=10)
    gdf.plot(column=param,
        ax=ax,
        legend=True,
        cmap=sm.cmap, vmin=vmin, vmax=vmax, **{'edgecolor':'black', 'alpha':.65})
    if (mgwr_filtered_t[:,0] == 0).any(): #If there are insignificant parameters plot gray polygons over them
        gdf[mgwr_filtered_t[:,0] == 0].plot(color='lightgrey', ax=ax, **{'edgecolor':'black'})
    plt.savefig(folder_results + '\\png\\'+ param +'.png', dpi=100)
    plt.close()

print('Save output as vector shapefile ....')
with fiona.Env(OSR_WKT_FORMAT='WKT2_2018'):
    gdf.to_file(folder_results + '\\shp\\output1.shp')

print('{}-Done!'.format(datetime.now()))

Prepare MGWR results for mapping ...
Save output as vector shapefile ....


  gdf.to_file(folder_results + '\\shp\\output1.shp')


2023-01-21 12:15:46.051379-Done!


In [1]:
import ee
import geemap

In [None]:
Map = geemap.Map(center=(-6.3893832360229545,107.31268018020506), zoom=7)
Map

In [None]:
result1_shp = gpd.read_file(folder_results + '\\shp\\output1.shp')
citarum_kecamatan = geemap.shp_to_ee(folder_results + '\\shp\\output1.shp')

Map.addLayer(citarum_kecamatan, {}, 'citarum_kecamatan')