In [1]:
import warnings

warnings.filterwarnings("ignore")


In [2]:
# Compute spatial autocorrelation
from esda.moran import Moran

# Load the data
import geopandas as gpd

# Compute spatial weights
import libpysal
from libpysal.weights import Queen, Rook

# Seed the permutation test
import numpy as np

# Normalize the data
from sklearn.preprocessing import StandardScaler


### Load the data

In [3]:
# Read the prepared GeoJSON layer (path is relative to the notebook) into a
# GeoDataFrame, then preview its first rows.
GEO_DF_PATH = "geo_df_stats_norm_no-nan_prj.geojson"
geo_df = gpd.read_file(GEO_DF_PATH)
geo_df.head(n=5)

Unnamed: 0,NAME_EN,ADM0_ISO,name_en,adm0_iso,pop_est,gdp_md,num_of_blds,pop_2021,hdi_2021,deaths_2021,num_of_blds_norm,gdp_md_norm,pop_2021_norm,hdi_2021_norm,deaths_2021_norm,geometry
0,Indonesia,IDN,Indonesia,IDN,541251136.0,2238380.0,203216.0,273753191.0,0.705,696.0,0.024289,0.05195,0.19382,0.572379,0.179151,"MULTIPOLYGON (((11261888.019 534711.268, 11261..."
1,Malaysia,MYS,Malaysia,MYS,63899554.0,729362.0,11160.0,33573874.0,0.803,117.0,0.001334,0.016926,0.023764,0.735441,0.030116,"MULTIPOLYGON (((11261888.019 534711.268, 11261..."
2,Chile,CHL,Chile,CHL,37904076.0,564636.0,1361.0,19493184.0,0.855,0.0,0.000163,0.013103,0.013794,0.821963,0.0,"MULTIPOLYGON (((-6512441.828 -2231734.484, -65..."
3,Bolivia,BOL,Bolivia,BOL,23026200.0,81790.0,8063.0,12079472.0,0.692,127.0,0.000964,0.001896,0.008545,0.550749,0.03269,"MULTIPOLYGON (((-6512441.828 -2231734.484, -65..."
4,Peru,PER,Peru,PER,65020906.0,453696.0,128.0,33715471.0,0.762,103.0,1.5e-05,0.010528,0.023864,0.667221,0.026512,"MULTIPOLYGON (((-6512441.828 -2231734.484, -65..."


### Data preparation

In [4]:
# Index the rows by ISO code.
# - The guard keeps this cell re-runnable: once ADM0_ISO has been moved into
#   the index it is no longer a column, so a second set_index call would raise
#   KeyError.
# - verify_integrity=True makes pandas raise ValueError if the codes contain
#   duplicates, so the "unique index" assumption is checked rather than assumed.
if geo_df.index.name != "ADM0_ISO":
    geo_df = geo_df.set_index("ADM0_ISO", verify_integrity=True)


In [5]:
# Preview the first rows again to check how the frame is indexed.
geo_df.head(n=5)

Unnamed: 0_level_0,NAME_EN,name_en,adm0_iso,pop_est,gdp_md,num_of_blds,pop_2021,hdi_2021,deaths_2021,num_of_blds_norm,gdp_md_norm,pop_2021_norm,hdi_2021_norm,deaths_2021_norm,geometry
ADM0_ISO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
IDN,Indonesia,Indonesia,IDN,541251136.0,2238380.0,203216.0,273753191.0,0.705,696.0,0.024289,0.05195,0.19382,0.572379,0.179151,"MULTIPOLYGON (((11261888.019 534711.268, 11261..."
MYS,Malaysia,Malaysia,MYS,63899554.0,729362.0,11160.0,33573874.0,0.803,117.0,0.001334,0.016926,0.023764,0.735441,0.030116,"MULTIPOLYGON (((11261888.019 534711.268, 11261..."
CHL,Chile,Chile,CHL,37904076.0,564636.0,1361.0,19493184.0,0.855,0.0,0.000163,0.013103,0.013794,0.821963,0.0,"MULTIPOLYGON (((-6512441.828 -2231734.484, -65..."
BOL,Bolivia,Bolivia,BOL,23026200.0,81790.0,8063.0,12079472.0,0.692,127.0,0.000964,0.001896,0.008545,0.550749,0.03269,"MULTIPOLYGON (((-6512441.828 -2231734.484, -65..."
PER,Peru,Peru,PER,65020906.0,453696.0,128.0,33715471.0,0.762,103.0,1.5e-05,0.010528,0.023864,0.667221,0.026512,"MULTIPOLYGON (((-6512441.828 -2231734.484, -65..."


### Compute spatial weights

In [6]:
# Build the spatial weights object `w` from geo_df using the Queen contiguity
# criterion.
# NOTE(review): neither `ids` nor `use_index` is passed, so how observations
# are keyed inside `w` is left to the installed libpysal's default - confirm
# that this matches the ADM0_ISO index if `w` is ever looked up by id.
w = Queen.from_dataframe(df=geo_df)


### Normalize the data

In [7]:
# Indicator columns that are standardized here and analysed afterwards
indicators = [
    'pop_est',
    'gdp_md',
    'num_of_blds',
    'pop_2021',
    'hdi_2021',
    'deaths_2021',
]

# Fit the scaler on the indicator columns, then write the scaled values back
# over those same columns.
# NOTE(review): this replaces the raw values in geo_df in place, so earlier
# previews of these columns no longer reflect the frame's contents; the
# separate *_norm columns are not touched.
scaler = StandardScaler()
scaler.fit(geo_df[indicators])
geo_df[indicators] = scaler.transform(geo_df[indicators])


### Compute spatial autocorrelation

In [8]:
# `p_sim` is a pseudo p-value obtained from random permutations, so without a
# fixed seed it changes on every run (and values close to 0.05 can flip
# significance). esda's Moran shuffles via NumPy's legacy global random state,
# which is why np.random.seed is used here rather than a local Generator.
# Seeding inside this cell also makes re-running the cell on its own
# reproduce the same output.
MORAN_SEED = 12345
np.random.seed(MORAN_SEED)

# One global Moran's I per indicator, all using the same weights `w`.
for indicator in indicators:
    moran = Moran(geo_df[indicator], w)
    print(f"{indicator} Moran's I: {moran.I:.2f}, p-value: {moran.p_sim:.4f}")


pop_est Moran's I: 0.16, p-value: 0.0150
gdp_md Moran's I: 0.08, p-value: 0.0190
num_of_blds Moran's I: 0.02, p-value: 0.0390
pop_2021 Moran's I: 0.16, p-value: 0.0130
hdi_2021 Moran's I: 0.11, p-value: 0.0190
deaths_2021 Moran's I: 0.09, p-value: 0.0470
