In [1]:
import geopandas as gpd
import pandas as pd
import classes.entropycalculator as ec
from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy
import numpy as np
import gc

from IPython.display import clear_output

from tqdm import tqdm
tqdm.pandas()

# Number of the gemeenten partition to process. It is interpolated into the
# input parquet path (data/gemeenten_parts/gemeenten_{part}.parquet) and into
# the output path under data/gemeenten_parts/calculated/.
part = 1

In [2]:
# Load the gemeenten partition selected by `part` from parquet and preview
# its first ten rows as the cell output.
gemeenten = gpd.read_parquet(f"data/gemeenten_parts/gemeenten_{part}.parquet")
gemeenten.head(10)


Unnamed: 0,gemeentecode,gemeentenaam,H2O,OAD,STED,BEV_DICHTH,AANT_INW,AANT_MAN,AANT_VROUW,P_00_14_JR,...,L1_shannon_1,total_amenities_2,L0_shannon_2,L1_shannon_2,RE_L0_0,RE_L1_0,RE_L0_1,RE_L1_1,RE_L0_2,RE_L1_2
57,GM0233,Ermelo,NEE,933.0,4.0,321.0,27496.0,13663.0,13833.0,15.0,...,4.09095,601.0,2.830316,3.797529,0.107902,0.103781,0.164506,0.141391,0.155936,0.138829
58,GM0243,Harderwijk,NEE,1587.0,2.0,1256.0,48906.0,24246.0,24660.0,17.0,...,4.388846,653.0,2.921838,4.111364,0.203081,0.218183,0.213216,0.22656,0.21072,0.227668
59,GM0244,Hattem,NEE,847.0,4.0,545.0,12563.0,6211.0,6352.0,17.0,...,3.864015,246.0,2.660465,3.580018,0.01639,0.027731,0.03157,0.043453,0.03398,0.043757
60,GM0246,Heerde,NEE,627.0,4.0,245.0,19214.0,9495.0,9719.0,16.0,...,3.967502,306.0,2.769011,3.672298,0.004019,0.012237,0.002236,0.016437,0.002572,0.013878
61,GM0252,Heumen,NEE,804.0,4.0,423.0,16824.0,8360.0,8464.0,15.0,...,3.850934,302.0,2.690997,3.572086,0.061554,0.082828,0.013846,0.027648,0.009982,0.020705
62,GM0262,Lochem,NEE,600.0,4.0,161.0,34314.0,16943.0,17371.0,13.0,...,4.117646,631.0,2.852285,3.865832,0.056149,0.06729,0.044888,0.067119,0.040568,0.060414
63,GM0263,Maasdriel,NEE,495.0,5.0,395.0,26020.0,13308.0,12712.0,15.0,...,4.048908,337.0,2.836705,3.753262,0.036041,0.050659,0.036011,0.05813,0.034592,0.058695
64,GM0267,Nijkerk,NEE,1140.0,3.0,649.0,44975.0,22436.0,22539.0,18.0,...,3.893159,532.0,2.708053,3.557398,0.050308,0.05812,0.063886,0.070738,0.058244,0.070922
65,GM0268,Nijmegen,NEE,2423.0,2.0,3456.0,182480.0,88158.0,94322.0,13.0,...,4.374807,2408.0,2.885751,4.085442,0.111479,0.111144,0.062146,0.07427,0.058304,0.067092
66,GM0269,Oldebroek,NEE,654.0,4.0,248.0,24264.0,12173.0,12091.0,18.0,...,4.178032,341.0,2.842961,3.853571,0.018985,0.033034,0.022915,0.040547,0.020586,0.041649


In [3]:
def _get_shannon_entropy(labels, base=2):
    # get the total count of the labels
    total_count = len(labels)
    # get the unique labels and their counts
    _, label_counts = np.unique(labels, return_counts=True)

    probs = label_counts / total_count
    # get the entropy
    return entropy(probs, base=base)

def gm_total_amenities_entropy(gm_name, filter_i):
    """Count the amenities of one gemeente and compute their Altieri entropies.

    The amenities are read from
    ``data/gm_amenities/amenities_{gm_name}.parquet`` and reduced with the
    blacklists returned by ``ec.getfilter(filter_i)`` before anything is
    measured.

    Parameters
    ----------
    gm_name : str
        Name interpolated into the amenities parquet file name.
    filter_i : int
        Filter identifier forwarded to ``ec.getfilter``.

    Returns
    -------
    tuple
        ``(total_amenities, L0_entropy_alt, L1_entropy_alt)``: the number of
        amenities left after filtering, followed by the base-2 Altieri
        entropy computed with the ``L0_category`` labels and with the
        ``L1_category`` labels.
    """
    l0_blacklist, l1_blacklist = ec.getfilter(filter_i)
    amenities = gpd.read_parquet(f"data/gm_amenities/amenities_{gm_name}.parquet")

    # Build one keep-mask: drop blacklisted L0 categories, then drop every
    # blacklisted L1 category that sits under its given L0 category.
    keep = ~amenities.L0_category.isin(l0_blacklist)
    for l0_value, l1_values in (l1_blacklist or {}).items():
        blocked = (amenities.L0_category == l0_value) & amenities.L1_category.isin(l1_values)
        keep &= ~blocked
    amenities = amenities[keep]

    # number of amenities that survive the filters
    total_amenities = len(amenities)

    # coordinates as [x, y] pairs, one per amenity
    coords = [[geom.x, geom.y] for geom in amenities.geometry]

    # category labels at both hierarchy levels
    l0_labels = amenities["L0_category"].values
    l1_labels = amenities["L1_category"].values

    L0_entropy_alt = altieri_entropy(coords, l0_labels, base=2).entropy
    L1_entropy_alt = altieri_entropy(coords, l1_labels, base=2).entropy

    # drop the coordinate list and force a collection before returning
    del coords
    gc.collect()

    return total_amenities, L0_entropy_alt, L1_entropy_alt

In [4]:
# For every filter setting, compute the amenity count and the L0/L1 Altieri
# entropies of each gemeente and store them in per-filter columns, then write
# the enriched table to the "calculated" folder.
# The loop variable is called `filter_i` (not `filter`) so the builtin
# `filter` is not rebound in the kernel namespace for later cells.
for filter_i in [0, 1, 2]:
    print(f"Filter {filter_i}")
    for i, gm in tqdm(gemeenten.iterrows(), total=len(gemeenten)):
        total_amenities, L0_entropy_alt, L1_entropy_alt = gm_total_amenities_entropy(
            gm["gemeentenaam"], filter_i
        )
        # one row at a time; the columns are keyed by the filter number
        gemeenten.at[i, f"total_amenities_{filter_i}"] = total_amenities
        gemeenten.at[i, f"L0_altieri_{filter_i}"] = L0_entropy_alt
        gemeenten.at[i, f"L1_altieri_{filter_i}"] = L1_entropy_alt

        # release the per-row results and collect garbage before the next gemeente
        del total_amenities, L0_entropy_alt, L1_entropy_alt
        gc.collect()

gemeenten.to_parquet(f"data/gemeenten_parts/calculated/gemeenten_{part}.parquet")


Filter 0


100%|██████████| 57/57 [06:25<00:00,  6.76s/it]


Filter 1


100%|██████████| 57/57 [01:16<00:00,  1.34s/it]


Filter 2


100%|██████████| 57/57 [01:10<00:00,  1.24s/it]
