In [1]:
import geopandas as gpd
import pandas as pd
import classes.entropycalculator as ec
from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy
import numpy as np
import gc

from IPython.display import clear_output

from tqdm import tqdm
# Register tqdm's pandas integration (adds the `progress_apply` methods).
tqdm.pandas()

# Index of the gemeenten partition this run processes. It is interpolated into
# the parquet paths data/gemeenten_parts/gemeenten_{part}.parquet (input) and
# data/gemeenten_parts/calculated/gemeenten_{part}.parquet (output).
part = 3

In [2]:
# Load the partition of the gemeenten table selected by `part`.
gemeenten = gpd.read_parquet(f"data/gemeenten_parts/gemeenten_{part}.parquet")
# Preview the first ten rows as the cell's output.
gemeenten.head(10)


Unnamed: 0,gemeentecode,gemeentenaam,H2O,OAD,STED,BEV_DICHTH,AANT_INW,AANT_MAN,AANT_VROUW,P_00_14_JR,...,L1_shannon_1,total_amenities_2,L0_shannon_2,L1_shannon_2,RE_L0_0,RE_L1_0,RE_L0_1,RE_L1_1,RE_L0_2,RE_L1_2
171,GM0627,Waddinxveen,NEE,1565.0,2.0,1175.0,32601.0,16197.0,16404.0,19.0,...,3.985677,511.0,2.863551,3.716458,0.036453,0.045199,0.061472,0.078925,0.056891,0.076393
172,GM0629,Wassenaar,NEE,1445.0,3.0,529.0,27093.0,12899.0,14194.0,16.0,...,3.906483,828.0,2.552987,3.65936,0.058246,0.052274,0.096393,0.078687,0.081597,0.071899
173,GM0632,Woerden,NEE,1402.0,3.0,601.0,53244.0,26295.0,26949.0,17.0,...,3.835326,822.0,2.721898,3.546863,0.065513,0.077453,0.064727,0.087456,0.06063,0.082963
174,GM0637,Zoetermeer,NEE,2529.0,1.0,3689.0,126998.0,62072.0,64926.0,17.0,...,4.494276,1451.0,2.941096,4.251725,0.066446,0.077808,0.073476,0.090726,0.071143,0.085095
175,GM0638,Zoeterwoude,NEE,840.0,4.0,446.0,9443.0,4716.0,4727.0,16.0,...,3.899196,189.0,2.73051,3.629032,0.083054,0.104689,0.105965,0.150499,0.124748,0.141124
176,GM0642,Zwijndrecht,NEE,2100.0,2.0,2217.0,45018.0,21891.0,23127.0,16.0,...,3.751872,458.0,2.714301,3.506866,0.138026,0.148264,0.157464,0.175152,0.159261,0.178409
177,GM0654,Borsele,NEE,324.0,5.0,164.0,23159.0,11762.0,11397.0,17.0,...,3.299721,604.0,2.622666,3.150647,0.088938,0.124713,0.097942,0.142995,0.089245,0.134237
178,GM0664,Goes,NEE,1384.0,3.0,426.0,39433.0,19329.0,20104.0,14.0,...,4.288427,864.0,2.938476,3.971588,0.047553,0.05094,0.061164,0.064408,0.056906,0.06342
179,GM0668,West Maas en Waal,NEE,426.0,5.0,263.0,20065.0,10046.0,10019.0,14.0,...,4.234811,228.0,2.874852,3.982299,0.104466,0.113254,0.14326,0.151614,0.132751,0.149988
180,GM0677,Hulst,NEE,512.0,4.0,137.0,27596.0,13826.0,13770.0,13.0,...,3.614134,542.0,2.705777,3.381269,0.102054,0.125644,0.096643,0.134776,0.093862,0.129513


In [3]:
def _get_shannon_entropy(labels, base=2):
    # get the total count of the labels
    total_count = len(labels)
    # get the unique labels and their counts
    _, label_counts = np.unique(labels, return_counts=True)

    probs = label_counts / total_count
    # get the entropy
    return entropy(probs, base=base)

def gm_total_amenities_entropy(gm_name, filter_i):
    """Count one gemeente's amenities and compute their Altieri entropies.

    Reads ``data/gm_amenities/amenities_{gm_name}.parquet``, drops the rows
    excluded by the two blacklists returned by ``ec.getfilter(filter_i)``, and
    runs ``altieri_entropy`` (base 2) on the remaining point coordinates, once
    labelled by ``L0_category`` and once by ``L1_category``.

    Parameters
    ----------
    gm_name
        Gemeente name used to build the amenities parquet path.
    filter_i
        Filter identifier forwarded to ``ec.getfilter``.

    Returns
    -------
    tuple
        ``(total_amenities, L0_entropy, L1_entropy)``: the number of rows left
        after filtering and the ``.entropy`` attribute of each
        ``altieri_entropy`` result.
    """
    l0_excluded, l1_excluded = ec.getfilter(filter_i)
    amenities = gpd.read_parquet(f"data/gm_amenities/amenities_{gm_name}.parquet")

    # Drop every amenity whose L0 category is blacklisted outright.
    keep_l0 = ~amenities.L0_category.isin(l0_excluded)
    amenities = amenities[keep_l0]

    # The L1 blacklist pairs an L0 category with the L1 categories to drop
    # inside it; a falsy blacklist means there is nothing more to remove.
    for l0_name, l1_names in (l1_excluded or {}).items():
        in_l0 = amenities.L0_category == l0_name
        in_l1 = amenities.L1_category.isin(l1_names)
        amenities = amenities[~(in_l0 & in_l1)]

    n_amenities = len(amenities)

    # One [x, y] pair per remaining amenity geometry.
    coords = [[geom.x, geom.y] for geom in amenities.geometry]

    l0_labels = amenities["L0_category"].values
    l1_labels = amenities["L1_category"].values
    l0_entropy = altieri_entropy(coords, l0_labels, base=2).entropy
    l1_entropy = altieri_entropy(coords, l1_labels, base=2).entropy

    # Release the coordinate list before returning.
    del coords
    gc.collect()

    return n_amenities, l0_entropy, l1_entropy

In [4]:
# For every filter setting, compute each gemeente's amenity count and Altieri
# entropies and store them in per-filter columns of `gemeenten`, then write
# the enriched table to the "calculated" folder.
#
# The loop variable is `filter_i`, not `filter`: a top-level loop variable
# stays in the notebook's global namespace, so `filter` would rebind the
# builtin `filter()` for every cell run afterwards.
for filter_i in [0, 1, 2]:
    print(f"Filter {filter_i}")
    for i, gm in tqdm(gemeenten.iterrows(), total=len(gemeenten)):
        total_amenities, L0_entropy_alt, L1_entropy_alt = gm_total_amenities_entropy(gm["gemeentenaam"], filter_i)
        # Column names carry the filter index so results of all filters coexist.
        gemeenten.at[i, f"total_amenities_{filter_i}"] = total_amenities
        gemeenten.at[i, f"L0_altieri_{filter_i}"] = L0_entropy_alt
        gemeenten.at[i, f"L1_altieri_{filter_i}"] = L1_entropy_alt

        # Drop the per-row results and collect garbage before the next gemeente.
        del total_amenities, L0_entropy_alt, L1_entropy_alt
        gc.collect()

gemeenten.to_parquet(f"data/gemeenten_parts/calculated/gemeenten_{part}.parquet")


Filter 0


100%|██████████| 57/57 [01:17<00:00,  1.37s/it]


Filter 1


100%|██████████| 57/57 [00:44<00:00,  1.29it/s]


Filter 2


100%|██████████| 57/57 [00:38<00:00,  1.48it/s]
