In [1]:
import geopandas as gpd
import pandas as pd
import classes.entropycalculator as ec
from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy
import numpy as np
import gc

from IPython.display import clear_output

from tqdm import tqdm
tqdm.pandas()

# Select which part to handle. `part` is interpolated into the parquet file
# names used when loading `gemeenten` and when writing the calculated results.
part = 5

In [2]:
# Load the gemeenten table for the selected part and preview its first 10 rows.
gemeenten = gpd.read_parquet(f"data/gemeenten_parts/gemeenten_{part}.parquet")
gemeenten.head(10)


Unnamed: 0,gemeentecode,gemeentenaam,H2O,OAD,STED,BEV_DICHTH,AANT_INW,AANT_MAN,AANT_VROUW,P_00_14_JR,...,L1_shannon_1,total_amenities_2,L0_shannon_2,L1_shannon_2,RE_L0_0,RE_L1_0,RE_L0_1,RE_L1_1,RE_L0_2,RE_L1_2
285,GM1721,Bernheze,NEE,697.0,4.0,360.0,32263.0,16276.0,15987.0,15.0,...,3.999854,361.0,2.811798,3.720711,0.060609,0.08099,0.072753,0.095619,0.065472,0.091495
286,GM1723,Alphen-Chaam,NEE,324.0,5.0,113.0,10463.0,5337.0,5126.0,15.0,...,3.634572,232.0,2.647726,3.464204,0.057241,0.080455,0.069171,0.089068,0.071769,0.087487
287,GM1724,Bergeijk,NEE,505.0,4.0,189.0,19092.0,9803.0,9289.0,14.0,...,3.41103,331.0,2.538922,3.148111,0.12133,0.126555,0.146925,0.150933,0.145909,0.146717
288,GM1728,Bladel,NEE,708.0,4.0,279.0,21009.0,10665.0,10344.0,15.0,...,4.194824,346.0,2.825649,3.908323,0.045326,0.052528,0.059061,0.073828,0.057086,0.072963
289,GM1729,Gulpen-Wittem,NEE,284.0,5.0,194.0,14210.0,7099.0,7111.0,11.0,...,3.822298,371.0,2.731523,3.546306,0.053139,0.078782,0.062804,0.099556,0.051098,0.089712
290,GM1730,Tynaarlo,NEE,510.0,4.0,242.0,34592.0,17027.0,17565.0,17.0,...,4.275292,671.0,2.88159,4.015962,0.148466,0.149642,0.099403,0.124222,0.09533,0.121824
291,GM1731,Midden-Drenthe,NEE,406.0,5.0,100.0,33987.0,16928.0,17059.0,14.0,...,3.921467,758.0,2.740046,3.700674,0.099381,0.118714,0.093116,0.133524,0.09043,0.135081
292,GM1734,Overbetuwe,NEE,815.0,4.0,447.0,48707.0,24178.0,24529.0,17.0,...,3.965084,567.0,2.712804,3.677163,0.136495,0.166955,0.143646,0.188477,0.137215,0.184134
293,GM1735,Hof van Twente,NEE,608.0,4.0,167.0,35455.0,17694.0,17761.0,14.0,...,4.120734,560.0,2.851482,3.836806,0.051142,0.071274,0.067183,0.095645,0.065071,0.094027
294,GM1740,Neder-Betuwe,NEE,454.0,5.0,424.0,25448.0,12787.0,12661.0,21.0,...,3.912112,296.0,2.862864,3.726212,0.030832,0.03881,0.027765,0.040376,0.027145,0.039045


In [3]:
def _get_shannon_entropy(labels, base=2):
    # get the total count of the labels
    total_count = len(labels)
    # get the unique labels and their counts
    _, label_counts = np.unique(labels, return_counts=True)

    probs = label_counts / total_count
    # get the entropy
    return entropy(probs, base=base)

def gm_total_amenities_entropy(gm_name, filter_i):
    """Count amenities and compute Altieri entropies for one municipality.

    Reads ``data/gm_amenities/amenities_{gm_name}.parquet``, removes the rows
    excluded by the two blacklists returned from ``ec.getfilter(filter_i)``,
    and evaluates ``altieri_entropy`` (base 2) on the remaining points, once
    with the ``L0_category`` labels and once with the ``L1_category`` labels.

    Parameters
    ----------
    gm_name
        Value interpolated into the amenities parquet file name.
    filter_i
        Value passed to ``ec.getfilter`` to obtain the blacklists.

    Returns
    -------
    tuple
        ``(total_amenities, L0_entropy_alt, L1_entropy_alt)`` where
        ``total_amenities`` is the number of rows left after filtering.
    """
    l0_excluded, l1_excluded = ec.getfilter(filter_i)
    amenities = gpd.read_parquet(f"data/gm_amenities/amenities_{gm_name}.parquet")

    # Build one keep-mask: drop blacklisted L0 categories outright ...
    keep = ~amenities.L0_category.isin(l0_excluded)
    # ... and, per L0 category, drop only the listed L1 categories.
    if l1_excluded:
        for l0_key, l1_values in l1_excluded.items():
            excluded_pair = (amenities.L0_category == l0_key) & (
                amenities.L1_category.isin(l1_values)
            )
            keep = keep & ~excluded_pair
    amenities = amenities[keep]

    # Coordinates as [x, y] pairs, in the same row order as the label arrays.
    coords = [[geom.x, geom.y] for geom in amenities.geometry]
    l0_labels = amenities["L0_category"].values
    l1_labels = amenities["L1_category"].values

    l0_result = altieri_entropy(coords, l0_labels, base=2).entropy
    l1_result = altieri_entropy(coords, l1_labels, base=2).entropy

    # Release the coordinate list before returning.
    del coords
    gc.collect()

    return len(amenities), l0_result, l1_result

In [4]:
# For every filter setting, compute the amenity count and the L0/L1 Altieri
# entropies of each gemeente and store them in filter-suffixed columns of
# `gemeenten`, then write the result to the "calculated" parquet for this part.
# The loop variable is `filter_i` rather than `filter` so the builtin `filter`
# is not shadowed for the remainder of the kernel session.
for filter_i in [0, 1, 2]:
    print(f"Filter {filter_i}")
    for i, gm in tqdm(gemeenten.iterrows(), total=len(gemeenten)):
        total_amenities, L0_entropy_alt, L1_entropy_alt = gm_total_amenities_entropy(gm["gemeentenaam"], filter_i)
        # `i` is the row's index label, so `.at` writes into the matching row.
        gemeenten.at[i, f"total_amenities_{filter_i}"] = total_amenities
        gemeenten.at[i, f"L0_altieri_{filter_i}"] = L0_entropy_alt
        gemeenten.at[i, f"L1_altieri_{filter_i}"] = L1_entropy_alt

        # Collect garbage left over from the per-gemeente computation.
        del total_amenities, L0_entropy_alt, L1_entropy_alt
        gc.collect()

gemeenten.to_parquet(f"data/gemeenten_parts/calculated/gemeenten_{part}.parquet")


Filter 0


100%|██████████| 57/57 [01:15<00:00,  1.33s/it]


Filter 1


100%|██████████| 57/57 [00:43<00:00,  1.30it/s]


Filter 2


100%|██████████| 57/57 [00:38<00:00,  1.46it/s]
