In [1]:
import geopandas as gpd
import pandas as pd
import classes.entropycalculator as ec
from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy
import numpy as np
import gc

from IPython.display import clear_output

from tqdm import tqdm
tqdm.pandas()

# Select which chunk of the municipality table this run handles; `part` is
# interpolated into the input and output parquet file names used further down.
part = 4

In [2]:
# Load the municipality chunk selected by `part` and preview its first ten rows.
gemeenten_path = f"data/gemeenten_parts/gemeenten_{part}.parquet"
gemeenten = gpd.read_parquet(gemeenten_path)
gemeenten.head(10)


Unnamed: 0,gemeentecode,gemeentenaam,H2O,OAD,STED,BEV_DICHTH,AANT_INW,AANT_MAN,AANT_VROUW,P_00_14_JR,...,L1_shannon_1,total_amenities_2,L0_shannon_2,L1_shannon_2,RE_L0_0,RE_L1_0,RE_L0_1,RE_L1_1,RE_L0_2,RE_L1_2
228,GM0873,Woensdrecht,NEE,668.0,4.0,242.0,22191.0,11115.0,11076.0,14.0,...,3.995033,378.0,2.831744,3.820836,0.033664,0.048719,0.048315,0.068698,0.037982,0.059149
229,GM0879,Zundert,NEE,592.0,4.0,187.0,22518.0,11499.0,11019.0,14.0,...,3.848268,245.0,2.846615,3.658776,0.056548,0.074313,0.047933,0.07522,0.04982,0.07443
230,GM0880,Wormerland,NEE,1491.0,3.0,431.0,16612.0,8172.0,8440.0,14.0,...,3.202103,355.0,2.338886,3.073013,0.117213,0.103997,0.199558,0.158888,0.199365,0.163104
231,GM0882,Landgraaf,NEE,1455.0,3.0,1512.0,37175.0,18307.0,18868.0,13.0,...,3.913631,359.0,2.848503,3.649399,0.029163,0.041614,0.028068,0.048309,0.030075,0.049183
232,GM0888,Beek,NEE,887.0,4.0,766.0,16132.0,7850.0,8282.0,13.0,...,4.451729,300.0,2.903915,4.164672,0.043254,0.043936,0.050305,0.042975,0.047239,0.044804
233,GM0889,Beesel,NEE,762.0,4.0,482.0,13449.0,6762.0,6687.0,13.0,...,4.222049,186.0,2.847475,3.986236,0.011131,0.016772,0.02059,0.026993,0.019858,0.025433
234,GM0893,Bergen (L.),NEE,315.0,5.0,127.0,13119.0,6604.0,6515.0,12.0,...,3.555421,202.0,2.624842,3.356438,0.051133,0.079298,0.084078,0.121233,0.081972,0.115553
235,GM0899,Brunssum,NEE,1665.0,2.0,1606.0,27682.0,13599.0,14083.0,13.0,...,3.651496,205.0,2.849665,3.554235,0.052389,0.075638,0.067233,0.100841,0.063081,0.097771
236,GM0907,Gennep,NEE,661.0,4.0,373.0,17764.0,8985.0,8779.0,14.0,...,3.925153,228.0,2.828637,3.706458,0.126298,0.141634,0.071699,0.08931,0.061626,0.081519
237,GM0917,Heerlen,NEE,1841.0,2.0,1940.0,87122.0,43456.0,43666.0,13.0,...,4.07516,990.0,2.920074,3.780242,0.117369,0.151231,0.159971,0.205089,0.137306,0.169777


In [3]:
def _get_shannon_entropy(labels, base=2):
    # get the total count of the labels
    total_count = len(labels)
    # get the unique labels and their counts
    _, label_counts = np.unique(labels, return_counts=True)

    probs = label_counts / total_count
    # get the entropy
    return entropy(probs, base=base)

def gm_total_amenities_entropy(gm_name, filter_i):
    """Count the amenities of one municipality and compute their Altieri entropies.

    Reads ``data/gm_amenities/amenities_{gm_name}.parquet``, removes the rows
    blacklisted by the filter returned from ``ec.getfilter(filter_i)`` and
    evaluates ``altieri_entropy`` (base 2) on the remaining rows, once with the
    ``L0_category`` labels and once with the ``L1_category`` labels.

    Parameters
    ----------
    gm_name : str
        Municipality name as it appears in the amenities file name.
    filter_i : int
        Filter identifier handed to ``ec.getfilter``.

    Returns
    -------
    tuple
        ``(total_amenities, L0_entropy, L1_entropy)`` where ``total_amenities``
        is the number of rows left after filtering.
    """
    l0_blacklist, l1_blacklist = ec.getfilter(filter_i)
    amenities = gpd.read_parquet(f"data/gm_amenities/amenities_{gm_name}.parquet")

    # Remove every row whose L0 category is blacklisted outright.
    l0_allowed = ~amenities.L0_category.isin(l0_blacklist)
    amenities = amenities[l0_allowed]

    # The L1 blacklist maps an L0 category to the L1 categories to drop within it.
    if l1_blacklist:
        for l0_key, l1_values in l1_blacklist.items():
            same_l0 = amenities.L0_category == l0_key
            banned_l1 = amenities.L1_category.isin(l1_values)
            amenities = amenities[~(same_l0 & banned_l1)]

    n_amenities = len(amenities)

    # Assumes every geometry exposes .x / .y (i.e. point geometries) -- TODO confirm.
    coords = [[geom.x, geom.y] for geom in amenities.geometry]

    l0_labels = amenities.loc[:, "L0_category"].values
    l1_labels = amenities.loc[:, "L1_category"].values
    l0_result = altieri_entropy(coords, l0_labels, base=2)
    l1_result = altieri_entropy(coords, l1_labels, base=2)
    l0_entropy = l0_result.entropy
    l1_entropy = l1_result.entropy

    # Drop the coordinate list and force a collection before returning.
    del coords
    gc.collect()

    return n_amenities, l0_entropy, l1_entropy

In [4]:
# For every blacklist filter variant, compute the amenity count and the Altieri
# entropies of each municipality and store them in filter-suffixed columns.
# The loop variable is called `filter_i` (not `filter`) so that the builtin
# `filter` is not shadowed in the notebook namespace for any later cell.
for filter_i in [0, 1, 2]:
    print(f"Filter {filter_i}")
    # Only the index label and the municipality name are needed, so iterate over
    # that single column instead of materialising a full row per iteration.
    for i, gm_name in tqdm(gemeenten["gemeentenaam"].items(), total=len(gemeenten)):
        total_amenities, L0_entropy_alt, L1_entropy_alt = gm_total_amenities_entropy(gm_name, filter_i)
        gemeenten.at[i, f"total_amenities_{filter_i}"] = total_amenities
        gemeenten.at[i, f"L0_altieri_{filter_i}"] = L0_entropy_alt
        gemeenten.at[i, f"L1_altieri_{filter_i}"] = L1_entropy_alt

# Persist the enriched chunk next to the other calculated parts.
gemeenten.to_parquet(f"data/gemeenten_parts/calculated/gemeenten_{part}.parquet")


Filter 0


100%|██████████| 57/57 [00:50<00:00,  1.13it/s]


Filter 1


100%|██████████| 57/57 [00:33<00:00,  1.69it/s]


Filter 2


100%|██████████| 57/57 [00:35<00:00,  1.63it/s]
