In [1]:
import geopandas as gpd
import pandas as pd
import classes.entropycalculator as ec
from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy
import numpy as np
import gc

from IPython.display import clear_output

from tqdm import tqdm
tqdm.pandas()

# Index of the gemeenten partition to process in this run; it is interpolated
# into the parquet file names that the notebook reads and writes.
part = 2

In [2]:
# Load the municipality partition selected by `part`.
gemeenten = gpd.read_parquet(
    f"data/gemeenten_parts/gemeenten_{part}.parquet"
)

# Preview the first ten rows as the cell output.
gemeenten.head(10)


Unnamed: 0,gemeentecode,gemeentenaam,H2O,OAD,STED,BEV_DICHTH,AANT_INW,AANT_MAN,AANT_VROUW,P_00_14_JR,...,L1_shannon_1,total_amenities_2,L0_shannon_2,L1_shannon_2,RE_L0_0,RE_L1_0,RE_L0_1,RE_L1_1,RE_L0_2,RE_L1_2
114,GM0388,Enkhuizen,NEE,1399.0,3.0,1491.0,18885.0,9458.0,9427.0,15.0,...,4.276599,221.0,2.818619,3.966315,0.122551,0.119384,0.100239,0.109884,0.093794,0.106318
115,GM0392,Haarlem,NEE,3632.0,1.0,5662.0,165396.0,81110.0,84286.0,17.0,...,4.419858,2282.0,2.847964,4.138774,0.112731,0.119079,0.12881,0.129247,0.126839,0.131294
116,GM0394,Haarlemmermeer,NEE,1535.0,2.0,823.0,162300.0,80689.0,81611.0,17.0,...,3.589451,3742.0,2.61563,3.351302,0.173381,0.176337,0.104439,0.112439,0.100255,0.104773
117,GM0396,Heemskerk,NEE,2374.0,2.0,1446.0,39431.0,19281.0,20150.0,15.0,...,3.961083,468.0,2.689258,3.662773,0.190199,0.188025,0.218789,0.213396,0.205555,0.211739
118,GM0397,Heemstede,NEE,1786.0,2.0,3029.0,27778.0,13143.0,14635.0,18.0,...,3.80334,612.0,2.63214,3.555453,0.013928,0.018408,0.012832,0.019982,0.013752,0.01898
119,GM0399,Heiloo,NEE,1375.0,3.0,1300.0,24319.0,12012.0,12307.0,14.0,...,4.04249,306.0,2.919451,3.795664,0.140928,0.165559,0.187709,0.206413,0.188768,0.214238
120,GM0400,Den Helder,NEE,1687.0,2.0,1254.0,56539.0,27996.0,28543.0,14.0,...,4.338977,965.0,2.873984,4.034949,0.168547,0.165263,0.074329,0.097285,0.074364,0.093694
121,GM0402,Hilversum,NEE,2787.0,1.0,2046.0,93327.0,45960.0,47367.0,17.0,...,4.327325,1361.0,2.862375,4.039374,0.169844,0.154923,0.18527,0.161206,0.181783,0.168952
122,GM0405,Hoorn,NEE,1723.0,2.0,3692.0,75216.0,37063.0,38153.0,16.0,...,4.120882,1585.0,2.785992,3.812829,0.102866,0.113418,0.114164,0.126279,0.101959,0.118817
123,GM0406,Huizen,NEE,2054.0,2.0,2609.0,41252.0,19889.0,21363.0,15.0,...,3.965853,560.0,2.683123,3.664461,0.173506,0.198292,0.249077,0.277244,0.244116,0.284729


In [3]:
def _get_shannon_entropy(labels, base=2):
    # get the total count of the labels
    total_count = len(labels)
    # get the unique labels and their counts
    _, label_counts = np.unique(labels, return_counts=True)

    probs = label_counts / total_count
    # get the entropy
    return entropy(probs, base=base)

def gm_total_amenities_entropy(gm_name, filter_i):
    """Count the amenities of one municipality and compute their Altieri entropy.

    Reads ``data/gm_amenities/amenities_{gm_name}.parquet``, removes the rows
    excluded by the two blacklists returned from ``ec.getfilter(filter_i)``,
    and evaluates ``altieri_entropy`` (base 2) on the remaining amenity
    coordinates, once with the L0 and once with the L1 category labels.

    Parameters
    ----------
    gm_name
        Municipality name; interpolated into the parquet file name.
    filter_i
        Filter identifier forwarded to ``ec.getfilter``.

    Returns
    -------
    tuple
        ``(total_amenities, L0_entropy, L1_entropy)``: the number of rows left
        after filtering and the ``.entropy`` attribute of each Altieri result.
    """
    l0_excluded, l1_excluded = ec.getfilter(filter_i)
    amenities = gpd.read_parquet(f"data/gm_amenities/amenities_{gm_name}.parquet")

    # First drop every row whose L0 category is blacklisted outright.
    keep_l0 = ~amenities.L0_category.isin(l0_excluded)
    amenities = amenities[keep_l0]

    # Then drop blacklisted L1 categories, but only inside the L0 category they
    # are listed under. A falsy L1 blacklist means there is nothing to remove.
    for l0_name, l1_names in (l1_excluded or {}).items():
        in_l0 = amenities.L0_category == l0_name
        in_l1 = amenities.L1_category.isin(l1_names)
        amenities = amenities[~(in_l0 & in_l1)]

    # Number of amenities that survived the filters.
    n_amenities = len(amenities)

    # Plain [x, y] pairs taken from the geometry column.
    coords = [[geom.x, geom.y] for geom in amenities.geometry]

    # Altieri entropy for each category level, using the same coordinates.
    l0_labels = amenities["L0_category"].values
    l1_labels = amenities["L1_category"].values
    l0_result = altieri_entropy(coords, l0_labels, base=2)
    l1_result = altieri_entropy(coords, l1_labels, base=2)

    # Release the coordinate list and force a collection before returning.
    del coords
    gc.collect()

    return n_amenities, l0_result.entropy, l1_result.entropy

In [4]:
# For every filter, compute the amenity count and the L0/L1 Altieri entropy of
# each municipality and store them in per-filter columns of `gemeenten`.
#
# The loop variable is called `filter_i` (the same name as the parameter of
# gm_total_amenities_entropy) rather than `filter`: a module-level `filter`
# would shadow the builtin for the rest of the kernel session.
for filter_i in [0, 1, 2]:
    print(f"Filter {filter_i}")
    for i, gm in tqdm(gemeenten.iterrows(), total=len(gemeenten)):
        total_amenities, L0_entropy_alt, L1_entropy_alt = gm_total_amenities_entropy(
            gm["gemeentenaam"], filter_i
        )
        gemeenten.at[i, f"total_amenities_{filter_i}"] = total_amenities
        gemeenten.at[i, f"L0_altieri_{filter_i}"] = L0_entropy_alt
        gemeenten.at[i, f"L1_altieri_{filter_i}"] = L1_entropy_alt

        # Drop the per-municipality results and collect garbage now that the
        # frame loaded inside the function has gone out of scope
        # (presumably to keep memory bounded across the run).
        del total_amenities, L0_entropy_alt, L1_entropy_alt
        gc.collect()

# Persist the enriched partition once all filters have been processed.
gemeenten.to_parquet(f"data/gemeenten_parts/calculated/gemeenten_{part}.parquet")


Filter 0


100%|██████████| 57/57 [03:11<00:00,  3.36s/it]


Filter 1


100%|██████████| 57/57 [00:56<00:00,  1.01it/s]


Filter 2


100%|██████████| 57/57 [00:56<00:00,  1.01it/s]
