In [6]:
import geopandas as gpd
import pandas as pd
import classes.entropycalculator as ec
from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy
import numpy as np
import gc
import shapely

from IPython.display import clear_output

from tqdm import tqdm
tqdm.pandas()


In [12]:
# Load the two input tables in one go: the gemeente-level amenities table and
# the wijk table whose rows are iterated further down.
gemeenten, wijken = (
    gpd.read_parquet(parquet_path)
    for parquet_path in (
        "data/gemeenten_amenities.parquet",
        "data/wijken.parquet",
    )
)

# Peek at a single row to check the available columns.
wijken.head(1)


Unnamed: 0,wijkcode,wijknaam,gemeentecode,gemeentenaam,IND_WBI,H2O,OAD,STED,BEV_DICHTH,AANT_INW,...,P_GEBBL_EU,P_GEBBL_NE,OPP_TOT,OPP_LAND,OPP_WATER,JRSTATCODE,JAAR,layer,path,geometry
0,WK001400,Centrum,GM0014,Groningen,1.0,NEE,6647.0,1.0,10132.0,23150.0,...,16.0,12.0,241.0,228.0,13.0,2023WK001400,2023,2023 — wijk_2023_v1zw,\\cbsp.nl\Productie\primair\TOP\Werk\KWB_buurt...,"POLYGON ((6.56023 53.22768, 6.56086 53.22668, ..."


In [9]:
def _get_shannon_entropy(labels, base=2):
    # get the total count of the labels
    total_count = len(labels)
    # get the unique labels and their counts
    _, label_counts = np.unique(labels, return_counts=True)

    probs = label_counts / total_count
    # get the entropy
    return entropy(probs, base=base)

def wk_total_amenities_entropy(gm_name, wijkarea, filter_i):
    """Count the amenities inside one wijk and compute their category entropy.

    The amenities of gemeente ``gm_name`` are read from
    ``data/gm_amenities/amenities_{gm_name}.parquet``, restricted to the rows
    whose geometry lies within ``wijkarea``, stripped of the blacklisted
    categories returned by ``ec.getfilter(filter_i)``, and summarised.

    Parameters
    ----------
    gm_name : str
        Gemeente name; used verbatim to build the parquet file name.
    wijkarea : geometry
        Geometry passed to ``GeoDataFrame.within`` (the caller passes the
        ``geometry`` value of a wijk row).
    filter_i :
        Selector forwarded to ``ec.getfilter``, which yields the pair
        ``(L0 blacklist, L1 blacklist)``. The L1 blacklist is iterated with
        ``.items()`` as ``L0 category -> L1 categories to drop`` and may be
        empty/falsy.

    Returns
    -------
    tuple
        ``(total_amenities, L0_entropy, L1_entropy)``: the number of amenities
        left after filtering and the base-2 Shannon entropy of their
        ``L0_category`` and ``L1_category`` labels.
    """
    l0_blacklist, l1_blacklist = ec.getfilter(filter_i)

    # NOTE: the gemeente file is read again on every call.
    amenity_gdf = gpd.read_parquet(f"data/gm_amenities/amenities_{gm_name}.parquet")

    # Keep only the amenities located inside the wijk.
    amenity_gdf = amenity_gdf[amenity_gdf.within(wijkarea)].reset_index(drop=True)

    # Build one keep-mask: drop blacklisted L0 categories outright, and drop
    # blacklisted L1 categories only within their own L0 category.
    keep = ~amenity_gdf.L0_category.isin(l0_blacklist)
    if l1_blacklist:
        for l0_key, l1_values in l1_blacklist.items():
            keep &= ~(
                (amenity_gdf.L0_category == l0_key)
                & (amenity_gdf.L1_category.isin(l1_values))
            )
    amenity_gdf = amenity_gdf[keep]

    total_amenities = len(amenity_gdf)

    # Plain (non-spatial) Shannon entropy of the category labels. No point
    # coordinates are needed for this, so none are extracted; a spatial
    # variant such as altieri_entropy would need them.
    L0_entropy = _get_shannon_entropy(amenity_gdf["L0_category"].values, base=2)
    L1_entropy = _get_shannon_entropy(amenity_gdf["L1_category"].values, base=2)

    return total_amenities, L0_entropy, L1_entropy

In [14]:
# For every blacklist variant (0, 1, 2) compute, per wijk, the number of
# amenities and the L0/L1 Shannon entropy, and store them in columns suffixed
# with the variant number.
# The loop variable is named `filter_i` so that the built-in `filter` is not
# shadowed for the rest of the kernel session.
for filter_i in [0, 1, 2]:
    for i, wijk in tqdm(wijken.iterrows(), total=len(wijken)):
        gm_name = wijk.gemeentenaam
        wijkarea = wijk.geometry
        total_amenities, L0_entropy, L1_entropy = wk_total_amenities_entropy(
            gm_name, wijkarea, filter_i
        )

        # The results are three scalars, so no manual `del` / forced garbage
        # collection is needed between iterations.
        wijken.at[i, f"total_amenities_{filter_i}"] = total_amenities
        wijken.at[i, f"L0_shannon_{filter_i}"] = L0_entropy
        wijken.at[i, f"L1_shannon_{filter_i}"] = L1_entropy

# Persist the wijk table including the newly added columns.
wijken.to_parquet("data/wijken_entropy.parquet")

 29%|██▉       | 965/3324 [32:55<1:20:28,  2.05s/it] 


KeyboardInterrupt: 

In [None]:
# Preview the first rows only instead of rendering the whole table.
wijken.head()