In [14]:
import geopandas as gpd
import classes.entropycalculator as ec
import gc
import numpy as np
import pandas as pd

from tqdm import tqdm
tqdm.pandas()

from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy

# Category table; the code below reads its "L0 category" and "L1 category" columns.
categorisation = pd.read_excel("data/categorisation.xlsx")

# Municipalities to process; each name is interpolated into the input/output file names.
gm_list = [
    'Eindhoven',
    'Amsterdam',
    'Rotterdam',
    'Utrecht',
    'Groningen',
    'Maastricht',
    'Leeuwarden',
    'Arnhem',
    'Zwolle',
    "'s-Gravenhage",
]

# Spatial scales; each one is a sub-directory (and file-name part) of the results.
scales = [
    'wijken',
    'buurten',
]

# Filter identifiers handed to ec.getfilter(); also part of the results path.
filters = [
    0,
    1,
    2,
]

In [15]:
def get_categorisation_filtered(categorisation, L0_filter, L1_filter):
    """Return the L0 and L1 categories that survive the given exclusion filters.

    Parameters
    ----------
    categorisation : pandas.DataFrame
        Table with the columns "L0 category" and "L1 category".
    L0_filter : list-like
        L0 categories to drop entirely.
    L1_filter : dict or None
        Maps an L0 category to the L1 categories to drop within it.
        A falsy value (``None``, ``{}``) disables this step.

    Returns
    -------
    tuple
        ``(L0_cats, L1_cats)``: the unique remaining values of the two
        columns, in order of first appearance.
    """
    # Step 1: remove every row whose L0 category is excluded outright.
    l0_excluded = categorisation["L0 category"].isin(L0_filter)
    remaining = categorisation[~l0_excluded]

    # Step 2: remove specific (L0, L1) combinations, one L0 category at a time.
    for l0_name, l1_names in (L1_filter or {}).items():
        same_l0 = remaining["L0 category"] == l0_name
        banned_l1 = remaining["L1 category"].isin(l1_names)
        remaining = remaining[~(same_l0 & banned_l1)]

    return remaining["L0 category"].unique(), remaining["L1 category"].unique()

In [16]:
def append_amenity_counts(gemeente):
    """Add per-category amenity counts to every result file of one municipality.

    For each filter in the module-level ``filters`` and each scale in
    ``scales``, the matching results parquet is loaded, every polygon gets one
    ``L0_c_<cat>`` and one ``L1_c_<cat>`` column holding the number of
    amenities that lie within its geometry, and the file is overwritten.

    Parameters
    ----------
    gemeente : str
        Municipality name as it appears in the amenities and results file names.

    Returns
    -------
    None
        The result files are updated on disk.
    """
    amenities_gm = gpd.read_parquet(f"data/gm_amenities/amenities_{gemeente}.parquet")

    for filteri in filters:
        L0_filter, L1_filter = ec.getfilter(filteri)
        L0_cats, L1_cats = get_categorisation_filtered(categorisation, L0_filter, L1_filter)

        for scale in scales:
            # The file is read and then overwritten, so build its path only once.
            results_path = f"results/filter{filteri}/{scale}/{gemeente}_{scale}_{filteri}.parquet"
            gdf = gpd.read_parquet(results_path)

            for idx, row in tqdm(gdf.iterrows(), total=gdf.shape[0]):
                amenities = amenities_gm[amenities_gm.within(row["geometry"])]

                if amenities.empty:
                    # No amenities in this polygon: every category count is 0.
                    L0_counts, L1_counts = {}, {}
                else:
                    L0_counts = amenities.value_counts("L0_category")
                    L1_counts = amenities.value_counts("L1_category")

                # Categories absent from the counts fall back to 0.
                for cat in L0_cats:
                    gdf.at[idx, f"L0_c_{cat}"] = L0_counts.get(cat, 0)
                for cat in L1_cats:
                    gdf.at[idx, f"L1_c_{cat}"] = L1_counts.get(cat, 0)

            gdf.to_parquet(results_path)

            # Release only the frame. The per-row locals (amenities, L0_counts,
            # L1_counts) are unbound when the file has no rows, so deleting them
            # unconditionally raised UnboundLocalError and aborted the whole run.
            del gdf
            gc.collect()
            

In [19]:
# Process the municipalities one after another; append_amenity_counts rewrites
# the result parquet files on disk for every filter/scale combination.
for gm in gm_list:
    print(f"Processing {gm}")
    append_amenity_counts(gm)

Processing Eindhoven


100%|██████████| 20/20 [00:00<00:00, 49.83it/s]
100%|██████████| 112/112 [00:00<00:00, 231.51it/s]
100%|██████████| 20/20 [00:00<00:00, 49.98it/s]
100%|██████████| 112/112 [00:00<00:00, 239.83it/s]
100%|██████████| 20/20 [00:00<00:00, 52.10it/s]
100%|██████████| 112/112 [00:00<00:00, 233.53it/s]


Processing Amsterdam


100%|██████████| 110/110 [00:03<00:00, 32.66it/s]
100%|██████████| 489/489 [00:03<00:00, 156.80it/s]
100%|██████████| 110/110 [00:03<00:00, 35.65it/s]
100%|██████████| 489/489 [00:02<00:00, 170.57it/s]
100%|██████████| 110/110 [00:03<00:00, 33.79it/s]
100%|██████████| 489/489 [00:02<00:00, 177.72it/s]


Processing Rotterdam


100%|██████████| 20/20 [00:01<00:00, 10.93it/s]
100%|██████████| 87/87 [00:01<00:00, 52.68it/s]
100%|██████████| 20/20 [00:01<00:00, 11.12it/s]
100%|██████████| 87/87 [00:01<00:00, 52.67it/s]
100%|██████████| 20/20 [00:01<00:00, 10.61it/s]
100%|██████████| 87/87 [00:01<00:00, 53.14it/s]


Processing Utrecht


100%|██████████| 10/10 [00:02<00:00,  4.92it/s]
100%|██████████| 110/110 [00:00<00:00, 119.23it/s]
100%|██████████| 10/10 [00:02<00:00,  4.87it/s]
100%|██████████| 110/110 [00:00<00:00, 131.65it/s]
100%|██████████| 10/10 [00:02<00:00,  4.81it/s]
100%|██████████| 110/110 [00:00<00:00, 134.50it/s]


Processing Groningen


100%|██████████| 20/20 [00:00<00:00, 25.45it/s]
100%|██████████| 141/141 [00:00<00:00, 191.25it/s]
100%|██████████| 20/20 [00:00<00:00, 25.42it/s]
100%|██████████| 141/141 [00:00<00:00, 209.06it/s]
100%|██████████| 20/20 [00:00<00:00, 25.37it/s]
100%|██████████| 141/141 [00:00<00:00, 204.14it/s]


Processing Maastricht


100%|██████████| 7/7 [00:00<00:00, 35.89it/s]
100%|██████████| 44/44 [00:00<00:00, 181.56it/s]
100%|██████████| 7/7 [00:00<00:00, 34.97it/s]
100%|██████████| 44/44 [00:00<00:00, 188.23it/s]
100%|██████████| 7/7 [00:00<00:00, 38.72it/s]
100%|██████████| 44/44 [00:00<00:00, 192.82it/s]


Processing Leeuwarden


100%|██████████| 22/22 [00:02<00:00, 10.34it/s]
100%|██████████| 128/128 [00:01<00:00, 97.23it/s] 
100%|██████████| 22/22 [00:02<00:00, 10.45it/s]
100%|██████████| 128/128 [00:01<00:00, 97.12it/s] 
100%|██████████| 22/22 [00:02<00:00, 10.45it/s]
100%|██████████| 128/128 [00:01<00:00, 96.91it/s] 


Processing Arnhem


100%|██████████| 24/24 [00:00<00:00, 81.45it/s]
100%|██████████| 83/83 [00:00<00:00, 176.02it/s]
100%|██████████| 24/24 [00:00<00:00, 83.16it/s]
100%|██████████| 83/83 [00:00<00:00, 199.78it/s]
100%|██████████| 24/24 [00:00<00:00, 89.15it/s]
100%|██████████| 83/83 [00:00<00:00, 207.58it/s]


Processing Zwolle


100%|██████████| 16/16 [00:00<00:00, 39.82it/s]
100%|██████████| 77/77 [00:00<00:00, 178.34it/s]
100%|██████████| 16/16 [00:00<00:00, 41.97it/s]
100%|██████████| 77/77 [00:00<00:00, 180.20it/s]
100%|██████████| 16/16 [00:00<00:00, 42.04it/s]
100%|██████████| 77/77 [00:00<00:00, 184.07it/s]


Processing 's-Gravenhage


100%|██████████| 43/43 [00:02<00:00, 15.42it/s]
100%|██████████| 111/111 [00:02<00:00, 44.13it/s]
100%|██████████| 43/43 [00:03<00:00, 13.44it/s]
100%|██████████| 111/111 [00:02<00:00, 44.43it/s]
100%|██████████| 43/43 [00:03<00:00, 13.44it/s]
100%|██████████| 111/111 [00:02<00:00, 42.68it/s]
