In [6]:
import geopandas as gpd
import pandas as pd
import classes.entropycalculator as ec
from spatialentropy import altieri_entropy, leibovici_entropy
from scipy.stats import entropy
import numpy as np
import gc
import shapely

from IPython.display import clear_output

from tqdm import tqdm
tqdm.pandas()


In [7]:
# Read the gemeenten-level amenities table from its parquet file.
gemeenten = gpd.read_parquet("data/gemeenten_amenities.parquet")
# wijken = gpd.read_parquet("data/wijken.parquet")

In [8]:
def _get_shannon_entropy(labels, base=2):
    # get the total count of the labels
    total_count = len(labels)
    # get the unique labels and their counts
    _, label_counts = np.unique(labels, return_counts=True)

    probs = label_counts / total_count
    # get the entropy
    return entropy(probs, base=base)

def _altieri_or_zero(points, labels, base=2):
    """Return ``altieri_entropy(points, labels).entropy``, or 0 if the call fails."""
    try:
        return altieri_entropy(points, labels, base=base).entropy
    except Exception:
        # Deliberate best-effort fallback: a failed computation is recorded as 0.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate, so a long-running loop can still be interrupted.
        return 0


def wk_total_amenities_entropy(gm_name, wijkarea, filter_i):
    """Count the amenities inside one wijk and compute their Altieri entropies.

    Reads ``data/gm_amenities/amenities_{gm_name}.parquet``, keeps the rows whose
    geometry lies within ``wijkarea``, removes blacklisted categories, and
    computes the Altieri entropy of the remaining points for the
    ``L0_category`` and ``L1_category`` labels (base 2).

    Args:
        gm_name: name used to build the per-gemeente amenities parquet path.
        wijkarea: geometry passed to ``GeoSeries.within`` to select amenities.
        filter_i: identifier handed to ``ec.getfilter``, which returns the pair
            ``(L0_BLACKLIST, L1_BLACKLIST)``. ``L0_BLACKLIST`` is used with
            ``isin``; ``L1_BLACKLIST`` may be falsy, otherwise it is iterated
            with ``.items()`` as ``{L0 category: L1 categories to drop}``.

    Returns:
        Tuple ``(total_amenities, L0_entropy, L1_entropy)`` where
        ``total_amenities`` is the row count after filtering and each entropy
        is 0 when its computation raised an exception.
    """
    L0_BLACKLIST, L1_BLACKLIST = ec.getfilter(filter_i)
    amenity_gdf = gpd.read_parquet(f"data/gm_amenities/amenities_{gm_name}.parquet")

    # keep only amenities located within the wijk (non-mutating reset of the index)
    amenity_gdf = amenity_gdf[amenity_gdf.within(wijkarea)].reset_index(drop=True)

    # apply filters
    amenity_gdf = amenity_gdf[~amenity_gdf.L0_category.isin(L0_BLACKLIST)]
    if L1_BLACKLIST:
        for key, value in L1_BLACKLIST.items():
            # drop rows of L0 category `key` whose L1 category is blacklisted
            amenity_gdf = amenity_gdf[
                ~(
                    (amenity_gdf.L0_category == key)
                    & (amenity_gdf.L1_category.isin(value))
                )
            ]

    # total number of amenities
    total_amenities = len(amenity_gdf)

    points = [[point.x, point.y] for point in amenity_gdf.geometry]

    # calculate entropy
    L0 = amenity_gdf["L0_category"].values
    L1 = amenity_gdf["L1_category"].values
    L0_entropy = _altieri_or_zero(points, L0, base=2)
    L1_entropy = _altieri_or_zero(points, L1, base=2)

    # release the per-wijk data before returning; this runs once per wijk
    del points, amenity_gdf, L0, L1
    gc.collect()

    return total_amenities, L0_entropy, L1_entropy

In [9]:
# Compute the Altieri entropy columns for every wijk of each part file.
# Parts form the outer loop so that each part is read once and written back to
# its own file; writing after the part loop would persist only the last part.
for part in [5]:
    part_path = f"data/wijken_parts/wijken_{part}a.parquet"
    wijken = gpd.read_parquet(part_path)

    # named `filter_i` so the builtin `filter` is not shadowed in the kernel
    for filter_i in [1, 2]:
        for i, wijk in tqdm(wijken.iterrows(), total=len(wijken)):
            _total_amenities, L0_entropy, L1_entropy = wk_total_amenities_entropy(
                wijk.gemeentenaam, wijk.geometry, filter_i
            )

            # wijken.at[i, f"total_amenities_{filter_i}"] = _total_amenities
            wijken.at[i, f"L0_altieri_{filter_i}"] = L0_entropy
            wijken.at[i, f"L1_altieri_{filter_i}"] = L1_entropy

        # checkpoint after each filter so finished columns survive a later failure
        wijken.to_parquet(part_path)

  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
100%|██████████| 554/554 [04:35<00:00,  2.01it/s]
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
  w = w / w.sum()
  pz = pz / pz.sum()
100%|██████████| 554/554 [04:25<00:00,  2.09it/s]


In [10]:
# Preview the first three rows of the `wijken` frame left by the processing loop.
wijken.head(3)

Unnamed: 0,wijkcode,wijknaam,gemeentecode,gemeentenaam,IND_WBI,H2O,OAD,STED,BEV_DICHTH,AANT_INW,...,L1_shannon_1,total_amenities_2,L0_shannon_2,L1_shannon_2,L0_altieri_0,L1_altieri_0,L0_altieri_1,L1_altieri_1,L0_altieri_2,L1_altieri_2
2770,WK189404,Wijk 04 Baarlo,GM1894,Peel en Maas,1.0,NEE,578.0,4.0,379.0,6525.0,...,4.068821,111.0,2.815561,3.859455,6.167353,7.416161,5.271475,7.450864,5.174209,7.150595
2771,WK189405,Wijk 05 Meijel,GM1894,Peel en Maas,1.0,NEE,511.0,4.0,329.0,6495.0,...,3.839635,107.0,2.704935,3.532372,6.437904,7.838048,5.028334,6.759525,4.96694,6.222664
2772,WK189500,Wijk 00 Winschoten,GM1895,Oldambt,1.0,NEE,1349.0,3.0,929.0,18640.0,...,4.317456,253.0,2.98056,4.083537,6.438359,8.171252,5.957922,8.679044,5.958244,8.236982
