# Yearly Observations by municipality

Om de regionale verschillen ook in kaart te kunnen brengen, groeperen we niet enkel per jaar, maar ook per gemeente

In [1]:
import pandas as pd
import geopandas as gpd
import folium
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm # Logaritmische verdeling
import matplotlib.cm as cm
import matplotlib.colors as colors

# set the max columns to none
pd.set_option('display.max_columns', None)
# set the max columns to none
pd.set_option('display.max_rows', None)

## Gemeenten in België

In [3]:
## Geodataframe gemeenten in belgië
belgium = gpd.read_file('../resources/geodata/kontur_boundaries_BE_20230628.gpkg')
belgium_gemeenten = belgium[(belgium["osm_admin_level"] == '8')].copy()
belgium_gemeenten.head(5)

Unnamed: 0,admin_level,osm_admin_level,name,name_en,population,hasc,geometry
55,9,8,Tenneville,,2861.0,,"MULTIPOLYGON (((5.39024 50.07851, 5.39032 50.0..."
56,9,8,Rendeux,,2596.0,,"MULTIPOLYGON (((5.44198 50.22504, 5.44237 50.2..."
57,9,8,Boechout,,12318.0,,"MULTIPOLYGON (((4.47911 51.16276, 4.47986 51.1..."
58,9,8,Ville de Bruxelles - Stad Brussel,Brussels,220447.0,,"MULTIPOLYGON (((4.314 50.8934, 4.318 50.89451,..."
59,9,8,Spiere-Helkijn,,1939.0,,"MULTIPOLYGON (((3.32376 50.72324, 3.32429 50.7..."


## Load clean or gold data

In [42]:
yearly = f'../2_cleaning/clean_data/observations_yearly_clean.parquet'
boomklever = f'../3_transformation/gold/observations_bk.parquet'
halsbandparkiet = f'../3_transformation/gold/observations_hp.parquet'

df_yearly_birds = pd.read_parquet(yearly, engine="pyarrow")
df_boomklever = pd.read_parquet(boomklever, engine="pyarrow")
df_halsbandparkiet = pd.read_parquet(halsbandparkiet, engine="pyarrow")

## Load and transform clean data

In [44]:

## TODO dit kan met herbruikbare code    
# Halsbandparkiet Group observations by year and merge with yearly
df_halsbandparkiet["year"] = df_halsbandparkiet["date"].dt.year 
df_halsbandparkiet_yearly = df_halsbandparkiet.reset_index().groupby("year").agg({'observation_id': 'nunique', 'observer_id': 'nunique'}).rename(columns={'observation_id': 'observation_count', 'observer_id': 'observers_count'})
df_halsbandparkiet_yearly = df_yearly_birds.merge(df_halsbandparkiet_yearly, on='year', how='left')
    # Aandeel per jaarlijks miljoen vogelwaarnemingen
df_halsbandparkiet_yearly['observations_pym'] = df_halsbandparkiet_yearly['observation_count'] * 1_000_000 / df_halsbandparkiet_yearly['allbirds_observation_count'] 
    # 5 jaarlijks gemiddelde 
df_halsbandparkiet_yearly['observations_pym_5yr_avg'] = df_halsbandparkiet_yearly['observations_pym'].rolling(window=5, min_periods=1).mean()
    # % groei over 5 jaar
df_halsbandparkiet_yearly['observations_growth_5yr_%'] = df_halsbandparkiet_yearly['observations_pym_5yr_avg'].pct_change(periods=5) * 100

# Boomklever Group observations by year and merge with yearly
df_boomklever["year"] = df_boomklever["date"].dt.year
df_boomklever_yearly = df_boomklever.reset_index().groupby("year").agg({'observation_id': 'nunique', 'observer_id': 'nunique'}).rename(columns={'observation_id': 'observation_count', 'observer_id': 'observers_count'})
df_boomklever_yearly = df_yearly_birds.merge(df_boomklever_yearly, on='year', how='left')
df_boomklever_yearly['observations_pym'] = df_boomklever_yearly['observation_count'] * 1_000_000 / df_boomklever_yearly['allbirds_observation_count']
    # 5 jaarlijks gemiddelde 
df_boomklever_yearly['observations_pym_5yr_avg'] = df_boomklever_yearly['observations_pym'].rolling(window=5, min_periods=1).mean()
    # % groei over 5 jaar
df_boomklever_yearly['observations_growth_5yr_%'] = df_boomklever_yearly['observations_pym_5yr_avg'].pct_change(periods=5) * 100

# # merge the observation dataframes
df_observations_yearly = pd.merge(df_halsbandparkiet_yearly, df_boomklever_yearly, on=['year', 'allbirds_observation_count'], how='outer', suffixes=("_hp", "_bk"))

# # merge with year
df_observations_yearly.fillna(0, inplace=True) # geen waarnemingen -> 0 ipv NaN
df_observations_yearly.sort_index(ascending=True).head(10)



Unnamed: 0_level_0,allbirds_observation_count,observation_count_hp,observers_count_hp,observations_pym_hp,observations_pym_5yr_avg_hp,observations_growth_5yr_%_hp,observation_count_bk,observers_count_bk,observations_pym_bk,observations_pym_5yr_avg_bk,observations_growth_5yr_%_bk
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1971,2242,3,3,1338.09099,1338.09099,0.0,1.0,1.0,446.03033,446.03033,0.0
1972,5281,9,6,1704.222685,1521.156838,0.0,1.0,1.0,189.358076,317.694203,0.0
1973,6547,3,2,458.225141,1166.846272,0.0,0.0,0.0,0.0,317.694203,0.0
1974,9548,9,7,942.605781,1110.786149,0.0,0.0,0.0,0.0,317.694203,0.0
1975,9115,33,8,3620.405924,1612.710104,0.0,3.0,2.0,329.127811,321.505406,0.0
1976,7035,9,7,1279.317697,1600.955446,19.644737,0.0,0.0,0.0,259.242944,-41.877732
1977,7394,3,2,405.734379,1341.257785,-11.826463,1.0,1.0,135.244793,232.186302,-26.915159
1978,11301,16,9,1415.803911,1532.773539,31.360366,1.0,1.0,88.487744,184.286783,-41.9924
1979,15202,10,8,657.808183,1475.814019,32.862119,5.0,2.0,328.904092,220.44111,-30.612171
1980,23498,38,17,1617.158907,1075.164616,-33.331811,4.0,3.0,170.227253,180.715971,-43.79069


## Write result to parquet-file in "gold" folder

In [46]:
df_observations_yearly.to_parquet(f'./gold/yearly_observations.parquet', engine="pyarrow")