# Correlation



In [24]:
from pathlib import Path

import pandas as pd
import plotly.express as px

In [25]:
# Year whose rows are analysed, and the quantile used as the lower cut-off
# when selecting locations for the correlation plots.
to_plot_year = 2024
percentile = 0.9

# Column name -> axis / legend / hover label handed to plotly via `labels=`.
# The label texts show that the "_hp" columns refer to the halsbandparkiet
# and the "_bk" columns to the boomklever.
labels = {
    "observations_growth_5yr_%_hp": "Groei halsbandparkiet (%)",
    "observations_growth_5yr_%_bk": "Groei boomklever (%)",
    "observers_per_km2_hp": "Waarnemers halsbandparkiet per km2",
    "observers_per_km2_bk": "Waarnemers boomklever per km2",
    "observations_per_km2_hp": "Waarnemingen halsbandparkiet per km2",
    "observations_per_km2_bk": "Waarnemingen boomklever per km2",
    "location_name": "Locatie",
}

# Columns shown in the hover tooltip of every scatter plot.
hover_data = [
    "location_name",
    "observers_per_km2_hp",
    "observations_growth_5yr_%_hp",
    "observations_growth_5yr_%_bk",
    "observers_per_km2_bk",
]

# Marker size and marker colour are both driven by the same column.
size = color = "observers_per_km2_hp"

In [26]:
# Load the yearly per-location table and keep only the rows of the year being
# plotted.
df_bylocation = pd.read_parquet('../../../3_transformation/gold/yearly_observations_by_location.parquet', engine="pyarrow")
# NOTE: despite the "_2024" suffix, this frame holds the rows for `to_plot_year`.
df_bylocation_2024 = df_bylocation[df_bylocation["year"] == to_plot_year].copy()

print(f"Number of locations in {to_plot_year}: {df_bylocation_2024.shape[0]}")

# A location is kept only when it lies strictly above the `percentile`
# quantile for every one of these columns. Each threshold is computed on the
# full set of locations for the year, not on the progressively filtered subset.
percentile_filter_columns = [
    "observation_count_hp",
    "observation_count_bk",
    "observations_per_km2_hp",
    "observations_per_km2_bk",
    "observers_per_km2_hp",
    "observers_per_km2_bk",
    "observers_count_hp",
    "observers_count_bk",
]
correlation_set = df_bylocation_2024
for column in percentile_filter_columns:
    threshold = df_bylocation_2024[column].quantile(percentile)
    correlation_set = correlation_set[correlation_set[column] > threshold]
# Take an independent copy so the column assignments below do not write into
# a view of the yearly frame.
correlation_set = correlation_set.copy()

# Cap the 5-year growth percentages to [-100, 100] so that extreme values do
# not dominate the plots and the correlation coefficients.
for growth_column in ("observations_growth_5yr_%_hp", "observations_growth_5yr_%_bk"):
    correlation_set[growth_column] = correlation_set[growth_column].clip(lower=-100, upper=100)

print(f"Number of locations in {to_plot_year} for percentile above {percentile}: {correlation_set.shape[0]}")

Number of locations in 2024: 13966
Number of locations in 2024 for percentile above 0.9: 95


## Correlatie tussen groei van de boomklever en van de halsbandparkiet

In [27]:
# Scatter plot of the 5-year growth of the boomklever (y) against the 5-year
# growth of the halsbandparkiet (x) for the locations in `correlation_set`.
x = "observations_growth_5yr_%_hp"
y = "observations_growth_5yr_%_bk"

title = f'{labels[y]} ifv {labels[x].lower()}'
subtitle = f'Jaar {to_plot_year} - Voorkomen halsbandparkieten en boomklevers boven percentiel: {percentile}'

# Pearson correlation coefficient between the two plotted columns
# (Pearson is the default method of Series.corr).
corr = correlation_set[x].corr(correlation_set[y])
print(f"Correlatie coëfficient: {corr}")

fig = px.scatter(
    correlation_set,
    x=x,
    y=y,
    color=color,
    size=size,
    labels=labels,
    hover_data=hover_data,
    title=f'{title} (correlatiecoëfficient = {corr:.2f})',
    subtitle=subtitle,
)

# Adjust text sizes.
fig.update_layout(
    title_font=dict(size=22),        # figure title
    xaxis_title_font=dict(size=18),  # x-axis title
    yaxis_title_font=dict(size=18),  # y-axis title
    font=dict(size=14),              # general font size (e.g. tick labels)
)

# Make sure the output folder exists so that a missing "plots" directory
# (fresh checkout, Restart & Run All) does not make the export fail.
Path("plots").mkdir(parents=True, exist_ok=True)
# NOTE(review): the ':' in the file name is not a valid file-name character on
# Windows; kept unchanged here so existing links to the exported files still work.
fig.write_html(f"plots/{to_plot_year} - P{percentile}: {title}.html")
# fig.show()

Correlatie coëfficient: 0.6313269928440254


## Correlatie tussen waarnemingen/km2 van de halsbandparkiet en de groei van de boomklever

In [28]:
# Scatter plot of the 5-year growth of the boomklever (y) against the number
# of halsbandparkiet observations per km2 (x) for the locations in
# `correlation_set`.
x = "observations_per_km2_hp"
y = "observations_growth_5yr_%_bk"

title = f'{labels[y]} ifv {labels[x].lower()}'
subtitle = f'Jaar {to_plot_year} - Voorkomen halsbandparkieten en boomklevers boven percentiel: {percentile}'

# Pearson correlation coefficient between the two plotted columns
# (Pearson is the default method of Series.corr).
corr = correlation_set[x].corr(correlation_set[y])
print(f"Correlatie coëfficient: {corr}")

fig = px.scatter(
    correlation_set,
    x=x,
    y=y,
    color=color,
    size=size,
    labels=labels,
    hover_data=hover_data,
    title=f'{title} (correlatiecoëfficient = {corr:.2f})',
    subtitle=subtitle,
)

# Adjust text sizes.
fig.update_layout(
    title_font=dict(size=22),        # figure title
    xaxis_title_font=dict(size=18),  # x-axis title
    yaxis_title_font=dict(size=18),  # y-axis title
    font=dict(size=14),              # general font size (e.g. tick labels)
)

# Make sure the output folder exists so that a missing "plots" directory
# (fresh checkout, Restart & Run All) does not make the export fail.
Path("plots").mkdir(parents=True, exist_ok=True)
# NOTE(review): the ':' in the file name is not a valid file-name character on
# Windows; kept unchanged here so existing links to the exported files still work.
fig.write_html(f"plots/{to_plot_year} - P{percentile}: {title}.html")
# fig.show()

Correlatie coëfficient: -0.14722436236824799


## Correlatie tussen waarnemers/km2 van de halsbandparkiet en de groei van de boomklever

In [29]:
# Scatter plot of the 5-year growth of the boomklever (y) against the number
# of halsbandparkiet observers per km2 (x) for the locations in
# `correlation_set`.
x = "observers_per_km2_hp"
y = "observations_growth_5yr_%_bk"

title = f'{labels[y]} ifv {labels[x].lower()}'
subtitle = f'Jaar {to_plot_year} - Voorkomen halsbandparkieten en boomklevers boven percentiel: {percentile}'

# Pearson correlation coefficient between the two plotted columns
# (Pearson is the default method of Series.corr).
corr = correlation_set[x].corr(correlation_set[y])
print(f"Correlatie coëfficient: {corr}")

fig = px.scatter(
    correlation_set,
    x=x,
    y=y,
    color=color,
    size=size,
    labels=labels,
    hover_data=hover_data,
    title=f'{title} (correlatiecoëfficient = {corr:.2f})',
    subtitle=subtitle,
)

# Adjust text sizes.
fig.update_layout(
    title_font=dict(size=22),        # figure title
    xaxis_title_font=dict(size=18),  # x-axis title
    yaxis_title_font=dict(size=18),  # y-axis title
    font=dict(size=14),              # general font size (e.g. tick labels)
)

# Make sure the output folder exists so that a missing "plots" directory
# (fresh checkout, Restart & Run All) does not make the export fail.
Path("plots").mkdir(parents=True, exist_ok=True)
# NOTE(review): the ':' in the file name is not a valid file-name character on
# Windows; kept unchanged here so existing links to the exported files still work.
fig.write_html(f"plots/{to_plot_year} - P{percentile}: {title}.html")
# fig.show()



Correlatie coëfficient: -0.2186312073323372


## Correlatie tussen waarnemers/km2 van de halsbandparkiet en de boomklever

In [30]:
# Scatter plot of the number of boomklever observers per km2 (y) against the
# number of halsbandparkiet observers per km2 (x) for the locations in
# `correlation_set`.
x = "observers_per_km2_hp"
y = "observers_per_km2_bk"

title = f'{labels[y]} ifv {labels[x].lower()}'
subtitle = f'Jaar {to_plot_year} - Voorkomen halsbandparkieten en boomklevers boven percentiel: {percentile}'

# Pearson correlation coefficient between the two plotted columns
# (Pearson is the default method of Series.corr).
corr = correlation_set[x].corr(correlation_set[y])
print(f"Correlatie coëfficient: {corr}")

fig = px.scatter(
    correlation_set,
    x=x,
    y=y,
    color=color,
    size=size,
    labels=labels,
    hover_data=hover_data,
    title=f'{title} (correlatiecoëfficient = {corr:.2f})',
    subtitle=subtitle,
)

# Adjust text sizes.
fig.update_layout(
    title_font=dict(size=22),        # figure title
    xaxis_title_font=dict(size=18),  # x-axis title
    yaxis_title_font=dict(size=18),  # y-axis title
    font=dict(size=14),              # general font size (e.g. tick labels)
)

# Make sure the output folder exists so that a missing "plots" directory
# (fresh checkout, Restart & Run All) does not make the export fail.
Path("plots").mkdir(parents=True, exist_ok=True)
# NOTE(review): the ':' in the file name is not a valid file-name character on
# Windows; kept unchanged here so existing links to the exported files still work.
fig.write_html(f"plots/{to_plot_year} - P{percentile}: {title}.html")
# fig.show()

Correlatie coëfficient: 0.8774244602872946
