In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import re

# set the max columns to none
pd.set_option('display.max_columns', None)
# set the max columns to none
pd.set_option('display.max_rows', None)

In [2]:
file_observations = '../1_scraping/scraped_data/cleaned/observation_details_116_clean.csv'

# Load the data
halsbandparkiet = pd.read_csv(file_observations, header=0, index_col="observation_id")
print(f"observation count before dropna {halsbandparkiet.shape[0]}")

# drop observations with missing crucial data
halsbandparkiet = halsbandparkiet.dropna(subset = ['species_id', 'species_name', 'date', 'gps_coordinates']) 
print(f"observation count after dropna {halsbandparkiet.shape[0]}")

# clean columns
halsbandparkiet['date'] = pd.to_datetime(halsbandparkiet['date'], format="mixed", errors='coerce')
halsbandparkiet["species_id"] = halsbandparkiet["species_id"].astype(int) # int parsing

halsbandparkiet["amount"] = halsbandparkiet["amount"].apply(lambda x: re.findall(r"\d+", x)[0]).astype(int) # only keep the amount and cast as int

# location needs to stay a float, because there are a lot of NaN values
# observations_hp["location_id"] = observations_hp["location_id"].astype(int) # int parsing
halsbandparkiet_without_location = halsbandparkiet[halsbandparkiet["location_id"].isnull()]
print(f"observations without location {halsbandparkiet_without_location.shape[0]}")

# Longitude, Latitude
halsbandparkiet["Latitude"] = halsbandparkiet["gps_coordinates"].apply(lambda x: x.split(', ')[0]) # NB
halsbandparkiet["Longitude"] = halsbandparkiet["gps_coordinates"].apply(lambda x: x.split(', ')[1]) # OL

print(f"halsbandparkiet bevat {halsbandparkiet.shape[0]} observaties")
halsbandparkiet.head(5)

observation count before dropna 96950
observation count after dropna 96551
observations without location 857
halsbandparkiet bevat 96551 observaties


Unnamed: 0_level_0,species_id,species_name,species_name_scientific,validation,gps_coordinates,accuracy,source,date,amount,life_stage,activity,location_id,location,observer_id,observer_name,counting_method,method,Latitude,Longitude
observation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
336301369,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.8585, 4.6717",33m,iObs,2024-12-31 16:45:00,1,onbekend,ter plaatse,30919.0,Heverlee - Egenhoven - Egenhoven Bos (VB),40236.0,Frederik Fluyt,onbekend,onbekend,50.8585,4.6717
336233189,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.8044, 4.6010",30m,Showcase,2024-12-31 16:00:00,1,onbekend,ter plaatse,30976.0,Loonbeek - Korenheide (VB),3528.0,Externe bron,geen,onbekend,50.8044,4.601
336205268,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.8584, 4.2921",3m,ObsMapp,2024-12-31 15:29:00,6,onbekend,foeragerend,32596.0,Berchem-Ste-Agathe/St-Agatha-Berchem - Koninck...,72841.0,Sara Velghe,onbekend,gezien en gehoord,50.8584,4.2921
336194978,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.9315, 4.3675",13m,ObsIdentify,2024-12-31 15:21:00,1,onbekend,ter plaatse,23260.0,Grimbergen (VB),359678.0,Kristof Vynckier,geen,onbekend,50.9315,4.3675
336193018,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.7396, 3.3351",14m,Site,2024-12-31 14:57:00,4,onbekend,ter plaatse,27358.0,Kooigem (WV),201247.0,Lore Vander Plancken,geen,gezien,50.7396,3.3351


In [3]:
# create geopandas
gdf_halsbandparkiet = gpd.GeoDataFrame(halsbandparkiet, 
                                       geometry=gpd.points_from_xy(halsbandparkiet.Longitude,halsbandparkiet.Latitude),
                                       crs="EPSG:4326")
gdf_halsbandparkiet.head()

Unnamed: 0_level_0,species_id,species_name,species_name_scientific,validation,gps_coordinates,accuracy,source,date,amount,life_stage,activity,location_id,location,observer_id,observer_name,counting_method,method,Latitude,Longitude,geometry
observation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
336301369,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.8585, 4.6717",33m,iObs,2024-12-31 16:45:00,1,onbekend,ter plaatse,30919.0,Heverlee - Egenhoven - Egenhoven Bos (VB),40236.0,Frederik Fluyt,onbekend,onbekend,50.8585,4.6717,POINT (4.6717 50.8585)
336233189,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.8044, 4.6010",30m,Showcase,2024-12-31 16:00:00,1,onbekend,ter plaatse,30976.0,Loonbeek - Korenheide (VB),3528.0,Externe bron,geen,onbekend,50.8044,4.601,POINT (4.601 50.8044)
336205268,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.8584, 4.2921",3m,ObsMapp,2024-12-31 15:29:00,6,onbekend,foeragerend,32596.0,Berchem-Ste-Agathe/St-Agatha-Berchem - Koninck...,72841.0,Sara Velghe,onbekend,gezien en gehoord,50.8584,4.2921,POINT (4.2921 50.8584)
336194978,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.9315, 4.3675",13m,ObsIdentify,2024-12-31 15:21:00,1,onbekend,ter plaatse,23260.0,Grimbergen (VB),359678.0,Kristof Vynckier,geen,onbekend,50.9315,4.3675,POINT (4.3675 50.9315)
336193018,116,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"50.7396, 3.3351",14m,Site,2024-12-31 14:57:00,4,onbekend,ter plaatse,27358.0,Kooigem (WV),201247.0,Lore Vander Plancken,geen,gezien,50.7396,3.3351,POINT (3.3351 50.7396)


In [4]:
gdf_halsbandparkiet.info(verbose=True)

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 96551 entries, 336301369 to 233392391
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   species_id               96551 non-null  int64         
 1   species_name             96551 non-null  object        
 2   species_name_scientific  96551 non-null  object        
 3   validation               96551 non-null  object        
 4   gps_coordinates          96551 non-null  object        
 5   accuracy                 96493 non-null  object        
 6   source                   92386 non-null  object        
 7   date                     96551 non-null  datetime64[ns]
 8   amount                   96551 non-null  int64         
 9   life_stage               96180 non-null  object        
 10  activity                 96180 non-null  object        
 11  location_id              95694 non-null  float64       
 12  location         

In [5]:
halsbandparkiet.describe(include='all')

Unnamed: 0,species_id,species_name,species_name_scientific,validation,gps_coordinates,accuracy,source,date,amount,life_stage,activity,location_id,location,observer_id,observer_name,counting_method,method,Latitude,Longitude
count,96551.0,96551,96551,96551,96551,96493,92386,96551,96551.0,96180,96180,95694.0,95694,96394.0,96394,96551,96551,96551.0,96551.0
unique,,1,1,5,63817,378,26,,,19,44,,3075,,6008,7,15,8078.0,11229.0
top,,Halsbandparkiet,Psittacula krameri,Goedgekeurd (automatische validatie),"51.0509, 4.3699",10m,Site,,,onbekend,ter plaatse,,Willebroek (AN),,Wim Van den Bossche,onbekend,onbekend,51.0509,4.3699
freq,,96551,96551,57468,961,24830,46233,,,87789,56128,,7041,,14471,79123,60631,1759.0,1202.0
mean,116.0,,,,,,,2019-07-30 01:02:40.495075072,4.90576,,,60560.293874,,88260.89,,,,,
min,116.0,,,,,,,1971-10-03 00:00:00,1.0,,,23088.0,,185.0,,,,,
25%,116.0,,,,,,,2017-08-14 00:00:00,1.0,,,24033.0,,40420.0,,,,,
50%,116.0,,,,,,,2020-08-19 19:35:00,2.0,,,29069.0,,46456.0,,,,,
75%,116.0,,,,,,,2022-12-03 00:00:00,3.0,,,41806.0,,94075.0,,,,,
max,116.0,,,,,,,2024-12-31 16:45:00,983.0,,,724090.0,,1058369.0,,,,,


In [6]:
import geopandas as gpd
import folium
from folium.plugins import HeatMap
import datetime
from IPython.display import display




# Filteren op datum
start_date = "1970-01-01"
end_date = "2000-01-01"
gdf_halsbandparkiet_range = gdf_halsbandparkiet[(gdf_halsbandparkiet["date"] > start_date) & (gdf_halsbandparkiet["date"] < end_date)].copy()

print(f"in het datumbereik zitten {gdf_halsbandparkiet_range.shape[0]}")

# Extract latitude and longitude from Point geometry
gdf_halsbandparkiet_range["lat"] = gdf_halsbandparkiet_range.geometry.y
gdf_halsbandparkiet_range["lon"] = gdf_halsbandparkiet_range.geometry.x


# Create a folium map centered on the data
m = folium.Map(location=[gdf_halsbandparkiet_range["lat"].mean(), gdf_halsbandparkiet_range["lon"].mean()], zoom_start=7, width=300, height=300)

# Add heatmap layer
heat_data = list(zip(gdf_halsbandparkiet_range["lat"], gdf_halsbandparkiet_range["lon"]))
HeatMap(heat_data, min_opacity=0.5, max_zoom=8, radius=5, overlay=True, show=True).add_to(m)

# Save or display the map
m.save("heatmap.html")
display(m)

in het datumbereik zitten 738


In [None]:
import geopandas as gpd
import folium
import pandas as pd
from folium.plugins import TimestampedGeoJson

# Zorg ervoor dat de datumkolom correct is
gdf_halsbandparkiet["date"] = pd.to_datetime(gdf_halsbandparkiet["date"])

# Extract latitude, longitude en jaartal
gdf_halsbandparkiet["lat"] = gdf_halsbandparkiet.geometry.y
gdf_halsbandparkiet["lon"] = gdf_halsbandparkiet.geometry.x
gdf_halsbandparkiet["year"] = gdf_halsbandparkiet["date"].dt.year  # Alleen jaartal

# Folium map initialiseren
m = folium.Map(
    location=[gdf_halsbandparkiet["lat"].mean(), gdf_halsbandparkiet["lon"].mean()], 
    zoom_start=8
)

# GeoJSON features aanmaken voor de slider
features = []
for year, group in gdf_halsbandparkiet.groupby("year"):
    for _, row in group.iterrows():
        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [row["lon"], row["lat"]],
            },
            "properties": {
                "time": str(pd.to_datetime(year, format="%Y")),  # Tijdstempel voor slider
                "style": {"color": "red", "radius": 5, "fillOpacity": 0.6}
            }
        })

# Tijdgebaseerde heatmap toevoegen
TimestampedGeoJson(
    {"type": "FeatureCollection", "features": features},
    period="P1Y",  # Periode per jaar
    add_last_point=True,
    auto_play=True,
    loop=False,
    max_speed=100,
    loop_button=True,
    date_options="YYYY",
    time_slider_drag_update=True
).add_to(m)

# Opslaan en tonen
m.save("heatmap_with_slider.html")
m

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interact

# Example: Load region polygons
regions = gpd.read_file('path_to_regions.geojson')

def filter_by_year(gdf, year):
    return gdf[gdf['date'].dt.year == year]

def plot_heatmap(year):
    filtered_gdf = filter_by_year(gdf_halsbandparkiet, year)
    
    # Create a base plot with the region polygons
    # base = regions.plot(color='lightgray', edgecolor='black', figsize=(10, 10))
    
    # Plot the heatmap
    # filtered_gdf.plot(ax=base, marker='o', markersize=10, column='observation_value', cmap='viridis', legend=True, alpha=0.6)
    filtered_gdf.plot(marker='o', markersize=10, column='observation_value', cmap='viridis', legend=True, alpha=0.6)
    
    plt.title(f'Observations Heatmap for Year {year}')
    plt.show()
    
# Get the range of years in your data
min_year = gdf_halsbandparkiet['date'].dt.year.min()
max_year = gdf_halsbandparkiet['date'].dt.year.max()

# Create a slider
year_slider = widgets.IntSlider(value=min_year, min=min_year, max=max_year, step=1, description='Year:')

# Use interact to link the slider to the plot function
interact(plot_heatmap, year=year_slider)




In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot heatmap
plt.figure(figsize=(10, 6))
sns.kdeplot(x=gdf_halsbandparkiet.geometry.x, y=gdf_halsbandparkiet.geometry.y, cmap="Reds", fill=True, levels=50, alpha=0.7)

# plt.scatter(x, y, s=5, color="black", alpha=0.5)  # Overlay points
plt.title("Spatial Density Heatmap")
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.show()