In [None]:
import requests
import threading
import os
import time
from datetime import datetime, timedelta
import pandas as pd

In [None]:
# Date window to download, as "YYYY-MM-DD" strings; both values are parsed
# with datetime.strptime(..., "%Y-%m-%d") in fetch_observations_for_species.
# NOTE: the first name is spelled `start_data` (not `start_date`); the fetch
# function reads it under exactly this name, so do not rename it in isolation.
start_data = "1980-01-01"
end_date = "2025-04-03"
# Value sent as the `location` query parameter of every API request.
Location = "Rotterdam"
# Pause, in seconds, between successive page requests within one batch.
time_sleep = 1

In [None]:
# Each run writes into its own directory, named after the moment the run
# started, so earlier exports are never overwritten.
timestamp = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
batch_directory = "batches_" + timestamp
os.makedirs(batch_directory, exist_ok=True)

# Species to download: numeric species id (used in the API URL) -> display
# name (used in log output and in the `species_name` output column).
# The disabled entries below can be re-enabled by uncommenting them.
species_info = {
    # 185769: "rosse metselbij",
    # 6496: "zwanenbloem",
    # 7261: "zoete kers",
    # 410: "laatvlieger",
    # 712: "kleine vuurvlinder",
    # 6390: "steenbreekvaren",
    # 390: "egel",
    # 2135: "snoek",
    # 428: "vos",
    # 204: "zanglijster",
    6562: "Haagbeuk",
}

In [None]:

def daterange(start_date, end_date, step_years=1):
    """Yield consecutive ``(window_start, window_end)`` pairs covering a range.

    The range ``start_date .. end_date`` is split into windows of
    ``step_years`` years. Both ends of every window are inclusive, windows do
    not overlap, and the last window is clipped to ``end_date``.

    Args:
        start_date: First day of the range (``datetime`` or ``date``).
        end_date: Last day of the range, inclusive.
        step_years: Length of each window in whole years; must be >= 1.

    Yields:
        Tuples ``(window_start, window_end)`` in chronological order. Nothing
        is yielded when ``start_date`` is after ``end_date``.

    Raises:
        ValueError: If ``step_years`` is smaller than 1 (the loop could never
            advance).
    """
    if step_years < 1:
        raise ValueError("step_years must be a positive integer")

    current = start_date
    # `<=` rather than `<`: end_date is inclusive, so when it coincides with
    # the start of a window (or equals start_date) that day still needs its
    # own window instead of being silently dropped.
    while current <= end_date:
        target_year = current.year + step_years
        try:
            next_start = current.replace(year=target_year)
        except ValueError:
            # Only Feb 29 can fail here (target year is not a leap year);
            # fall back to Feb 28 so the windows stay contiguous.
            next_start = current.replace(year=target_year, day=28)
        yield (current, min(next_start - timedelta(days=1), end_date))
        current = next_start

def fetch_observations_for_species(species_id, results):
    """Download every observation of one species and store it in ``results``.

    The configured date window (module-level ``start_data`` .. ``end_date``)
    is split into yearly batches via ``daterange``. Each batch is fetched
    page by page from the observation.org API until a page comes back with
    fewer rows than the page size. Progress is reported with ``print``.

    Failure handling, per batch:
      * HTTP 429 and network errors (including timeouts) are retried after a
        5 second pause, up to 5 times per batch.
      * Any other HTTP status, an unparsable response body, or running out of
        retries marks the batch as failed; pages already fetched for that
        batch are still kept.

    Args:
        species_id: Key into the module-level ``species_info`` mapping; also
            used in the API URL.
        results: Shared dict with a ``'lock'`` entry (used as a context
            manager) and an ``'observations'`` dict. On completion the list
            of fetched observations is stored under
            ``results['observations'][species_id]``.

    Returns:
        None. The result is delivered through ``results``.
    """
    species_name = species_info[species_id]
    print(f"\n Start ophalen van observaties voor soort {species_name} (ID: {species_id})...\n")

    page_size = 1000       # rows requested per page; a shorter page ends the batch
    request_timeout = 30   # seconds; without a timeout a stalled connection blocks forever
    base_url = f"https://observation.org/api/v1/species/{species_id}/observations/"

    start_dt = datetime.strptime(start_data, "%Y-%m-%d")
    end_dt = datetime.strptime(end_date, "%Y-%m-%d")
    all_observations = []

    for batch_start, batch_end in daterange(start_dt, end_dt):
        retries = 5  # shared by all pages of this batch
        current_page = 1
        batch_observations = []
        date_after = batch_start.strftime("%Y-%m-%d")
        date_before = batch_end.strftime("%Y-%m-%d")
        success = False

        print(f"📅 [{species_name}] Ophalen van {date_after} t/m {date_before}...")

        while True:
            # Passing the query as `params` lets requests URL-encode the
            # values, so a location containing spaces or `&` cannot corrupt
            # the query string.
            params = {
                "limit": page_size,
                "country_id": 166,
                "date_after": date_after,
                "date_before": date_before,
                "user": "",
                "location": Location,
                "page": current_page,
            }

            try:
                response = requests.get(base_url, params=params, timeout=request_timeout)
            except requests.RequestException as exc:
                # Connection problems and timeouts are usually transient:
                # retry the same page instead of letting the thread die and
                # lose everything fetched so far.
                if retries > 0:
                    print(f"⏳ Netwerkfout ({exc})... opnieuw proberen (poging {6 - retries}/5)")
                    time.sleep(5)
                    retries -= 1
                    continue
                print(f"Netwerkfout bij ophalen van batch {date_after} - {date_before}: {exc}")
                break

            if response.status_code == 200:
                try:
                    data = response.json()
                except ValueError:
                    # Body was not valid JSON; treat the batch as failed.
                    print(f"Ongeldige JSON bij ophalen van batch {date_after} - {date_before}")
                    break

                # A 'detail' key is treated as "nothing to fetch here".
                if 'detail' in data:
                    print(f"⚠️ Geen data in deze batch ({date_after} - {date_before})")
                    success = True
                    break

                observations = data.get("results", [])
                batch_observations.extend(observations)

                # A short page means this was the last one.
                if len(observations) < page_size:
                    success = True
                    break

                current_page += 1
                time.sleep(time_sleep)

            elif response.status_code == 429 and retries > 0:
                print(f"⏳ Rate limit geraakt... wachten (poging {6 - retries}/5)")
                time.sleep(5)
                retries -= 1
            else:
                print(f"Fout ({response.status_code}) bij ophalen van batch {date_after} - {date_before}")
                break

        # Keep whatever was fetched, even if the batch did not complete.
        all_observations.extend(batch_observations)

        if success:
            print(f"Batch voltooid: {len(batch_observations)} observaties van {date_after} t/m {date_before}\n")
        else:
            print(f"Batch mislukt: {date_after} t/m {date_before}\n")

    with results['lock']:
        results['observations'][species_id] = all_observations

    print(f"Totaal opgehaald voor {species_name}: {len(all_observations)} observaties.\n")


def fetch_all_observations():
    """Fetch all configured species concurrently and export the combined data.

    One thread per key of ``species_info`` runs
    ``fetch_observations_for_species``; all threads are joined before the
    results are merged. Every observation gets a ``species_name`` field, and
    the merged records are written to ``combined_observations.csv``
    (semicolon separated) and ``combined_observations.json`` (one record per
    line) inside ``batch_directory``. If nothing was fetched, only a warning
    is printed and no files are written.
    """
    shared = {'observations': {}, 'lock': threading.Lock()}

    # Launch one worker per species.
    workers = []
    for sid in species_info:
        worker = threading.Thread(target=fetch_observations_for_species, args=(sid, shared))
        workers.append(worker)
        worker.start()

    # Wait until every worker has finished before reading the shared dict.
    for worker in workers:
        worker.join()

    # Tag each record with its species name and flatten into one list.
    combined = []
    for sid, records in shared['observations'].items():
        label = species_info[sid]
        for record in records:
            record['species_name'] = label
        combined.extend(records)

    if not combined:
        print("⚠️ Geen observaties gevonden.")
        return

    frame = pd.DataFrame(combined)
    csv_path = os.path.join(batch_directory, "combined_observations.csv")
    json_path = os.path.join(batch_directory, "combined_observations.json")
    frame.to_csv(csv_path, index=False, sep=';', encoding="utf-8")
    frame.to_json(json_path, orient='records', lines=True, force_ascii=False)
    print(f"✔️ Gegevens opgeslagen in: {batch_directory}")

# Entry point of the script/notebook: download the observations for every
# species in `species_info` and write the combined CSV/JSON output files.
fetch_all_observations()
