In [1]:
# --- Imports and utility function ---
import os
import numpy as np
import pandas as pd
import requests
from io import StringIO

def _to_knmi_date(date_str: str) -> str:
    """Normalize a 'YYYY-MM-DD' or 'YYYYMMDD' date string to the 8-digit 'YYYYMMDD' form."""
    from datetime import datetime  # local import keeps this helper self-contained

    text = str(date_str).strip()
    if '-' in text:
        # strptime accepts non-zero-padded fields, so '2025-10-8' becomes '20251008'
        # rather than the malformed 7-digit '2025108' that stripping dashes would give.
        parsed = datetime.strptime(text, '%Y-%m-%d')
    elif len(text) == 8 and text.isdigit():
        parsed = datetime.strptime(text, '%Y%m%d')
    else:
        raise ValueError(
            f"Date {date_str!r} is not in 'YYYY-MM-DD' or 'YYYYMMDD' format"
        )
    return parsed.strftime('%Y%m%d')


def fetch_knmi_prec_evap(station: int, start_date: str, end_date: str):
    """
    Fetch KNMI daily data and compute precipitation and evapotranspiration (Hargreaves).

    Parameters:
    - station (int): KNMI station number (e.g., 249 for Berkhout)
    - start_date (str): Start date in 'YYYY-MM-DD' or 'YYYYMMDD' format
    - end_date (str): End date in 'YYYY-MM-DD' or 'YYYYMMDD' format

    Returns:
    - prec (pd.Series): Precipitation series in mm/day (float64), indexed by DATE.
      Negative raw values (KNMI's -1 marker for trace amounts) are reported as 0.0.
    - evap (pd.Series): Evapotranspiration series in mm/day (float64), indexed by DATE.
      NaN wherever any of radiation, Tmin, Tmax or Tavg is missing.

    Raises:
    - ValueError: if a date is not in one of the accepted formats.
    - requests.HTTPError: if the KNMI service answers with an error status.
    - requests.Timeout: if the KNMI service does not answer within the timeout.
    - pandas.errors.EmptyDataError: if the response contains no data rows.
    """
    url = 'https://www.daggegevens.knmi.nl/klimatologie/daggegevens'
    params = {
        'start': _to_knmi_date(start_date),
        'end': _to_knmi_date(end_date),
        'stns': str(station),
        'vars': 'Q:RH:TN:TX:TG',
        'fmt': 'csv'
    }
    # A timeout prevents the notebook from hanging forever on an unresponsive server.
    response = requests.post(url, data=params, timeout=60)
    response.raise_for_status()
    # Metadata and the column header are sent as '#'-prefixed lines; keep data rows only.
    csv_data = '\n'.join(
        line for line in response.text.splitlines() if not line.startswith('#')
    )

    # Fields are space-padded; skipinitialspace makes blank (missing) fields parse as NaN
    # instead of turning the whole column into strings.
    knmi_df = pd.read_csv(StringIO(csv_data), header=None, skipinitialspace=True)
    # Assigning the names afterwards fails loudly if the column count is unexpected.
    knmi_df.columns = ['STN', 'DATE', 'Q', 'RH', 'TN', 'TX', 'TG']
    knmi_df['DATE'] = pd.to_datetime(knmi_df['DATE'], format='%Y%m%d')
    knmi_df = knmi_df.set_index('DATE')

    # Unit scaling of the raw KNMI values (see the KNMI variable documentation):
    radiation = knmi_df['Q'] * 0.01   # J/cm2 -> MJ/m2
    t_min = knmi_df['TN'] / 10.0      # 0.1 degC -> degC
    t_max = knmi_df['TX'] / 10.0
    t_avg = knmi_df['TG'] / 10.0

    # KNMI encodes "< 0.05 mm" as RH = -1; clip so trace days do not become -0.1 mm.
    prec = (knmi_df['RH'] / 10.0).clip(lower=0.0).astype(float).rename('Precipitation')

    # Vectorized Hargreaves expression; NaN inputs propagate to NaN output, and
    # Tmax < Tmin also yields NaN through the square root.
    # NOTE(review): the textbook Hargreaves formula uses extraterrestrial radiation
    # expressed as equivalent evaporation, whereas Q is measured global radiation
    # in MJ/m2 - confirm this substitution is intended.
    evap = (
        0.0023 * (t_avg + 17.8) * np.sqrt(t_max - t_min) * radiation
    ).astype(float).rename('ET')
    return prec, evap

In [2]:
# --- Discover repo root and set up paths ---
from pathlib import Path
import os

# Walk upward from the working directory and stop at the first folder that holds
# a project marker (pyproject.toml or .git). If no folder qualifies, the working
# directory itself is used as the repo root.
start_dir = Path.cwd()
repo_root = next(
    (
        folder
        for folder in (start_dir, *start_dir.parents)
        if any((folder / marker).exists() for marker in ('pyproject.toml', '.git'))
    ),
    start_dir,
)
print('Repo root:', repo_root)

# Outputs are written to a folder directly under the repo root; create it on first use.
output_dir = repo_root / 'input_stressors'
os.makedirs(output_dir, exist_ok=True)
print('Output directory:', output_dir)


Repo root: d:\Users\jvanruitenbeek\data_validation
Output directory: d:\Users\jvanruitenbeek\data_validation\input_stressors


In [None]:
# --- Parameters ---
# Define the station and date range for the data pull.
# Dates are written zero-padded ('YYYY-MM-DD') so that they map onto the
# 8-digit YYYYMMDD form used in the KNMI request.
station = 249  # Berkhout
start_date = '2020-01-01'
end_date = '2025-10-08'

In [4]:
# --- Download and save data ---
from datetime import datetime

# Pull both series for the configured station and date range.
prec, evap = fetch_knmi_prec_evap(station, start_date, end_date)

# A single timestamp is shared by both files so the pair can be matched later.
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')

# Build the file names and target paths up front.
prec_filename = f'prec_station_{station}_{timestamp}.csv'
evap_filename = f'evap_station_{station}_{timestamp}.csv'
prec_path = output_dir / prec_filename
evap_path = output_dir / evap_filename

# Write the precipitation series (with header row) and report where it went.
prec.to_csv(prec_path, header=True)
print(f'Precipitation saved to: {prec_path}')

# Write the evapotranspiration series (with header row) and report where it went.
evap.to_csv(evap_path, header=True)
print(f'Evapotranspiration saved to: {evap_path}')


Precipitation saved to: d:\Users\jvanruitenbeek\data_validation\input_stressors\prec_station_249_20251009145954.csv
Evapotranspiration saved to: d:\Users\jvanruitenbeek\data_validation\input_stressors\evap_station_249_20251009145954.csv
