# CSV + KNMI Data Merger and Plotter
This script processes every `*.csv` file in the input folder, merges each one with KNMI precipitation and evaporation data on the `Timestamp` column, computes recharge (precipitation minus evaporation), and writes one interactive Plotly HTML plot per station.

In [3]:
import sys
import argparse
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path
from scripts.knmi_pull import fetch_knmi_prec_evap

def get_station_code(filename):
    """Return the station code: the file name without directory or final suffix."""
    csv_file = Path(filename)
    return csv_file.stem

def cache_knmi(station, start, end, cache_dir):
    """Return KNMI precipitation and evaporation for a period, cached on disk.

    On a cache hit the two columns are read back from a parquet file; on a
    miss the data is fetched with ``fetch_knmi_prec_evap`` and, when it is
    non-empty, stored for the next call.

    Args:
        station: Label used only to build the cache file name.
        start: Period start, forwarded to the fetcher and embedded in the
            cache file name (``process_station`` passes ``%Y%m%d`` strings).
        end: Period end, handled the same way as ``start``.
        cache_dir: ``pathlib.Path`` of the cache folder; created together
            with any missing parents.

    Returns:
        A ``(precipitation, evaporation)`` pair: the cached columns on a
        hit, otherwise exactly what the fetcher returned.
    """
    # parents=True so a nested cache path works, matching how main()
    # creates the output folder.
    cache_dir.mkdir(parents=True, exist_ok=True)
    fname = cache_dir / f'knmi_{station}_{start}_{end}.parquet'
    if fname.exists():
        cached = pd.read_parquet(fname)
        return cached['precipitation'], cached['evaporation']

    # NOTE(review): the KNMI station id is hard-coded to 249 regardless of
    # `station`; confirm that a single KNMI station for all CSVs is intended.
    prec, evap = fetch_knmi_prec_evap(249, start, end)

    # Never persist a missing or empty result: it would be served from the
    # cache on every later run and hide data that becomes available later.
    if any(s is None or getattr(s, 'empty', False) for s in (prec, evap)):
        return prec, evap

    pd.DataFrame({'precipitation': prec, 'evaporation': evap}).to_parquet(fname)
    return prec, evap

def _build_station_figure(merged, station):
    """Build the dual-axis Plotly figure for one station's merged data."""
    fig = go.Figure()
    # (column, legend name, y-axis id): the KNMI-derived series share the
    # left axis, the water level gets the overlaid right axis.
    traces = (
        ('precipitation', 'Precipitation', 'y1'),
        ('evaporation', 'Evaporation', 'y1'),
        ('recharge', 'Recharge', 'y1'),
        ('Waterniveau', 'Waterniveau', 'y2'),
    )
    for column, label, axis in traces:
        fig.add_trace(go.Scatter(
            x=merged['Timestamp'],
            y=merged[column],
            name=label,
            yaxis=axis
        ))

    fig.update_layout(
        title=f'Station {station} - Waterniveau & KNMI',
        xaxis_title='Timestamp',
        yaxis=dict(
            title='mm/dag (KNMI)',
            side='left'
        ),
        yaxis2=dict(
            title='Waterniveau',
            overlaying='y',
            side='right',
            showgrid=False
        ),
        legend_title='Variabele',
        hovermode='x unified',
        template='plotly_white'
    )
    return fig


def process_station(csv_path, output_dir, cache_dir, log):
    """Merge one station CSV with KNMI data and write its plot as HTML.

    The outcome is reported by appending one message to ``log``. Problems
    with reading the CSV, a missing water-level column, unusable timestamps
    and failures or empty results from the KNMI lookup are logged and the
    station is skipped; errors while writing the HTML file still propagate.

    Args:
        csv_path: Path of the station CSV. It needs a ``Timestamp`` column
            and a column whose name contains ``Waterniveau``.
        output_dir: ``pathlib.Path`` of the folder that receives
            ``<station>_plot.html``.
        cache_dir: Cache folder handed on to ``cache_knmi``.
        log: List of message strings, extended in place.
    """
    station = get_station_code(csv_path)
    try:
        df = pd.read_csv(csv_path, parse_dates=['Timestamp'])
    except Exception as e:
        log.append(f'{station}: Fout bij lezen CSV: {e}')
        return

    # Ensure the water level column is named "Waterniveau"
    if 'Waterniveau' not in df.columns:
        wn_col = next((c for c in df.columns if 'Waterniveau' in c), None)
        if wn_col:
            df = df.rename(columns={wn_col: 'Waterniveau'})
        else:
            log.append(f'{station}: Geen Waterniveau kolom gevonden!')
            return

    # An empty file, or a column read_csv could not parse into datetimes,
    # would make min()/strftime raise and abort the whole batch; skip the
    # station instead.
    timestamps = df['Timestamp']
    if (not pd.api.types.is_datetime64_any_dtype(timestamps)
            or timestamps.isna().all()):
        log.append(f'{station}: Geen geldige tijdstippen gevonden!')
        return

    start = timestamps.min().strftime('%Y%m%d')
    end = timestamps.max().strftime('%Y%m%d')

    try:
        prec, evap = cache_knmi(station, start, end, cache_dir)
    except Exception as e:
        log.append(f'{station}: Fout bij ophalen KNMI data: {e}')
        return

    if prec is None or evap is None or prec.empty or evap.empty:
        log.append(f'{station}: Geen KNMI data gevonden!')
        return

    knmi = pd.DataFrame({
        'Timestamp': pd.to_datetime(prec.index),
        'precipitation': prec.values,
        'evaporation':   evap.values
    })

    # Left merge keeps every CSV row; KNMI values are filled in only where
    # the timestamps are exactly equal.
    merged = pd.merge(df, knmi, on='Timestamp', how='left')
    merged['recharge'] = merged['precipitation'] - merged['evaporation']

    fig = _build_station_figure(merged, station)

    out_html = output_dir / f'{station}_plot.html'
    fig.write_html(out_html)
    log.append(
        f'{station}: {len(merged)} tijdstippen, '
        f'{len(knmi)} KNMI records, '
        f'plot opgeslagen als {out_html.name}'
    )

def main():
    """Parse the folder options and process every CSV in the input folder.

    Recognised flags are ``--input``, ``--output`` and ``--cache``; anything
    else on the command line is ignored. The output folder is created when
    missing, and the collected log messages are printed once at the end.
    """
    # A Jupyter kernel puts its own arguments on the command line; drop them
    # (parse_known_args below would tolerate them anyway).
    launcher = sys.argv[0]
    if 'ipykernel' in launcher:
        sys.argv = [launcher]

    cli = argparse.ArgumentParser(
        description='Generate precip/evap/recharge plots per station'
    )
    folder_options = (
        ('--input', 'data', 'Input folder with CSV files'),
        ('--output', 'plots', 'Output folder for HTML plots'),
        ('--cache', 'knmi_cache', 'Cache folder for KNMI pulls'),
    )
    for flag, default_folder, description in folder_options:
        cli.add_argument(flag, type=str, default=default_folder,
                         help=description)

    # Unknown flags such as --f end up in the second element and are dropped.
    options, _unknown = cli.parse_known_args()

    csv_folder = Path(options.input)
    plot_folder = Path(options.output)
    knmi_cache_folder = Path(options.cache)
    plot_folder.mkdir(parents=True, exist_ok=True)

    messages = []
    for station_csv in csv_folder.glob('*.csv'):
        process_station(station_csv, plot_folder, knmi_cache_folder, messages)

    print('\n'.join(messages))


if __name__ == '__main__':
    main()


