# Scan output_files/output_sheets for CSV Files

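This notebook scans the `output_files/output_sheets` directory and lists all `.csv` files it finds. It then fits a set of Pastas recharge models to the head series in each file, using precipitation and evaporation input files, and saves the fit statistics to an Excel file.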

Future steps: you can expand this notebook to further process or plot the data from these files and the resulting model statistics.

In [None]:
# Standard library imports
import os
from pathlib import Path

# Define the directory to scan (relative path)
output_sheets_dir = Path('../output_files/output_sheets')

# Collect the names of all .csv entries in that directory.
# os.listdir() returns names in arbitrary order, so the list is sorted: a later
# cell only processes the first few entries of csv_files and should pick the
# same files on every run and on every machine.
csv_files = []
if output_sheets_dir.is_dir():  # is_dir() is already False for a missing path
    csv_files = sorted(
        filename
        for filename in os.listdir(output_sheets_dir)
        if filename.lower().endswith('.csv')
    )
else:
    print(f'Directory not found: {output_sheets_dir}')

# Print the found CSV files
print('Found CSV files:')
for fname in csv_files:
    print(f'- {fname}')

# ---
# Future expansion:
# - Read CSV files
# - Process data
# - Plot results

In [None]:
# --- Pastas Model Analysis for Each CSV File ---
import pandas as pd
import pastas as ps
import numpy as np

# Relative paths to the precipitation and evaporation input files. Both files are
# loaded further down in this cell and then used as the forcing series for every
# model fitted in this notebook.
input_prec_path = '../input_files/input_prec/prec_station_249.csv'  # change the filename to use a different precipitation file
input_evap_path = '../input_files/input_evap/evap_station_249.csv'   # change the filename to use a different evaporation file

# Read precipitation and evaporation CSVs as pandas Series, always using first column as date and second as value
def read_timeseries_csv(path):
    """Load a CSV file as a chronologically sorted, date-indexed pandas Series.

    The first column is parsed as dates and becomes the index; the second
    column supplies the values. Any further columns are ignored.

    Parameters
    ----------
    path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.Series
        Values of the second column with missing values dropped, indexed by
        the parsed dates in ascending order. The Series is named after the
        second column and its index after the first.

    Raises
    ------
    IndexError
        If the file has fewer than two columns.

    Errors raised by ``pandas.read_csv`` or ``pandas.to_datetime`` (missing
    file, unparseable dates) propagate unchanged.
    """
    df = pd.read_csv(path)
    date_col = df.columns[0]
    value_col = df.columns[1]
    series = df.set_index(pd.to_datetime(df[date_col]))[value_col].dropna()
    # Label-based slicing (.loc[start:end]) and time series modelling both need a
    # monotonically increasing index, so do not rely on the row order of the file.
    # A stable sort keeps the file order of rows that share a timestamp.
    return series.sort_index(kind="mergesort")

# Load both forcing series and check that each is a pandas Series with a DatetimeIndex.
# Each name is reset to None first and only bound to the loaded data on success, so a
# failed load can never leave an undefined name, or a stale series from an earlier run
# of this cell, for later cells to pick up silently.
input_prec = None
try:
    loaded_prec = read_timeseries_csv(input_prec_path)
    # Explicit check instead of `assert`, which is skipped when Python runs with -O
    if not (isinstance(loaded_prec, pd.Series) and isinstance(loaded_prec.index, pd.DatetimeIndex)):
        raise TypeError("expected a pandas Series with a DatetimeIndex")
    input_prec = loaded_prec
    print(f"input_prec loaded: {input_prec.shape}, index type: {type(input_prec.index)}")
except Exception as e:
    print(f"Failed to load input_prec: {e}")

input_evap = None
try:
    loaded_evap = read_timeseries_csv(input_evap_path)
    if not (isinstance(loaded_evap, pd.Series) and isinstance(loaded_evap.index, pd.DatetimeIndex)):
        raise TypeError("expected a pandas Series with a DatetimeIndex")
    input_evap = loaded_evap
    print(f"input_evap loaded: {input_evap.shape}, index type: {type(input_evap.index)}")
except Exception as e:
    print(f"Failed to load input_evap: {e}")
# Placeholder for an optional extra stressor ("waterhoogte" is Dutch for water level):
# waterhoogte_daily_mean = ...  # pd.Series with datetime index (optional extra stressor)


In [None]:
# Quick sanity check: show pandas' summary of the loaded evaporation series.
# This cell fails if the evaporation load in the previous cell did not succeed.
input_evap.info()

In [None]:
# Ensure csv_files is defined and populated in this cell, in case previous cells were not run
import os
from pathlib import Path

output_sheets_dir = Path('../output_files/output_sheets')
# os.listdir() returns names in arbitrary order. Only the first `max_files`
# entries of csv_files are processed below, so the names are sorted to make
# that selection the same on every run and on every machine.
csv_files = []
if output_sheets_dir.is_dir():  # is_dir() is already False for a missing path
    csv_files = sorted(
        filename
        for filename in os.listdir(output_sheets_dir)
        if filename.lower().endswith('.csv')
    )
else:
    print(f'Directory not found: {output_sheets_dir}')

# Recharge concepts to try, keyed by the label written to the results table
recharge_models = dict(
    Linear=ps.rch.Linear(),
    FlexModel=ps.rch.FlexModel(),
    Berendrecht=ps.rch.Berendrecht(),
)

# Response functions that are combined with every recharge concept
response_functions = dict(
    Exponential=ps.Exponential(),
    Gamma=ps.Gamma(),
    DoubleExponential=ps.DoubleExponential(),
    Hantush=ps.Hantush(),
    FourParam=ps.FourParam(),
)

# Collects one summary row per model run
results = list()

# Only the first `max_files` observation files are modelled
max_files = 3
csv_files_to_process = csv_files[0:max_files]
print(f"Processing up to {max_files} CSV files (found {len(csv_files)})")

# --- Loop over each observation file ---

def _result_row(csv_file, model_name, rch_name, rfunc_name, stats=None, error=None):
    """Build one row of the results table.

    When ``stats`` (the ``.stats`` object of a solved model) is given, the fit
    metrics are evaluated from it; otherwise they stay ``None``. ``error`` is
    stored as text under the "error" key, which only exists on failure rows.
    """
    row = {
        "file": csv_file,
        "model": model_name,
        "RechargeModel": rch_name,
        "RechargeRfunc": rfunc_name,
        "EVP": None,
        "R2": None,
        "RMSE": None,
        "AIC": None,
        "BIC": None,
    }
    if stats is not None:
        row.update(
            EVP=stats.evp(),
            R2=stats.rsq(),
            RMSE=stats.rmse(),
            AIC=stats.aic(),
            BIC=stats.bic(),
        )
    if error is not None:
        row["error"] = str(error)
    return row


def _fit_and_score(head, stressmodel, csv_file, model_name, rch_name, rfunc_name):
    """Fit a model of ``head`` with one stress model plus AR noise; return its result row."""
    ml = ps.Model(head, name=model_name)
    ml.add_stressmodel(stressmodel)
    ml.add_noisemodel(ps.ArNoiseModel())
    ml.solve(report=True)
    return _result_row(csv_file, model_name, rch_name, rfunc_name, stats=ml.stats)


for csv_file in csv_files_to_process:
    print(f"\n=== Processing file: {csv_file} ===")
    try:
        df = pd.read_csv(output_sheets_dir / csv_file, parse_dates=["Timestamp"])
        df = df.rename(columns={"head": "head_raw"})
        df = df.set_index("Timestamp")
        # Daily median head; days without any observation are dropped, so this
        # series may contain gaps.
        head_daily_median = df["head_raw"].resample('D').median().dropna()
        if head_daily_median.empty:
            raise ValueError("no valid head observations")

        # Determine time window for the input data
        start_date = head_daily_median.index.min()
        end_date = head_daily_median.index.max()

        # Put both stresses on a regular daily grid spanning the observation period.
        # They must NOT be reindexed onto the head index itself: that index has gaps
        # wherever observations are missing, which would turn the stresses into
        # irregular series. Days missing from the input files become NaN here.
        # NOTE(review): pastas is expected to fill such NaNs per its fill settings
        # and to need forcing data before the first observation for its warm-up
        # period; consider passing the full input series. TODO confirm.
        daily_index = pd.date_range(start_date, end_date, freq="D")
        input_prec_slice = input_prec.reindex(daily_index)
        input_evap_slice = input_evap.reindex(daily_index)
        print(input_prec_slice[0:10])
        print(input_evap_slice[0:10])

        # 1) Fit all recharge x response variants
        for rch_name, rch_model in recharge_models.items():
            for rfunc_name, rfunc in response_functions.items():
                model_name = f"{rch_name}_{rfunc_name}"
                print(f"  Running model: {model_name}")
                try:
                    rm = ps.RechargeModel(
                        prec=input_prec_slice,
                        evap=input_evap_slice,
                        recharge=rch_model,
                        rfunc=rfunc,
                        name="rch"
                    )
                    results.append(
                        _fit_and_score(
                            head_daily_median, rm, csv_file, model_name, rch_name, rfunc_name
                        )
                    )
                except Exception as e:
                    # Keep going: one failing combination must not stop the others,
                    # but it is recorded as a row with empty metrics and the error text.
                    print(f"    Model {model_name} failed: {e}")
                    results.append(
                        _result_row(csv_file, model_name, rch_name, rfunc_name, error=e)
                    )

        # 2) Now fit the TarsoModel once, using Exponential (the only supported rfunc)
        tarso_name = "Tarso_Exp"
        print(f"\n  Running Tarso model: {tarso_name}")
        try:
            tm = ps.TarsoModel(
                prec=input_prec_slice,
                evap=input_evap_slice,
                oseries=head_daily_median,  # lets Tarso auto-set dmin/dmax
                rfunc=ps.Exponential(),     # must be Exponential()
                name="tarso"
            )
            results.append(
                _fit_and_score(
                    head_daily_median, tm, csv_file, tarso_name, "Tarso", "Exponential"
                )
            )
        except Exception as e:
            print(f"    Model {tarso_name} failed: {e}")
            results.append(
                _result_row(csv_file, tarso_name, "Tarso", "Exponential", error=e)
            )
    except Exception as e:
        # File-level problems (unreadable CSV, missing columns, no data, missing
        # input series) skip the whole file; no result rows are recorded for it.
        print(f"  Failed to process file {csv_file}: {e}")

# Turn the collected per-model rows into a single table and preview its first rows
results_df = pd.DataFrame(data=results)
print("\nCombined results (first 10 rows):", results_df.iloc[:10], sep="\n")


In [None]:
# Display the combined results table as this cell's output
results_df

In [11]:
# Define output path
output_path = "../output_files/model_results_monte_carlo.xlsx"

# Make sure the target folder exists: to_excel() does not create missing
# directories and would fail on a fresh checkout without it.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

# Save DataFrame to Excel (index=False leaves out the row-number column)
results_df.to_excel(output_path, index=False)

print(f"Results saved to: {output_path}")

Results saved to: ../output_files/model_results_monte_carlo.xlsx
