## Groundwater head model for well 22S108GU

1. Load data
2. Test all PET-methods in calibration and validation
3. Choose best PET-method
4. See calibration results
5. Hindcast the data
6. Export the hindcast data

Joaquim Altimiras Granel, 2024

In [None]:
# Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pastas as ps
import os

# Load data: Groundwater head observations
# Prompt for the Excel file with the daily groundwater head observations.
file_path = input("Enter the path to the head observations: ")
# Only the first two columns are read: column 0 becomes the index (parsed as
# dates) and column 1 holds the values; squeeze() collapses the resulting
# one-column frame to a Series.
ho = pd.read_excel(file_path, usecols=[0, 1], index_col=0, parse_dates=True).squeeze()
# Name the series 'ho' regardless of the column header used in the file.
ho.name = 'ho'

# Load data: Precipitation (m/day)
file_path = input("Enter the path to the precipitation data: ")
prec = pd.read_excel(file_path, parse_dates=True, index_col=0, names=['prec']).squeeze()

# Load data: Temperature
file_path = input("Enter the path to the temperature data: ")
# Read the full file, then collapse it to a Series when it has a single column.
temp = pd.read_excel(file_path, index_col=0, parse_dates=True)
temp = temp.squeeze()

# Load data: PET
file_path = input("Enter the path to the PET data: ")
# Read the whole file: the first column is the date index, every remaining
# column is one evapotranspiration (PET) method.
evap = pd.read_excel(file_path, parse_dates=True, index_col=0)
# Expose each PET column as its own global Series named evap1, evap2, ...
# The count follows the columns actually present in the file, so a file with
# fewer (or more) than 18 methods no longer raises IndexError or drops columns.
for i in range(1, evap.shape[1] + 1):
    globals()[f'evap{i}'] = evap.iloc[:, i - 1].squeeze()

In [None]:
# Build and calibrate one Pastas model per PET method

# Calibration and validation windows
tmin_cal, tmax_cal = '2022-12-22', '2023-11-22'
tmin_val, tmax_val = '2023-11-22', '2024-03-19'

# Extended window (start of calibration to end of validation) for the overall R²
tmin_ext, tmax_ext = tmin_cal, tmax_val

# PET method names are the column labels of the evap table
pet_method_names = evap.columns.tolist()
# Calibrated models, appended in the same order as pet_method_names
models = []

# Solve options shared by both calibration steps
calibration_options = dict(
    tmin=tmin_cal,
    tmax=tmax_cal,
    fit_constant=False,
    report=False,
)

for method_name, (_label, pet_column) in zip(pet_method_names, evap.items()):
    evap_selected = pet_column.squeeze()
    stress_name = f"rch_{method_name}"

    # The model is named after the PET method it uses
    ml = ps.Model(ho, name=f"model_{method_name}")

    # Single stress model: recharge computed by a FlexModel from
    # precipitation, temperature and the selected PET series
    ml.add_stressmodel(
        ps.RechargeModel(
            prec=prec,
            evap=evap_selected,
            temp=temp,
            recharge=ps.rch.FlexModel(gw_uptake=True, snow=True),
            rfunc=ps.Gamma(),
            name=stress_name,
        )
    )

    # Let the kv parameter of the recharge model be optimised
    ml.set_parameter(f"{stress_name}_kv", vary=True)

    # Step 1: solve without a noise model
    ml.solve(noise=False, solver=ps.LeastSquares(), **calibration_options)

    # Step 2: freeze srmax, then solve again with a noise model, starting
    # from the step-1 parameters (initial=False)
    ml.set_parameter(f"{stress_name}_srmax", vary=False)
    ml.solve(noise=True, initial=False, solver=ps.LeastSquares(), **calibration_options)

    models.append(ml)



# Plot all PET-model results and tabulate R² for each period

from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, r2_score


def _aligned_r2(obs, sim):
    """Return R² of `sim` against `obs` on the timestamps present in both.

    r2_score needs two equally long inputs without NaN. The simulated series
    need not share its index with the observations, so both are joined on
    their common timestamps first. NaN is returned when fewer than two
    paired values remain, because R² is undefined there.
    """
    paired = pd.concat([obs, sim], axis=1, join="inner", keys=["obs", "sim"]).dropna()
    if len(paired) < 2:
        return np.nan
    return r2_score(paired["obs"], paired["sim"])


# The observation slices do not depend on the model: compute them once, so
# they also exist for the final plot when `models` is empty.
obs_cal = ho[tmin_cal:tmax_cal]
obs_val = ho[tmin_val:tmax_val]
obs_ext = ho[tmin_ext:tmax_ext]

# Setup figure
fig, ax = plt.subplots(figsize=(14, 7))

# One colour per model, reused for its calibration and validation lines
colors = plt.cm.viridis(np.linspace(0, 1, len(models)))

# Collect one row per model and build the table once after the loop
r2_rows = []

# Loop through models for simulation and plotting
for idx, model in enumerate(models):
    sim_cal = model.simulate(tmin=tmin_cal, tmax=tmax_cal)
    sim_val = model.simulate(tmin=tmin_val, tmax=tmax_val)
    sim_ext = model.simulate(tmin=tmin_ext, tmax=tmax_ext)

    r2_cal = _aligned_r2(obs_cal, sim_cal)
    r2_val = _aligned_r2(obs_val, sim_val)
    r2_ext = _aligned_r2(obs_ext, sim_ext)

    r2_rows.append({
        "Model": model.name,
        "R2_Calibration": r2_cal,
        "R2_Validation": r2_val,
        "R2_Extended": r2_ext,
    })

    # Solid line for calibration, dashed line in the same colour for validation
    ax.plot(sim_cal.index, sim_cal, label=f"{model.name}", color=colors[idx])
    ax.plot(sim_val.index, sim_val, linestyle='--', color=colors[idx])

# DataFrame of R² values (keeps its columns even when no model was built)
r2_results = pd.DataFrame(
    r2_rows,
    columns=["Model", "R2_Calibration", "R2_Validation", "R2_Extended"],
)

# Plot observed data
ax.plot(obs_cal.index, obs_cal, color='black', linewidth=3, label="Observed - Calibration")
ax.plot(obs_val.index, obs_val, color='grey', linewidth=3, label="Observed - Validation", linestyle='--')

# Mark the transition from calibration to validation at the configured start
# of the validation period, so the marker cannot drift from tmin_val
ax.axvline(x=pd.Timestamp(tmin_val), color='red', linestyle=':', linewidth=2, label='Start of Validation')

# Formatting plot
ax.set_title('Model Simulations across Calibration and Validation Periods')
ax.set_xlabel('Date')
# The plotted series are groundwater heads; same label as the hindcast plot
ax.set_ylabel('Groundwater Level (m)')
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
ax.grid(True)

plt.tight_layout()  # Adjust layout
plt.show()

# Print the table of R2 results with formatted decimal places
print("R² values for each model across Calibration, Validation, and Extended periods:")
pd.options.display.float_format = '{:,.3f}'.format  # Formatting float to three decimals
print(r2_results)

In [None]:
# Display method names

# Number the PET methods from 1, following the columns actually present in
# `evap` instead of assuming there are exactly 18 of them.
for i, method_name in enumerate(evap.columns, start=1):
    print(f"Method {i}: {method_name}")

In [None]:
# Choice of PET method to use for displaying calibration results

evap_choice = input("Type the name of the method that should be used: ")

# Keep asking until the name matches a column of `evap`. Otherwise
# `evap_selected` would be left undefined, or would silently keep a value
# assigned by an earlier cell, and the next cell would calibrate with the
# wrong PET series.
while evap_choice not in evap.columns:
    print(f"Method {evap_choice} not found. Please check your input.")
    evap_choice = input("Type the name of the method that should be used: ")

# Access the actual Series based on user input
evap_selected = evap[evap_choice]

In [None]:
# Pastas model for the chosen PET method

# Calibration and validation windows
tmin_cal, tmax_cal = '2022-12-22', '2023-11-22'
tmin_val, tmax_val = '2023-11-22', '2024-03-19'
# The model is solved on the calibration window
tmin, tmax = tmin_cal, tmax_cal

# Model of the observed head series
gw_model = ps.Model(ho, name="my_model")

# Recharge is computed by a FlexModel (with groundwater uptake and snow) from
# precipitation, the selected PET series and temperature
rm1 = ps.rch.FlexModel(gw_uptake=True, snow=True)
sm1 = ps.RechargeModel(
    prec=prec,
    evap=evap_selected,
    temp=temp,
    recharge=rm1,
    rfunc=ps.Gamma(),
    name="rch",
)
gw_model.add_stressmodel(sm1)

# The evaporation input is only a rough estimate, so kv is optimised too
gw_model.set_parameter("rch_kv", vary=True)

# Options common to both steps of the two-step solve
solve_window = dict(tmin=tmin, tmax=tmax, fit_constant=False, report=False)

# Step 1: solve without a noise model
gw_model.solve(noise=False, solver=ps.LeastSquares(), **solve_window)

# Step 2: freeze srmax and solve again with a noise model, continuing from the
# step-1 parameters (initial=False)
gw_model.set_parameter("rch_srmax", vary=False)
gw_model.solve(noise=True, initial=False, solver=ps.LeastSquares(), **solve_window)

gw_model.plots.results(figsize=(12, 6))

In [None]:
# Hindcast

# Period over which the calibrated model is simulated
tmin_complete = pd.Timestamp("2012-01-01")
tmax_complete = pd.Timestamp("2024-04-01")

# Simulate the head with the calibrated model over the complete period
gw_head_complete = gw_model.simulate(tmin=tmin_complete, tmax=tmax_complete)

# Plot the simulated and observed heads on one explicit Axes, so both series
# are guaranteed to land in the same figure
fig_hindcast, ax_hindcast = plt.subplots(figsize=(12, 6))
ax_hindcast.plot(
    gw_head_complete.index,
    gw_head_complete,
    label='Simulated Groundwater Head',
    color='blue',
)
ho.plot(ax=ax_hindcast, label='Observed GW Head', color='black', linewidth=2)
# Derive the years in the title from the simulated period so the title cannot
# go stale when tmin_complete / tmax_complete are changed
ax_hindcast.set_title(
    f'Simulated Groundwater Head from {tmin_complete.year} to {tmax_complete.year}'
)
ax_hindcast.set_xlabel('Date')
ax_hindcast.set_ylabel('Groundwater Level (m)')
ax_hindcast.legend()
ax_hindcast.grid(True)
plt.show()

In [None]:
# Export hindcasted data

output_file_name = 'GWM-22S108GU_hindcasted'
output_file_path = input('Enter path for output file (preferrably in folder 4):')

# Pasted paths often carry stray whitespace or surrounding quotes
output_file_path = output_file_path.strip().strip('"\'')

# Append .xlsx if not already in the file name
if not output_file_name.lower().endswith('.xlsx'):
    output_file_name += '.xlsx'

if output_file_path.lower().endswith('.xlsx'):
    # The user entered a complete file path: use it as given instead of
    # treating it as a folder and joining the default file name onto it
    full_path = output_file_path
else:
    # The user entered a folder (or nothing, meaning the working directory)
    full_path = os.path.join(output_file_path, output_file_name)

# Create the target folder when it does not exist yet, so the export does not
# fail after the whole model run has already completed
target_dir = os.path.dirname(full_path)
if target_dir:
    os.makedirs(target_dir, exist_ok=True)

gw_head_complete.to_excel(full_path, index=True)

print(f"Data successfully saved to {full_path}")