The SHM model was calibrated using three different methods (DREAM, SCE, and SGD). In this notebook we select the best calibration set for each basin and extract the variables of interest for each case.

In [1]:
#Import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# code for the hydrological model
def run_SHM(X_SHM, initial_states, param):
    """Run the SHM bucket model over a forcing series, one row per timestep.

    Parameters
    ----------
    X_SHM : numpy.ndarray
        2-D array with one row per timestep; each row unpacks into
        (precipitation, potential evapotranspiration, temperature).
    initial_states : sequence of 5 numbers
        Starting storages, in order (ss, sf, su, si, sb): snow, fastflow,
        unsaturated zone, interflow and baseflow reservoirs.
    param : sequence of 8 numbers
        Model parameters, in order (dd, f_thr, sumax, beta, perc, kf, ki, kb):
        dd    - multiplied by temperature to cap the snowmelt of a timestep
        f_thr - amount of snowmelt+rainfall routed to the unsaturated zone;
                anything above it goes to the fastflow reservoir
        sumax - maximum storage of the unsaturated zone
        beta  - exponent applied to the relative unsaturated storage
        perc  - fraction of the unsaturated-zone outflow sent to the interflow
                reservoir (the remainder goes to the baseflow reservoir)
        kf, ki, kb - divisors turning fastflow, interflow and baseflow storage
                into outflow

    Returns
    -------
    q_out : numpy.ndarray, shape (n_timesteps, 1)
        Total discharge (fastflow + interflow + baseflow outflow).
    states : numpy.ndarray, shape (n_timesteps, 5)
        Storages (ss, sf, su, si, sb) at the end of each timestep.
    outflows : numpy.ndarray, shape (n_timesteps, 3)
        Outflow of the fastflow, interflow and baseflow reservoirs.
    """
    # Unpack starting storages and parameters
    snow, fast, unsat, inter, base = initial_states
    dd, f_thr, sumax, beta, perc, kf, ki, kb = param

    land_use_factor = 0.9  # land use correction factor [-], constant for the whole run

    # Pre-allocate the outputs
    n_steps = X_SHM.shape[0]
    q_out = np.zeros((n_steps, 1))     # final discharge
    states = np.zeros((n_steps, 5))    # internal states (buckets)
    outflows = np.zeros((n_steps, 3))  # discharge coming from each bucket

    for step, (p, pet, temp) in enumerate(X_SHM):

        # --- Snow module ---
        if temp > 0:
            # Melt is limited by the available snow; rain passes straight through
            melt = min(snow, dd * temp)
            snow = snow - melt
            liquid_in = melt + p
        else:
            # No melt: precipitation accumulates in the snow reservoir
            snow = snow + p
            liquid_in = 0.0

        # --- Split liquid input between fastflow and unsaturated zone ---
        fast_in = max(0, liquid_in - f_thr)
        unsat_in = min(liquid_in, f_thr)

        # --- Fastflow reservoir ---
        fast = fast + fast_in
        fast_out = fast / kf
        fast = fast - fast_out

        # --- Unsaturated zone ---
        wetness = (unsat / sumax) ** beta  # [-]
        unsat_unbounded = unsat + unsat_in * (1 - wetness)
        unsat = min(unsat_unbounded, sumax)
        # Outflow = share not retained + whatever exceeds the maximum storage [mm]
        unsat_out = unsat_in * wetness + max(0.0, unsat_unbounded - sumax)

        # --- Evapotranspiration ---
        # Scaling factor: 0 when empty, 1 at or above 80% of sumax, relative storage otherwise
        if unsat <= 0.0:
            et_scale = 0.0
        elif unsat >= 0.8 * sumax:
            et_scale = 1.0
        else:
            et_scale = unsat / sumax
        actual_et = pet * land_use_factor * et_scale  # [mm]
        unsat = max(0.0, unsat - actual_et)           # storage cannot become negative [mm]

        # --- Interflow reservoir ---
        inter_in = unsat_out * perc  # [mm]
        inter = inter + inter_in
        inter_out = inter / ki
        inter = inter - inter_out

        # --- Baseflow reservoir ---
        base_in = unsat_out * (1 - perc)  # [mm]
        base = base + base_in
        base_out = base / kb
        base = base - base_out

        # --- Store results of this timestep ---
        q_out[step, 0] = fast_out + inter_out + base_out  # [mm]
        states[step] = (snow, fast, unsat, inter, base)
        outflows[step] = (fast_out, inter_out, base_out)

    return q_out, states, outflows

In [3]:
# Loss function
def nse_loss(sim, obs):
    """Nash-Sutcliffe efficiency (NSE) of a simulation, rounded to two decimals.

    NSE = 1 - sum((sim - obs)^2) / sum((obs - mean(obs))^2)

    Time steps whose observation is NaN are ignored, so gaps in the observed
    series no longer turn the whole score into NaN. NaN values in `sim` at time
    steps with a valid observation still propagate to the result.

    Parameters
    ----------
    sim : array-like
        Simulated values.
    obs : array-like
        Observed values, same shape as `sim`; may contain NaN for missing data.

    Returns
    -------
    numpy.float64
        NSE rounded to 2 decimals. The result is NaN/inf when the valid
        observations have zero variance or when no valid observation exists.
    """
    sim = np.asarray(sim, dtype=float)
    obs = np.asarray(obs, dtype=float)

    # Zero out the contribution of time steps without an observation
    missing = np.isnan(obs)
    obs_mean = np.mean(obs[~missing])
    squared_error = np.sum(np.where(missing, 0.0, sim - obs) ** 2)
    obs_spread = np.sum(np.where(missing, 0.0, obs - obs_mean) ** 2)

    return np.round(1.0 - squared_error / obs_spread, 2)

In [4]:
# Initialize information: input/output locations
path_basins= '../data/CAMELS-GB/timeseries_v2/Selected_Basins_hybrid.csv'  # list of basin ids (one header row)
path_ts = '../data/CAMELS-GB/timeseries_v2/'                               # folder with the per-basin time series
path_SHM_data = '../results/models/SHM/'                                   # calibration results and notebook outputs

buffer = 365 # warmup period: number of initial timesteps excluded from the evaluation
initial_conditions = [0.0, 1.0, 5.0, 10.0, 15.0] # (ss, sf, su, si, sb); not too important because there is 1 year of warmup
forcing=['date','precipitation', 'peti', 'temperature'] # columns fed to the model (date becomes the index)
target=['date', 'discharge_spec']                       # observed discharge column (date becomes the index)

# All the comparisons are made in the testing period (the first `buffer` timesteps are warmup)
testing_period = ['2005-10-01','2012-09-30']

# Read the ids of the selected basins.
# ndmin=1 keeps the result one-dimensional even when the file lists a single basin;
# without it np.loadtxt returns a 0-d array and list() fails.
selected_basins_id= list(np.loadtxt(path_basins, skiprows=1, ndmin=1).astype(int))

In [5]:
# For every basin, compare the three calibrations (DREAM, SCE, SGD) and keep the
# parameter set of the method that achieved the highest NSE in testing.

# Note: the SHM_XXX_calibration files are generated when running the scripts associated with each calibration method

# Load the calibration table of each method, indexed by basin id
df_DREAM = pd.read_csv(path_SHM_data+'SHM_DREAM_calibration.csv').set_index('basin_id')
df_SCE = pd.read_csv(path_SHM_data+'SHM_SCE_calibration.csv').set_index('basin_id')
df_SGD = pd.read_csv(path_SHM_data+'SHM_SGD_calibration.csv').set_index('basin_id')
calibrations = {'DREAM': df_DREAM, 'SCE': df_SCE, 'SGD': df_SGD}

# The last column of each table is the NSE in testing: put them side by side
# (one column per method) and find, per basin, the method with the highest value.
last_column_values = pd.concat([table.iloc[:, -1] for table in calibrations.values()],
                               axis=1, keys=list(calibrations))
max_value_index = last_column_values.idxmax(axis=1)

# From each table keep only the basins won by that method.
# The first data column is skipped (presumably not a model parameter - verify against the calibration files).
parameter_sets = pd.concat([table[max_value_index == method].iloc[:, 1:]
                            for method, table in calibrations.items()], axis=0)

# Order the rows like the list of selected basins and save the result
parameter_sets = parameter_sets.reindex(selected_basins_id)
parameter_sets.to_csv(path_SHM_data+'SHM_calibration.csv', index=True, header=True)

Run the model for each basin, using the best calibration parameters

In [6]:
# Containers for the per-basin results (same order as selected_basins_id)
NSE_testing = []       # NSE in the testing period, after warmup
storages_testing = []  # reservoir storages, after warmup
outflow_testing = []   # outflow of each reservoir, after warmup

test_start, test_end = testing_period
n_basins = len(selected_basins_id)

# Simulate every basin with its best parameter set
for i, basin in enumerate(selected_basins_id):

    # Read the time series of the basin and cut forcing/target to the testing period.
    # Note: df_forcing of the last basin is reused later in the notebook for its date index.
    path_timeseries = f'{path_ts}CAMELS_GB_hydromet_timeseries_{basin}.csv'
    df_ts = pd.read_csv(path_timeseries)
    df_forcing = df_ts.loc[:, forcing].set_index('date').loc[test_start:test_end]
    df_target = df_ts.loc[:, target].set_index('date').loc[test_start:test_end]

    # Calibrated parameters of this basin: every entry of its row except the last one
    df = parameter_sets.loc[basin]
    param = df.iloc[:-1].to_numpy().ravel().tolist()

    # Run SHM for the testing period
    q_sim, states, outflow = run_SHM(df_forcing.to_numpy(), initial_conditions, param)

    # Observed discharge as a column vector
    q_obs = df_target.to_numpy().reshape((-1, 1))

    # Evaluate and store everything after the warmup period
    NSE_testing.append(nse_loss(sim=q_sim[buffer:].flatten(), obs=q_obs[buffer:].flatten()))
    storages_testing.append(states[buffer:, :])
    outflow_testing.append(outflow[buffer:, :])

    # Progress report
    print(f'Testing of basin:{i+1}/{n_basins} with ID:{basin} is completed-------------------------------')

# Export the NSE of all basins to a text file
df_NSE = pd.DataFrame({'basin_id': list(selected_basins_id),
                       'NSE_SHM': list(NSE_testing)}).set_index('basin_id')
df_NSE.to_csv(path_SHM_data+'NSE_SHM.txt', index=True, header=True)

Testing of basin:1/60 with ID:2001 is completed-------------------------------
Testing of basin:2/60 with ID:4001 is completed-------------------------------
Testing of basin:3/60 with ID:6007 is completed-------------------------------
Testing of basin:4/60 with ID:7001 is completed-------------------------------
Testing of basin:5/60 with ID:7002 is completed-------------------------------
Testing of basin:6/60 with ID:8004 is completed-------------------------------
Testing of basin:7/60 with ID:8005 is completed-------------------------------
Testing of basin:8/60 with ID:8006 is completed-------------------------------
Testing of basin:9/60 with ID:9002 is completed-------------------------------
Testing of basin:10/60 with ID:10003 is completed-------------------------------
Testing of basin:11/60 with ID:11001 is completed-------------------------------
Testing of basin:12/60 with ID:11003 is completed-------------------------------
Testing of basin:13/60 with ID:12001 is comple

Generate the soil moisture series of the SHM model

In [8]:
# Generate soil moisture series: one column per basin, one row per post-warmup timestep.
# The stored states are ordered (ss, sf, su, si, sb); column 2 selects su, the
# unsaturated-zone storage. Change that index to export a different reservoir.
df_sm = pd.DataFrame({station_id: storages_testing[i][:, 2]
                      for i, station_id in enumerate(selected_basins_id)})

# Dates matching the stored states: the testing-period index without the warmup.
# Derived from `buffer` (df_forcing still holds the testing-period forcing of the last
# basin processed) so it stays consistent if testing_period or buffer are changed.
df_sm.index = df_forcing.index[buffer:]
df_sm = df_sm.rename_axis('time') #to be consistent with the era5-land data
df_sm.to_csv(path_SHM_data+'SHM_sm.csv', index=True)

Flux division between the reservoirs (used later for analysis)

In [9]:
# One record per basin with the share of the total discharge produced by each reservoir
flux_records = []

for i, station_id in enumerate(selected_basins_id):

    # Total outflow of each reservoir over the evaluated period (fastflow, interflow, baseflow)
    outflow_series = outflow_testing[i].sum(axis=0)
    # Same totals expressed as a fraction of the overall discharge
    percentage = outflow_series / outflow_series.sum()

    flux_records.append({'basin_id': station_id,
                         'qf': percentage[0],
                         'qi': percentage[1],
                         'qb': percentage[2]})

# Assemble the table in one go, indexed by basin
df_discharges = pd.DataFrame(flux_records).set_index('basin_id')

# Calculate the mean values over all the basins
print(df_discharges.mean().round(2))

qf    0.03
qi    0.66
qb    0.31
dtype: float64
