In [1]:
import numpy as np
import pathlib
import pandas as pd
import sys
import time

In [2]:
# TODO - change these paths accordingly
sys.path.insert(1, '/Users/jay/Desktop/BA//HBV-SASK-py-tool')
# sys.path.insert(1, '/work/ga45met/mnt/linux_cluster_2/UQEFPP')

In [3]:
from common import utility
from hbv_sask import hbvsask_utility as hbv
from hbv_sask import HBVSASKModel as hbvmodel

In [4]:
# importing modules/libs for plotting
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

import matplotlib.pyplot as mp

from plotly.offline import plot

pd.options.plotting.backend = "plotly"

### Defining paths

In [5]:
# TODO - change these paths accordingly
hbv_model_data_path = pathlib.Path("/Users/jay/Desktop/BA/HBV-SASK-data/")
configurationObject = pathlib.Path('/Users/jay/Desktop/BA/HBV-SASK-py-tool/configurations/configuration_hbv_6D.json')
inputModelDir = hbv_model_data_path
basis = "Oldman_Basin"  # 'Banff_Basin'
# TODO - change this path accordingly
workingDir = hbv_model_data_path / basis / "model_runs" / "trial_single_run_hbvsaskmodel_21_Mar_2024"


# Creating Model Object

Creating a model object

In [6]:
writing_results_to_a_file = True
plotting = True
createNewFolder = True # creat a separate folder to save results for each model run

hbvsaskModelObject = hbvmodel.HBVSASKModel(
    configurationObject=configurationObject,
    inputModelDir=inputModelDir,
    workingDir=workingDir,
    basis=basis,
    writing_results_to_a_file=writing_results_to_a_file,
    plotting=plotting
)

In [7]:
# get to know some of the relevant time settings, read from a json configuration file
print(f"start_date: {hbvsaskModelObject.start_date}")
print(f"start_date_predictions: {hbvsaskModelObject.start_date_predictions}")
print(f"end_date: {hbvsaskModelObject.end_date}")
print(f"full_data_range is {len(hbvsaskModelObject.full_data_range)} hours including spin_up_length of {hbvsaskModelObject.spin_up_length} hours")
print(f"simulation_range is of length {len(hbvsaskModelObject.simulation_range)} hours")

start_date: 2004-04-30 00:00:00
start_date_predictions: 2007-04-30 00:00:00
end_date: 2007-06-30 00:00:00
full_data_range is 1157 hours including spin_up_length of 1095 hours
simulation_range is of length 62 hours


Examing the input/forcing data and ground-truth/measured data if on disposal...

In [8]:
hbvsaskModelObject.time_series_measured_data_df

Unnamed: 0_level_0,streamflow,precipitation,temperature
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-04-30,15.1,0.00,4.86
2004-05-01,14.4,0.00,8.41
2004-05-02,17.1,0.01,11.52
2004-05-03,21.6,3.08,6.41
2004-05-04,21.7,1.88,9.09
...,...,...,...
2007-06-26,19.5,0.00,9.13
2007-06-27,17.8,0.00,11.33
2007-06-28,17.1,0.03,15.45
2007-06-29,17.2,3.35,14.96


In [9]:
hbvsaskModelObject._plot_input_data(read_measured_streamflow=True)

In [10]:
hbvsaskModelObject.precipitation_temperature_monthly_df

Unnamed: 0_level_0,monthly_average_PE,monthly_average_T
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.0,-9.43
2,0.0,-6.65
3,0.0,-4.24
4,0.41,0.95
5,1.89,6.12
6,2.95,10.08
7,3.62,13.6
8,3.16,12.93
9,2.0,8.5
10,0.78,3.13


In [11]:
hbvsaskModelObject.initial_condition_df

Unnamed: 0,TimeStamp,initial_SWE,initial_SMS,S1,S2,WatershedArea_km2
9251,2004-04-30,0.0,81.185385,0.477466,25.193458,1434.73


# Running a single model run without changing the parameter values

In [12]:
start = time.time()
## TODO: Solve Inconsistency
hbvsaskModelObject.initial_condition_df = hbvsaskModelObject.initial_condition_df.rename(columns={'initial_SWE': 'SWE', 'initial_SMS': 'SMS'})
results_array = hbvsaskModelObject.run(createNewFolder=createNewFolder)
end = time.time()
runtime = end - start
print(f"single execution of the model's run function take {runtime}; full_data_range is {len(hbvsaskModelObject.full_data_range)} hours including spin_up_length of {hbvsaskModelObject.spin_up_length} hours")

[HVBSASK INFO] [0] parameters: None
single execution of the model's run function take 0.11366605758666992; full_data_range is 1157 hours including spin_up_length of 1095 hours


## Examing the model output

Model run returns an array. \
Each element of the array is a tuple \
The first element of each tuple is a dictionary storing different info about model run \
The second element of each tuple is a runtime (which is as well stored in the above-mentioned dictionary)

In [13]:
print(type(results_array))
print(f"len of the resulted array is equal to the total number of model runs (one set of parameters one run) - {len(results_array)}")
print(type(results_array[0]))
print(f"The first element of each tuple is a {type(results_array[0][0])}")
print(f"The second element of each tuple is a {type(results_array[0][1])}")
print(f"runtime : {results_array[0][1]}")

<class 'list'>
len of the resulted array is equal to the total number of model runs (one set of parameters one run) - 1
<class 'tuple'>
The first element of each tuple is a <class 'dict'>
The second element of each tuple is a <class 'float'>
runtime : 0.06876611709594727


more about the result dictionary... it stores different dataframes

In [14]:
print(type(results_array[0]))
print(type(results_array[0][0]))
print(results_array[0][0].keys())

<class 'tuple'>
<class 'dict'>
dict_keys(['run_time', 'result_time_series', 'parameters_dict', 'state_df'])


In [15]:
results_array[0][0]['run_time']

0.06876611709594727

In [16]:
results_array[0][0]['parameters_dict']

{'index_run': 0,
 'TT': 0.0,
 'C0': 0.5,
 'ETF': 0.2,
 'FC': 250,
 'beta': 2.0,
 'FRAC': 0.3,
 'K2': 0.05,
 'LP': 0.5,
 'K1': 0.5,
 'alpha': 2.0,
 'UBAS': 1,
 'PM': 1,
 'M': 1.0,
 'VAR_M': 0.0001}

pd.DataFrame storing index_run, paramter values and values for different likelihood functions/goodness-of-fit (GoF) functions

In [17]:
## TODO: Solve gof_df
#results_array[0][0]['gof_df']

the output of the model is in the form of a time-series stored in a pd.DataFrame...

In [18]:
results_array[0][0]['result_time_series']

Unnamed: 0_level_0,Q_cms,Q_mm,AET,PET,Q1,Q1_routed,Q2,ponding,Index_run
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007-04-30,20.676424,1.245142,0.64698,0.64698,0.628367,0.628367,0.616775,1.920,0
2007-05-01,15.609417,0.940005,2.51748,2.51748,0.318442,0.318442,0.621563,3.890,0
2007-05-02,21.054685,1.267921,2.15838,2.15838,0.605052,0.605052,0.662870,5.565,0
2007-05-03,27.879960,1.678942,0.20412,0.20412,0.946452,0.946452,0.732490,15.090,0
2007-05-04,63.204394,3.806193,0.01890,0.01890,2.830228,2.830228,0.975965,1.785,0
...,...,...,...,...,...,...,...,...,...
2007-06-26,22.525692,1.356506,2.38950,2.38950,0.060610,0.060610,1.295896,0.000,0
2007-06-27,21.129815,1.272446,3.68750,3.68750,0.041344,0.041344,1.231101,0.000,0
2007-06-28,19.924423,1.199857,6.11830,6.11830,0.030310,0.030310,1.169546,0.030,0
2007-06-29,18.855321,1.135475,5.82920,5.82920,0.024022,0.024022,1.111453,3.350,0


In [19]:
results_array[0][0]['result_time_series'].columns

Index(['Q_cms', 'Q_mm', 'AET', 'PET', 'Q1', 'Q1_routed', 'Q2', 'ponding',
       'Index_run'],
      dtype='object')

In [20]:
results_array[0][0]['result_time_series'].index.name

'TimeStamp'

plotting input, predicted/simulated and measured time-series

In [21]:
fig = hbv._plot_streamflow_and_precipitation(
    input_data_df=hbvsaskModelObject.time_series_measured_data_df, 
    simulated_data_df=results_array[0][0]['result_time_series'], 
    input_data_time_column=hbvsaskModelObject.time_column_name,
    simulated_time_column=hbvsaskModelObject.time_column_name, 
    observed_streamflow_column=hbvsaskModelObject.streamflow_column_name,
    simulated_streamflow_column="Q_cms", 
    precipitation_columns=hbvsaskModelObject.precipitation_column_name)
fig.show()

column 'stramflow' contains the measured data, column "Q_cms" contains predicted data (i.e., streamflow expressed in cubic meters per second) by the model defined with the current values for the uncertain parameters

as a QoI, one can take Q_cms time-series (extract it from above dataframe), AET (Actual EvapoTranspiration), or some likelihood (i.e., goodness-of-fit (GoF)) function value

In [22]:
qoi = results_array[0][0]['result_time_series']["Q_cms"].values
qoi

array([20.67642415, 15.60941748, 21.05468485, 27.87995969, 63.20439448,
       16.76562951, 18.82081441, 23.95415098, 31.59834943, 39.17194414,
       29.38374504, 28.61607072, 31.92293107, 38.96363581, 73.3907838 ,
       27.8135932 , 34.81703938, 42.05029968, 42.51144287, 37.72170524,
       55.43797655, 38.5227341 , 67.99421218, 33.06559848, 75.16249147,
       30.76721418, 31.74679284, 39.14342856, 72.08637234, 72.82123876,
       39.23131788, 50.47691742, 54.31552619, 57.3579719 , 59.61898994,
       75.32888449, 79.97918524, 44.06043545, 70.64370264, 41.61838355,
       38.87580942, 36.87145241, 35.77933185, 33.71049513, 31.98402297,
       35.09174752, 38.14801655, 36.23354738, 63.99652873, 41.5650446 ,
       35.29129733, 30.68893578, 28.46806046, 26.74162198, 25.24103312,
       23.88035988, 22.62238247, 22.52569248, 21.12981535, 19.92442317,
       18.85532082, 20.55001684])

In [23]:
#qoi_2 = results_array[0][0]['gof_df']["RMSE"].values
#qoi_2

# Running a single model run with propagating the values for uncertain parameters specified in json configuration file

One can specify the direct values of the uncertain parameters in the form of a dictionary. The order and naming of the parameters has to follow the order from the configuration_json_file_dict["parameters"] 

In [24]:
parameter_value_dict = {'TT': -4.0, 'C0': 0.0, 'ETF': 0.0, 'FC': 50, 'FRAC': 0.1, 'K2': 0.025}

In [25]:
# parameter_value_dict = {'TT': -4.0, 'C0': 0.0, 'ETF': 0.0, 'FC': 50, 'FRAC': 0.1, 'K2': 0.025}

# parameter_value_dict = {'TT': 0.0, 'C0': 5.0, 'ETF': 0.5, 'FC': 100, 'FRAC': 0.5, 'K2': 0.025}

parameter_value_dict = {'TT': 0.0, 'C0': 0.5, 'ETF': 0.2, 'FC': 250, 'FRAC': 0.1, 'K2': 0.025}

In [26]:
hbvsaskModelObject.configurationObject

{'time_settings': {'start_day': 30,
  'start_month': 4,
  'start_year': 2004,
  'start_hour': 0,
  'start_minute': 0,
  'end_day': 30,
  'end_month': 6,
  'end_year': 2007,
  'end_hour': 0,
  'end_minute': 0,
  'run_full_timespan': 'False',
  'spin_up_length': 1095,
  'simulation_length': 61,
  'resolution': 'daily',
  'cut_runs': 'False',
  'timestep': 5},
 'model_settings': {'basis': 'Oldman_Basin',
  'plotting': 'False',
  'writing_results_to_a_file': 'False',
  'corrupt_forcing_data': 'False'},
 'model_paths': {'hbv_model_path': 'Hydro_Models/HBV-SASK-data'},
 'simulation_settings': {'qoi': 'Q_cms',
  'qoi_column': 'Q_cms',
  'autoregressive_model_first_order': 'False',
  'transform_model_output': 'None',
  'read_measured_data': 'True',
  'qoi_column_measured': 'streamflow',
  'objective_function_qoi': ['RMSE', 'LogNSE', 'KGE'],
  'calculate_GoF': 'False',
  'objective_function': ['MAE', 'MSE', 'RMSE', 'NRMSE', 'NSE', 'KGE'],
  'mode': 'continuous',
  'interval': 10,
  'min_periods

In [27]:
start = time.time()
results_array_changed_param = hbvsaskModelObject.run(
    i_s = [1,],
    parameters = [parameter_value_dict,],
    createNewFolder=createNewFolder,
    take_direct_value=True
)
end = time.time()
runtime = end - start
print(f"single execution of the model's run function take {runtime}; full_data_range is {len(hbvsaskModelObject.full_data_range)} hours including spin_up_length of {hbvsaskModelObject.spin_up_length} hours")

[HVBSASK INFO] [1] parameters: [{'TT': 0.0, 'C0': 0.5, 'ETF': 0.2, 'FC': 250, 'FRAC': 0.1, 'K2': 0.025}]
single execution of the model's run function take 0.10354304313659668; full_data_range is 1157 hours including spin_up_length of 1095 hours


## Examing the model output

In [28]:
results_array_changed_param[0][0]['parameters_dict']

{'index_run': 1,
 'TT': 0.0,
 'C0': 0.5,
 'ETF': 0.2,
 'FC': 250,
 'FRAC': 0.1,
 'K2': 0.025}

In [29]:
results_array_changed_param[0][0]['result_time_series']

Unnamed: 0_level_0,Q_cms,Q_mm,AET,PET,Q1,Q1_routed,Q2,ponding,Index_run
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007-04-30,13.429750,0.808745,0.64698,0.64698,0.220725,0.220725,0.588020,1.920,1
2007-05-01,12.371207,0.744999,2.51748,2.51748,0.148776,0.148776,0.596223,3.890,1
2007-05-02,13.450070,0.809968,2.15838,2.15838,0.182118,0.182118,0.627850,5.565,1
2007-05-03,15.507008,0.933838,0.20412,0.20412,0.255622,0.255622,0.678216,15.090,1
2007-05-04,27.145548,1.634715,0.01890,0.01890,0.793391,0.793391,0.841325,1.785,1
...,...,...,...,...,...,...,...,...,...
2007-06-26,29.314944,1.765357,2.38950,2.38950,0.025632,0.025632,1.739725,0.000,1
2007-06-27,28.501795,1.716389,3.68750,3.68750,0.020157,0.020157,1.696232,0.000,1
2007-06-28,27.733785,1.670139,6.11830,6.11830,0.016313,0.016313,1.653826,0.030,1
2007-06-29,27.007588,1.626408,5.82920,5.82920,0.013680,0.013680,1.612727,3.350,1


In [30]:
# dataframe containing predicted state data
state_df = results_array_changed_param[0][0]['state_df']

## TODO: Solve Inconsistency
state_df = state_df.rename(columns={'SWE': 'initial_SWE', 'SMS': 'initial_SMS'})
state_df

Unnamed: 0_level_0,initial_SWE,initial_SMS,S1,S2,WatershedArea_km2,Index_run
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-04-30,129.445,182.031214,0.664417,23.520800,1434.73,1
2007-05-01,127.525,182.286316,0.545484,23.848906,1434.73,1
2007-05-02,123.635,181.590707,0.603520,25.114000,1434.73,1
2007-05-03,120.220,182.061216,0.715013,27.128650,1434.73,1
2007-05-04,119.390,188.944257,1.259675,33.652989,1434.73,1
...,...,...,...,...,...,...
2007-06-27,0.000,154.796477,0.200784,67.849288,1434.73,1
2007-06-28,0.000,151.108977,0.180627,66.153055,1434.73,1
2007-06-29,0.000,145.009717,0.165410,64.509093,1434.73,1
2007-06-30,0.000,141.403426,0.264439,63.910748,1434.73,1


In [31]:
fig = px.line(state_df, x=state_df.index, y=['initial_SWE',],title="initial_SWE")
fig.show()

In [32]:
# fig = state_df.plot(x=state_df.index, y=["initial_SMS", "S1", "S2"], kind="line", )

fig = go.Figure()
fig.add_trace(go.Scatter(x=state_df.index,y=state_df["initial_SMS"],name="Soil Storage",))
fig.add_trace(go.Scatter(x=state_df.index,y=state_df["S1"],name="Fast Reservoir",))
fig.add_trace(go.Scatter(x=state_df.index,y=state_df["S2"],name="Slow Reservoir",))
fig.show()

In [33]:
#error_time_series = np.array(results_array_changed_param[0][0]['result_time_series']['streamflow'].values) - np.array(results_array_changed_param[0][0]['result_time_series']['Q_cms'].values)
#error_time_series



# Plotting the output and input data

In [34]:
results_array_changed_param

[({'run_time': 0.07126116752624512,
   'result_time_series':                 Q_cms      Q_mm      AET      PET        Q1  Q1_routed  \
   TimeStamp                                                                
   2007-04-30  13.429750  0.808745  0.64698  0.64698  0.220725   0.220725   
   2007-05-01  12.371207  0.744999  2.51748  2.51748  0.148776   0.148776   
   2007-05-02  13.450070  0.809968  2.15838  2.15838  0.182118   0.182118   
   2007-05-03  15.507008  0.933838  0.20412  0.20412  0.255622   0.255622   
   2007-05-04  27.145548  1.634715  0.01890  0.01890  0.793391   0.793391   
   ...               ...       ...      ...      ...       ...        ...   
   2007-06-26  29.314944  1.765357  2.38950  2.38950  0.025632   0.025632   
   2007-06-27  28.501795  1.716389  3.68750  3.68750  0.020157   0.020157   
   2007-06-28  27.733785  1.670139  6.11830  6.11830  0.016313   0.016313   
   2007-06-29  27.007588  1.626408  5.82920  5.82920  0.013680   0.013680   
   2007-06-30  27.

In [35]:
fig = hbv._plot_streamflow_and_precipitation(
    input_data_df=hbvsaskModelObject.time_series_measured_data_df, 
    simulated_data_df=results_array_changed_param[0][0]['result_time_series'], 
    input_data_time_column=hbvsaskModelObject.time_column_name,
    simulated_time_column=hbvsaskModelObject.time_column_name, 
    observed_streamflow_column=hbvsaskModelObject.streamflow_column_name,
    simulated_streamflow_column="Q_cms", 
    precipitation_columns=hbvsaskModelObject.precipitation_column_name)
fig.show()

In [36]:
hbvsaskModelObject.time_series_measured_data_df.columns

Index(['streamflow', 'precipitation', 'temperature'], dtype='object')

In [37]:
results_array_changed_param[0][0]['result_time_series'].columns

Index(['Q_cms', 'Q_mm', 'AET', 'PET', 'Q1', 'Q1_routed', 'Q2', 'ponding',
       'Index_run'],
      dtype='object')

In [38]:
## TODO: Solve Inconsistency
results_array_changed_param[0][0]['state_df'] = results_array_changed_param[0][0]['state_df'].rename(columns={'SWE': 'initial_SWE', 'SMS': 'initial_SMS'})
results_array_changed_param[0][0]['state_df'].columns

Index(['initial_SWE', 'initial_SMS', 'S1', 'S2', 'WatershedArea_km2',
       'Index_run'],
      dtype='object')

In [39]:
# or more detailed plotting of precipitation and temperature as main input data
# predicted streamflow and measured one
# and state data...
result_df = results_array_changed_param[0][0]['result_time_series']
state_df = results_array_changed_param[0][0]['state_df']
parsed_input_data_df = hbvsaskModelObject.time_series_measured_data_df.loc[
    result_df.index.min():result_df.index.max()]

fig = make_subplots(
    rows=6, cols=1,
    subplot_titles=("Temperature", "Precipitation", "Streamflow", "EvapoTranspiration", "Snow Storage", "Soil+Reservoirs")
)

fig.add_trace(
    go.Scatter(
        x=parsed_input_data_df.index, y=parsed_input_data_df['temperature'],
        text=parsed_input_data_df['temperature'], 
        name="Temperature"
    ), 
    row=1, col=1
)

fig.add_trace(
    go.Scatter(
        x=parsed_input_data_df.index, y=parsed_input_data_df['precipitation'],
        text=parsed_input_data_df['precipitation'], 
        name="Precipitation"
    ), 
    row=2, col=1
)

fig.add_trace(
    go.Scatter(
        x=parsed_input_data_df.index, y=parsed_input_data_df['streamflow'],
        name="Observed Streamflow"
    ),
    row=3, col=1
)
fig.add_trace(
    go.Scatter(
        x=result_df.index, y=result_df['Q_cms'],
        text=result_df['Q_cms'], 
        name="Predicted Streamflow"
    ), 
    row=3, col=1
)

fig.add_trace(
    go.Scatter(
        x=result_df.index, y=result_df['AET'],
        text=result_df['AET'], 
        name="AET"
    ), 
    row=4, col=1
)
fig.add_trace(
    go.Scatter(
        x=result_df.index, y=result_df['PET'],
        text=result_df['PET'], 
        name="PET"
    ), 
    row=4, col=1
)

fig.add_trace(
    go.Scatter(
        x=state_df.index, y=state_df['initial_SWE'],
        text=state_df['initial_SWE'], 
        name="Snow Storage"
    ), 
    row=5, col=1
)
fig.add_trace(
    go.Scatter(
        x=state_df.index, y=state_df['initial_SMS'],
        text=state_df['initial_SMS'], 
        name="Soil Storage"
    ), 
    row=6, col=1
)
fig.add_trace(
    go.Scatter(
        x=state_df.index, y=state_df['S1'],
        text=state_df['S1'], 
        name="Fast Reservoir"
    ), 
    row=6, col=1
)
fig.add_trace(
    go.Scatter(
        x=state_df.index, y=state_df['S2'],
        text=state_df['S2'], 
        name="Slow Reservoir"
    ), 
    row=6, col=1
)
fig.update_layout(height=1000, width=800, title_text="Detailed plot of most important time-series")
fig.show()

# Reading the saved output of the model

In [40]:
# paths to saved output from model run
path_to_input = hbv_model_data_path / basis
i = 1 # index of model run of interest
if createNewFolder:
    flux_output_file = workingDir / f"run_{i}" / f"flux_df_{i}.pkl"
    state_output_file = workingDir / f"run_{i}" / f"state_df_{i}.pkl"
else:
    flux_output_file = workingDir / f"flux_df_{i}.pkl"
    state_output_file = workingDir / f"state_df_{i}.pkl"

In [41]:
flux_df = pd.read_pickle(flux_output_file, compression="gzip")
flux_df

Unnamed: 0_level_0,Q_cms,Q_mm,AET,PET,Q1,Q1_routed,Q2,ponding,Index_run
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007-04-30,13.429750,0.808745,0.64698,0.64698,0.220725,0.220725,0.588020,1.920,1
2007-05-01,12.371207,0.744999,2.51748,2.51748,0.148776,0.148776,0.596223,3.890,1
2007-05-02,13.450070,0.809968,2.15838,2.15838,0.182118,0.182118,0.627850,5.565,1
2007-05-03,15.507008,0.933838,0.20412,0.20412,0.255622,0.255622,0.678216,15.090,1
2007-05-04,27.145548,1.634715,0.01890,0.01890,0.793391,0.793391,0.841325,1.785,1
...,...,...,...,...,...,...,...,...,...
2007-06-26,29.314944,1.765357,2.38950,2.38950,0.025632,0.025632,1.739725,0.000,1
2007-06-27,28.501795,1.716389,3.68750,3.68750,0.020157,0.020157,1.696232,0.000,1
2007-06-28,27.733785,1.670139,6.11830,6.11830,0.016313,0.016313,1.653826,0.030,1
2007-06-29,27.007588,1.626408,5.82920,5.82920,0.013680,0.013680,1.612727,3.350,1


In [42]:
state_df = pd.read_pickle(state_output_file, compression="gzip")
state_df

Unnamed: 0_level_0,SWE,SMS,S1,S2,WatershedArea_km2,Index_run
TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-04-30,129.445,182.031214,0.664417,23.520800,1434.73,1
2007-05-01,127.525,182.286316,0.545484,23.848906,1434.73,1
2007-05-02,123.635,181.590707,0.603520,25.114000,1434.73,1
2007-05-03,120.220,182.061216,0.715013,27.128650,1434.73,1
2007-05-04,119.390,188.944257,1.259675,33.652989,1434.73,1
...,...,...,...,...,...,...
2007-06-27,0.000,154.796477,0.200784,67.849288,1434.73,1
2007-06-28,0.000,151.108977,0.180627,66.153055,1434.73,1
2007-06-29,0.000,145.009717,0.165410,64.509093,1434.73,1
2007-06-30,0.000,141.403426,0.264439,63.910748,1434.73,1


In [43]:
# re-computation of some qoodnes-of-fit/likelihood functions
gof_list = ["MAE", "MSE", "RMSE", "NRMSE", "NSE", "LogNSE", "KGE"]
gof_dict = utility.calculateGoodnessofFit_simple(
    measuredDF = flux_df,
    predictedDF = flux_df,
    gof_list = gof_list,
    measuredDF_time_column_name=hbvsaskModelObject.time_column_name,
    measuredDF_column_name=hbvsaskModelObject.streamflow_column_name,
    simulatedDF_time_column_name=hbvsaskModelObject.time_column_name,
    simulatedDF_column_name='Q_cms',
    return_dict=True,
)
gof_dict

TypeError: calculateGoodnessofFit_simple() missing 1 required positional argument: 'simulatedDF'