## Ctrl+F for 'change' to replace path names etc.

### Training and evaluating a MTS-LSTM
1) Loading and/or adjusting a configuration file

2) Training a MTS-LSTM based on the configuration file 

3) Evaluating the model on the testing period/data (get streamflow simulations)

4) Determine metrics for each test catchment and create a plot with Qobs and Qsim for each catchment (first for daily, then for hourly results)

### Imports etc.

In [1]:
import pickle
import os
from pathlib import Path
import xarray as xr

import matplotlib.pyplot as plt
import pandas as pd

from neuralhydrology.evaluation import metrics, get_tester
from neuralhydrology.evaluation.evaluate import start_evaluation
from neuralhydrology.nh_run import start_run, eval_run
from neuralhydrology.utils.config import Config

import torch
from ruamel.yaml import YAML  

In [5]:
# Limit numexpr to 8 threads. The environment variable is set before the
# import below - presumably so numexpr reads it while initialising (TODO confirm).
os.environ['NUMEXPR_NUM_THREADS'] = '8'

import numexpr as ne

In [3]:
# Show which GPU is visible to PyTorch. Guarded so that a machine without a
# CUDA device gets a readable notice instead of an exception from this cell.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")

NVIDIA GeForce RTX 3090


## Configuration

- fill in work_dir

In [4]:
# specify needed paths
work_dir = Path('C:/Users/kwilbrand/Katharina/My_MTS-LSTM')  # change path accordingly

# Fail with an explicit, descriptive error rather than a bare `assert`
# (asserts are skipped when Python runs with -O and carry no message).
if not work_dir.exists():
    raise FileNotFoundError(f"work_dir does not exist: {work_dir}")

# Relative paths used further down (e.g. the config files) are resolved
# against the current working directory, so switch to work_dir first.
os.chdir(work_dir)
os.getcwd()

'C:\\Users\\kwilbrand\\Katharina\\My_MTS-LSTM'

- fill in experiment name (as specified in config file)
- fill in values for hyperparameters (optional)

In [5]:
# Name of the experiment; it is written into the configuration and also used
# to build the file name of the configuration file.
experiment_name = '00_nldas2_camels'  # change name accordingly


# Optional values for hyperparameter tuning. Uncomment them together with the
# matching run_config[...] lines in the configuration cell.
# batch_size = 2048    # [256, 2048]
# hidden_size = 64    # [64, 128]
# dropout = 0.2        # [0.2, 0.4]
# validate_every = 30  # <= nr of epochs

- load and adjust config file 
- comment out the first lines of the next cell if no configuration template is used and/or the configuration file only needs to be loaded

In [10]:
# Template configuration that serves as the starting point for this experiment.
config_template_path = Path("configs/US/00_nldas2_camels.yml")  # change path accordingly
assert config_template_path.exists()

# Read the template and overwrite the entries that differ for this experiment.
yaml = YAML()
run_config = yaml.load(config_template_path)

run_config["experiment_name"] = experiment_name
# Optional hyperparameter overrides (need the matching variables to be defined):
# run_config["batch_size"] = batch_size
# run_config["hidden_size"] = {'1D': hidden_size, '1H': hidden_size}
# run_config["output_dropout"] = dropout
# run_config["validate_every"] = validate_every

# Write the adjusted configuration to a file named after the experiment.
# NOTE: if experiment_name equals the template's file name, the template
# itself is overwritten by this dump.
config_path = Path(f"configs/US/{experiment_name}.yml")  # change path accordingly
yaml.dump(run_config, config_path)

# Re-load the written file as a neuralhydrology Config object.
run_config = Config(config_path)
# Uncomment to print the main settings of the loaded configuration:
# print('model:\t\t', run_config.model)
# print('use_frequencies:', run_config.use_frequencies)
# print('seq_length:\t', run_config.seq_length)
# print('dataset:\t', run_config.dataset)
# print('target_variables:\t', run_config.target_variables)
# print('dynamic_inputs:\t', run_config.dynamic_inputs)
# print('static_attributes:\t', run_config.static_attributes)

model:		 mtslstm
use_frequencies: ['1D', '1H']
seq_length:	 {'1D': 365, '1H': 336}
dataset:	 hourly_camels_us
target_variables:	 ['qobs_mm_per_hour']


## Training

In [6]:
# Start the training run described by the configuration file at config_path.
start_run(config_file=config_path)

## Evaluation
- fill in run_dir (printed in output log from training above, "experiment_name_date_time")
- output log is also saved as txt file in run_dir

In [7]:
# Directory of the finished training run that should be evaluated.
run_dir = Path("C:/Users/kwilbrand/Katharina/My_MTS-LSTM/runs/00_nldas2_camels_2009_125155")
# config_path = Path('configs/....yml')  # uncomment to evaluate with a different config file

# config_path is already a Path, so it is handed to Config directly.
run_config = Config(config_path)

# Evaluate the run on the test period.
# NOTE(review): confirm that start_evaluation returns the results dictionary in
# the installed neuralhydrology version; if it returns None, load the saved
# test_results.p with the re-load cell below instead.
results = start_evaluation(cfg=run_config, run_dir=run_dir, period="test")

### Determine metrics for each test catchment (daily and hourly)
- "results" is a nested dictionary:
- Qobs and Qsim are in xr dataset: results[catchment_ID][frequency]['xr']
- frequency is one of '1D' or '1H'
- fill in correct variable names for Qobs and Qsim 

In [17]:
# # Use this code cell to re-load test-results
# with open(run_dir / 'test/model_epoch030/test_results.p', 'rb') as f:
#     results = pickle.load(f)


#  results['01434025']['1D']['xr']

In [2]:
# names of dataset variables qsim and qobs (look up in results[catchm_ID][freq]['xr'])
qobs_var = 'qobs_mm_per_hour_obs'
qsim_var = 'qobs_mm_per_hour_sim'

# directory to store figures of time-series plots with observed and simulated Q
plot_dir = Path(run_dir) / 'time-series_plots'
plot_dir.mkdir(exist_ok=True)

# One single-row DataFrame of metrics per catchment; they are combined after
# the loop. DataFrame.append was removed in pandas 2.0, and growing a frame
# row by row copies it on every iteration.
metric_frames = []

for catchm_ID, catchm_results in results.items():
    # extract observations and simulations
    daily_xr = catchm_results["1D"]["xr"]
    daily_qobs = daily_xr[qobs_var]
    daily_qsim = daily_xr[qsim_var]

    # plot observed vs. simulated discharge and save the figure as daily_<catchment ID>
    fig, ax = plt.subplots(figsize=(20, 4))
    ax.plot(daily_qobs["date"], daily_qobs, label="Observed")
    ax.plot(daily_qsim["date"], daily_qsim, label="Simulated")
    ax.legend()
    ax.set_ylabel("Discharge (mm/h)")
    ax.set_title(f"Test period, catchment {catchm_ID} - daily NSE {catchm_results['1D']['NSE_1D']:.3f}")
    fig.savefig(plot_dir / f"daily_{catchm_ID}")
    # plt.close(fig)  # uncomment to free memory when there are many catchments (figures are then not shown inline)

    # calculate all metrics on the last time_step entry of every date
    values = metrics.calculate_all_metrics(daily_qobs.isel(time_step=-1), daily_qsim.isel(time_step=-1))
    metric_frames.append(pd.DataFrame(values, index=[catchm_ID]))

# dataframe with one row of metrics per catchment (empty if there are no results)
metrics_df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

# save metrics dataframe as pickle file
with open(Path(run_dir) / 'metrics_df_daily.pkl', 'wb') as f:
    pickle.dump(metrics_df, f, pickle.DEFAULT_PROTOCOL)

print("Daily metrics:")
metrics_df

In [1]:
# names of dataset variables qsim and qobs
qobs_var = 'qobs_mm_per_hour_obs'
qsim_var = 'qobs_mm_per_hour_sim'

# directory to store figures of time-series plots with observed and simulated Q
plot_dir = Path(run_dir) / 'time-series_plots'
plot_dir.mkdir(exist_ok=True)

# One single-row DataFrame of metrics per catchment; they are combined after
# the loop. DataFrame.append was removed in pandas 2.0, and growing a frame
# row by row copies it on every iteration.
metric_frames = []

for catchm_ID, catchm_results in results.items():
    # extract a date slice of observations and simulations
    # NOTE(review): the metrics below are computed on this slice only - confirm
    # that "10-1995" matches the start of the intended evaluation period.
    hourly_xr = catchm_results["1H"]["xr"].sel(date=slice("10-1995", None))

    # The hourly data is indexed with two indices: The date (in days) and the time_step (the hour within that day).
    # As we want to get a continuous plot of several days' hours, we select all 24 hours of each day and then stack
    # the two dimensions into one consecutive datetime dimension.
    hourly_xr = hourly_xr.isel(time_step=slice(-24, None)).stack(datetime=['date', 'time_step'])
    hourly_xr['datetime'] = hourly_xr.coords['date'] + hourly_xr.coords['time_step']

    hourly_qobs = hourly_xr[qobs_var]
    hourly_qsim = hourly_xr[qsim_var]

    # plot observed vs. simulated discharge and save the figure as hourly_<catchment ID>
    fig, ax = plt.subplots(figsize=(20, 4))
    ax.plot(hourly_qobs["datetime"], hourly_qobs, label="Observation")
    ax.plot(hourly_qsim["datetime"], hourly_qsim, label="Simulation")
    ax.set_ylabel("Discharge (mm/h)")
    ax.set_title(f"Test period, {catchm_ID} - hourly NSE {catchm_results['1H']['NSE_1H']:.3f}")
    ax.legend()
    fig.savefig(plot_dir / f"hourly_{catchm_ID}")
    # plt.close(fig)  # uncomment to free memory when there are many catchments (figures are then not shown inline)

    # calculate all metrics at hourly resolution
    values = metrics.calculate_all_metrics(hourly_qobs, hourly_qsim, resolution='1H')
    metric_frames.append(pd.DataFrame(values, index=[catchm_ID]))

# dataframe with one row of metrics per catchment (empty if there are no results)
metrics_df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

# save metrics dataframe as pickle file
with open(Path(run_dir) / 'metrics_df_hourly.pkl', 'wb') as f:
    pickle.dump(metrics_df, f, pickle.DEFAULT_PROTOCOL)

print("Hourly metrics:")
metrics_df