In [3]:
import pickle
from pathlib import Path
import matplotlib.pyplot as plt
import torch
from neuralhydrology.evaluation import metrics
from neuralhydrology.nh_run import start_run, eval_run

In [9]:
# YAML run configuration handed to `start_run`.
run_config_file = "../../config_files/pretrain/pretrain_snotel.yml"
print('training: ' + run_config_file)

# Pick the first available backend in order of preference: MPS, CUDA, CPU.
# `getattr` keeps the check from raising on PyTorch builds that do not expose
# `torch.backends.mps`.
mps_backend = getattr(torch.backends, "mps", None)
if mps_backend is not None and mps_backend.is_available():
    backend_name = "MPS"
elif torch.cuda.is_available():
    backend_name = "CUDA"
else:
    backend_name = "CPU"
print(f"Using {backend_name} backend")

# Only the CPU fallback passes an explicit `gpu=-1` override; the MPS and CUDA
# cases call `start_run` without a `gpu` argument.
# NOTE(review): presumably the device then comes from the run configuration —
# confirm against the `start_run` documentation.
run_kwargs = {"gpu": -1} if backend_name == "CPU" else {}
start_run(config_file=Path(run_config_file), **run_kwargs)


training: ../../config_files/pretrain/pretrain_snotel.yml
Using MPS backend
2024-06-13 09:42:01,204: Logging to /Users/joshsturtevant/Documents/lynker/projects/nasa_water/neuralhydrology/nasa_co_swe/job_scripts/test/runs/pretrain_hs128_lossMSE_lr1e-3down_HA_snotel_seed4_1306_094201/output.log initialized.
2024-06-13 09:42:01,204: ### Folder structure created at /Users/joshsturtevant/Documents/lynker/projects/nasa_water/neuralhydrology/nasa_co_swe/job_scripts/test/runs/pretrain_hs128_lossMSE_lr1e-3down_HA_snotel_seed4_1306_094201
2024-06-13 09:42:01,205: ### Run configurations for pretrain_hs128_lossMSE_lr1e-3down_HA_snotel_seed4
2024-06-13 09:42:01,205: experiment_name: pretrain_hs128_lossMSE_lr1e-3down_HA_snotel_seed4
2024-06-13 09:42:01,206: train_basin_file: ../../basin_txt_files/excluding_co_camels_basins_531.txt
2024-06-13 09:42:01,206: validation_basin_file: ../../basin_txt_files/co_camels_basins.txt
2024-06-13 09:42:01,207: test_basin_file: ../../basin_txt_files/co_camels_basins

### Evaluate run on test set
The run directory that needs to be specified for evaluation is printed in the output log above. Since the folder name is created dynamically (it includes the date and time at which the run started), you will need to change the `run_dir` argument to match your local directory name. By default, the evaluation uses the same device as the training process.

In [5]:
# Directory of the finished run to evaluate. The folder name is generated when
# training starts, so adjust it to the folder reported in your own training log.
run_dir = Path("runs/test_run_30_colorado_hydroatlas_basin_1106_095251")

# Fail early with a readable message instead of an opaque error raised from
# inside the evaluation code when the directory is missing.
if not run_dir.is_dir():
    raise FileNotFoundError(
        f"Run directory '{run_dir}' not found (resolved to '{run_dir.resolve()}'). "
        "Set `run_dir` to the folder printed in the training log."
    )

# Evaluate the test period: the results-loading cell below reads
# `test/model_epoch050/test_results.p`, so this is the period it depends on.
# NOTE(review): an earlier execution of this cell ended in
# `IndexError: list index out of range`; presumably the run directory held no
# model checkpoints — confirm before relying on this run.
eval_run(run_dir=run_dir, period="test")

IndexError: list index out of range

### Load and inspect model predictions
Next, we load the results file and compare the model predictions with observations. The results file is always a pickled dictionary with one key per basin (even for a single basin). The next-lower dictionary level is the temporal resolution of the predictions. In this case, we trained a model only on daily data ('1D'). Within the temporal resolution, the next-lower dictionary level contains `xr` (an xarray Dataset that holds observations and predictions), as well as one key for each metric that was specified in the config file.

In [None]:
# Pickled evaluation results for the test period at epoch 50.
# NOTE: `pickle.load` can execute arbitrary code; only load files you produced yourself.
results_file = run_dir / "test" / "model_epoch050" / "test_results.p"
with results_file.open("rb") as handle:
    results = pickle.load(handle)

# Show the top-level keys of the loaded dictionary.
results.keys()

The data variables in the xarray Dataset are named according to the name of the target variables, with suffix `_obs` for the observations and suffix `_sim` for the simulations.

In [None]:
# Key into `results` for the basin inspected in the cells below.
example_test_basin = "06614800"

In [None]:
# Daily-resolution ('1D') entry for the example basin; display its 'xr' dataset.
basin_daily_results = results[example_test_basin]['1D']
basin_daily_results['xr']

Let's plot the model predictions vs. the observations.

In [None]:
# Pull the observed and simulated series for the example basin out of the
# daily ('1D') results dataset.
basin_xr = results[example_test_basin]['1D']['xr']
qobs = basin_xr['QObs(mm/d)_obs']
qsim = basin_xr['QObs(mm/d)_sim']

# Draw both series against their 'date' coordinate on a single axis.
fig, ax = plt.subplots(figsize=(8, 5))
for series, series_label in ((qobs, "streamflow observation"), (qsim, "LSTM prediction")):
    ax.plot(series['date'], series, label=series_label)
ax.legend()
ax.set_ylabel("Discharge (mm/d)")
ax.set_title(f"Test period on basin {example_test_basin} - NSE {results[example_test_basin]['1D']['NSE']:.3f}")

# Render the figure, then release it.
plt.show()
plt.close(fig)

Next, we are going to compute all metrics that are implemented in the NeuralHydrology package. You will find additional hydrological signatures implemented in `neuralhydrology.evaluation.signatures`.

In [None]:
# Select the last entry along `time_step` for both series before scoring.
obs_last_step = qobs.isel(time_step=-1)
sim_last_step = qsim.isel(time_step=-1)

# Compute all available metrics and print each with three decimals.
values = metrics.calculate_all_metrics(obs_last_step, sim_last_step)
for metric_name, metric_value in values.items():
    print(f"{metric_name}: {metric_value:.3f}")