In [1]:
import sys
import os

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(os.path.join(module_path, 'scripts'))

In [2]:
import pandas as pd
from hydroeval import evaluator, nse
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

import utils

In [3]:
utils.set_project_dir()

In [4]:
country_codes = ['ESP', 'USA', 'EST', 'ETH']
time_intervals = ['d', 'm']
test_size = 0.5
test_size_int = int(test_size * 100)

for time_interval in time_intervals:
    
    target = f'Q_{time_interval}+1'
    feat_set_list = [f'FS{i}_{time_interval}' for i in range(1, 4)]
    
    results_list = []

    for country_code in country_codes:
        for feat_set in feat_set_list:

            model_dir = utils.get_model_dir(country_code, target, feat_set)

            # Get indices of training samples
            train_indices = pd.read_csv(
                f'{model_dir}/{country_code}_{target}_{feat_set}_feat_train_{test_size_int}.csv', usecols=['Index']
            )['Index'].values

            # Get indices of test samples
            test_indices = pd.read_csv(
                f'{model_dir}/{country_code}_{target}_{feat_set}_feat_test_{test_size_int}.csv', usecols=['Index']
            )['Index'].values

            # Read RF results
            obs_vs_pred = pd.read_csv(f'{model_dir}/{country_code}_{target}_{feat_set}_obs_vs_pred_{test_size_int}.csv', parse_dates=['Date'])
            obs_vs_pred['Index'] = obs_vs_pred.index

            # Training set
            end_index_train = train_indices[-1]
            obs_vs_pred_train = obs_vs_pred.loc[:end_index_train, :]
            obs_train = obs_vs_pred_train[target]
            pred_train = obs_vs_pred_train[f'{target}_pred']
            nse_train = round(evaluator(nse, pred_train, obs_train)[0], 2)
            rmse_train = round(mean_squared_error(obs_train, pred_train, squared=False), 2)
            nrmse_train = round(rmse_train / (obs_train.max() - obs_train.min()) * 100, 2)

            # Test set
            start_index_test = test_indices[0]
            obs_vs_pred_test = obs_vs_pred.loc[start_index_test:, :]
            obs_test = obs_vs_pred_test[target]
            pred_test = obs_vs_pred_test[f'{target}_pred']
            nse_test = round(evaluator(nse, pred_test, obs_test)[0], 2)
            rmse_test = round(mean_squared_error(obs_test, pred_test, squared=False), 2)
            nrmse_test = round(rmse_test / (obs_train.max() - obs_train.min()) * 100, 2)

            # Append results to list
            results = {
                'catchment_name': utils.get_catchment_name(country_code),
                'country_code': country_code,
                'target': target,
                'feat_set': feat_set,
                'test_size_int': test_size_int,
                'nse_train': nse_train,
                'nse_test': nse_test,
                'rmse_train': rmse_train,
                'rmse_test': rmse_test,
                'nrmse_train': nrmse_train,
                'nrmse_test': nrmse_test
            }
            df_results = pd.DataFrame([results.values()], columns=results.keys())
            results_list.append(df_results)
    results_df = pd.concat(results_list).reset_index(drop=True)
    display(results_df)
    results_df.to_csv(fr'\\export.hpc.ut.ee\gis\flow_swat_ml_paper\ml\{target}_rf_metrics.csv', index=False)

Unnamed: 0,catchment_name,country_code,target,feat_set,test_size_int,nse_train,nse_test,rmse_train,rmse_test,nrmse_train,nrmse_test
0,Argos,ESP,Q_d+1,FS1_d,50,0.91,-0.01,0.13,0.65,1.35,6.77
1,Argos,ESP,Q_d+1,FS2_d,50,0.92,0.2,0.13,0.58,1.35,6.04
2,Argos,ESP,Q_d+1,FS3_d,50,0.9,0.24,0.14,0.57,1.46,5.94
3,Bald Eagle,USA,Q_d+1,FS1_d,50,0.91,-1.02,4.26,17.62,1.13,4.68
4,Bald Eagle,USA,Q_d+1,FS2_d,50,0.91,0.5,4.24,8.8,1.13,2.34
5,Bald Eagle,USA,Q_d+1,FS3_d,50,0.93,0.51,3.72,8.72,0.99,2.32
6,Porijõgi,EST,Q_d+1,FS1_d,50,0.99,-0.67,0.24,1.54,1.08,6.94
7,Porijõgi,EST,Q_d+1,FS2_d,50,0.99,0.82,0.21,0.5,0.95,2.25
8,Porijõgi,EST,Q_d+1,FS3_d,50,0.99,0.85,0.21,0.46,0.95,2.07
9,Rib,ETH,Q_d+1,FS1_d,50,0.99,0.81,3.14,10.74,2.28,7.81


Unnamed: 0,catchment_name,country_code,target,feat_set,test_size_int,nse_train,nse_test,rmse_train,rmse_test,nrmse_train,nrmse_test
0,Argos,ESP,Q_m+1,FS1_m,50,0.92,0.27,0.09,0.28,5.62,17.5
1,Argos,ESP,Q_m+1,FS2_m,50,0.92,0.34,0.08,0.27,5.0,16.88
2,Argos,ESP,Q_m+1,FS3_m,50,0.93,0.38,0.08,0.26,5.0,16.25
3,Bald Eagle,USA,Q_m+1,FS1_m,50,0.9,0.29,2.46,6.53,6.37,16.92
4,Bald Eagle,USA,Q_m+1,FS2_m,50,0.91,0.18,2.42,7.06,6.27,18.29
5,Bald Eagle,USA,Q_m+1,FS3_m,50,0.88,0.31,2.66,6.46,6.89,16.74
6,Porijõgi,EST,Q_m+1,FS1_m,50,0.9,0.13,0.49,0.82,5.63,9.43
7,Porijõgi,EST,Q_m+1,FS2_m,50,0.91,0.01,0.49,0.88,5.63,10.11
8,Porijõgi,EST,Q_m+1,FS3_m,50,0.9,0.05,0.5,0.86,5.75,9.89
9,Rib,ETH,Q_m+1,FS1_m,50,0.96,0.87,4.3,7.74,5.13,9.24
