## Set Up

In [1]:
import torch
import os
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import openpyxl

from scipy.stats import mannwhitneyu

In [2]:
# Root locations. The defaults are the original workstation paths; set the
# environment variables GGM_BASE_PATH / GGM_SHARE_PATH to run the notebook on
# another machine without editing this cell.
BASE_PATH = os.environ.get('GGM_BASE_PATH', 'D:/KIMoDIs/global-groundwater-models-main')
DATA_PATH = os.path.join(BASE_PATH, 'data')
MODEL_PATH = os.path.join(BASE_PATH, 'models')
RESULT_PATH = os.path.join(BASE_PATH, 'results')
FIGURES_PATH = os.path.join(BASE_PATH, 'figures')
# Shared drive that receives the exported tables.
SHARE_PATH = os.environ.get('GGM_SHARE_PATH', 'J:/Berlin/B22-FISHy/NUTZER/Kunz.S')

# Run identifiers. NOTE(review): presumably these select the model variant,
# training run and initialisation; they are not used in the cells shown here.
MODEL_TYPE = 'full'
VERSION = '10_Epochs'
init = 'version_0'

In [3]:
# Load the metrics table (median aggregated over 10 initialisations).
metrics_path = os.path.join(RESULT_PATH, 'metrics', 'median_metrics_all_init.parquet')
metrics_agg = (
    pq.read_table(metrics_path)
    .to_pandas()
    # Turn NSE values of -inf into NaN so the next step can drop them.
    .assign(NSE=lambda frame: frame['NSE'].replace(-np.inf, np.nan))
    # Discard every row without a usable NSE.
    .dropna(subset=['NSE'])
)

In [None]:
# Load the '_interpol' variant of the aggregated metrics table and clean its
# NSE column: -inf becomes NaN, then rows without an NSE are dropped.
# NOTE(review): this cell is saved without an execution count although a later
# cell reads `metrics_interpol_agg`; make sure it runs on a fresh kernel.
metrics_interpol_path = os.path.join(RESULT_PATH, 'metrics', 'median_metrics_all_init_interpol.parquet')
metrics_interpol_agg = (
    pq.read_table(metrics_interpol_path)
    .to_pandas()
    .assign(NSE=lambda frame: frame['NSE'].replace(-np.inf, np.nan))
    .dropna(subset=['NSE'])
)

### NSE and P-Value for N-HiTS and TFT

In [4]:
horizons = range(1, 13)


def _nse_sample(model_type, horizon):
    """Return the NSE values in `metrics_agg` for one model type at one horizon as an array."""
    rows = (metrics_agg['horizon'] == horizon) & (metrics_agg['model_type'] == model_type)
    return metrics_agg.loc[rows, 'NSE'].to_numpy()


def _mwu_full_greater(full_sample, dyn_sample):
    """One-sided Mann-Whitney U test with alternative 'greater' (full vs. dyn).

    Returns a `(U, p)` pair. If either sample is empty, `(nan, nan)` is
    returned instead of calling SciPy, which cannot test an empty sample
    (depending on the installed release it raises or warns - verify).
    """
    if full_sample.size == 0 or dyn_sample.size == 0:
        return np.nan, np.nan
    return mannwhitneyu(full_sample, dyn_sample, alternative='greater')


p_ls = []

for HORIZON in horizons:

    nse_nhits_full = _nse_sample('nhits_full', HORIZON)
    nse_nhits_dyn = _nse_sample('nhits_dyn', HORIZON)

    nse_tft_full = _nse_sample('tft_full', HORIZON)
    nse_tft_dyn = _nse_sample('tft_dyn', HORIZON)

    # The power of significance tests depends on the sample size:
    # large samples make even very small differences detectable.
    U_nhits, p_nhits = _mwu_full_greater(nse_nhits_full, nse_nhits_dyn)
    U_tft, p_tft = _mwu_full_greater(nse_tft_full, nse_tft_dyn)

    # One record per horizon; the DataFrame is built once after the loop.
    p_ls.append({'horizon': HORIZON, 'p_value_nhits': p_nhits, 'p_value_tft': p_tft})

p_df = pd.DataFrame(p_ls)

In [5]:
# Median NSE for every (model_type, horizon) pair, in long format.
median_nse = (
    metrics_agg
    .groupby(['model_type', 'horizon'])['NSE']
    .median()
    .reset_index()
)

# Wide format: one row per horizon, one column per model type, plus the
# full-minus-dyn difference of the medians for N-HiTS and TFT.
median_nse_piv = (
    median_nse
    .pivot(index='horizon', columns='model_type', values='NSE')
    .assign(
        nhits_median_diff=lambda wide: wide['nhits_full'] - wide['nhits_dyn'],
        tft_median_diff=lambda wide: wide['tft_full'] - wide['tft_dyn'],
    )
    .reset_index()
)

In [6]:
# Keep only the summary columns, ordered model by model (N-HiTS first, then TFT).
summary_columns = [
    'horizon',
    'nhits_full', 'nhits_dyn', 'nhits_median_diff',
    'tft_full', 'tft_dyn', 'tft_median_diff',
]
median_nse_piv = median_nse_piv.loc[:, summary_columns]

In [7]:
# Attach the median NSE columns to the p-values, matching rows on the horizon.
# The join is an inner join, so only horizons present in both tables are kept.
p_df = pd.merge(p_df, median_nse_piv, on='horizon', how='inner')
p_df

Unnamed: 0,horizon,p_value_nhits,p_value_tft,nhits_full,nhits_dyn,nhits_median_diff,tft_full,tft_dyn,tft_median_diff
0,1,0.088886,0.00322,0.9215,0.9195,0.002,0.9105,0.906,0.0045
1,2,0.03908,0.057089,0.869,0.865,0.004,0.8525,0.849,0.0035
2,3,0.018278,0.01288,0.825,0.82,0.005,0.8,0.7955,0.0045
3,4,0.004839,0.005765,0.785,0.775,0.01,0.748,0.7405,0.0075
4,5,0.003012,0.002561,0.74,0.727,0.013,0.6945,0.6875,0.007
5,6,0.002299,0.002533,0.6965,0.6845,0.012,0.6385,0.6285,0.01
6,7,0.000666,0.003689,0.656,0.64,0.016,0.588,0.5745,0.0135
7,8,0.00052,0.020021,0.623,0.606,0.017,0.536,0.5285,0.0075
8,9,2e-05,0.023354,0.5865,0.569,0.0175,0.481,0.4775,0.0035
9,10,5.8e-05,0.035054,0.552,0.534,0.018,0.44,0.4335,0.0065


In [9]:
# Map the internal column names to the headers used in the exported table.
column_labels = {
    'horizon': 'Horizon',
    'p_value_nhits': 'P value N-HiTS',
    'p_value_tft': 'P value TFT',
    'nhits_full': 'Median N-HiTS',
    'nhits_dyn': 'Median N-HiTS (purely dynamic)',
    'nhits_median_diff': 'N-HiTS Median difference',
    'tft_full': 'Median TFT',
    'tft_dyn': 'Median TFT (purely dynamic)',
    'tft_median_diff': 'TFT Median difference',
}
p_df = p_df.rename(columns=column_labels)

In [14]:
# Display format per column: p-values in scientific notation, medians with
# two decimals, median differences with three decimals.
column_formats = {
    'P value N-HiTS': '{:.2e}',
    'P value TFT': '{:.2e}',
    'Median N-HiTS (purely dynamic)': '{:.2f}',
    'Median N-HiTS': '{:.2f}',
    'N-HiTS Median difference': '{:.3f}',
    'Median TFT': '{:.2f}',
    'Median TFT (purely dynamic)': '{:.2f}',
    'TFT Median difference': '{:.3f}',
}

for column, template in column_formats.items():
    # Formatting turns the column into strings. Coercing back to numbers first
    # keeps this cell re-runnable: without it, a second run would apply a float
    # format code to a str and raise ValueError.
    p_df[column] = pd.to_numeric(p_df[column]).map(template.format)

In [24]:
# Columns written to the supplementary table, TFT first and then N-HiTS.
# NOTE(review): the two '... Median difference' columns of p_df are not part
# of this selection - confirm that omitting them is intended.
export_columns = [
    'Horizon',
    'Median TFT',
    'Median TFT (purely dynamic)',
    'P value TFT',
    'Median N-HiTS',
    'Median N-HiTS (purely dynamic)',
    'P value N-HiTS',
]
export_path = os.path.join(SHARE_PATH, 'global_mod_paper', 'si', 'median_nse_pval.xlsx')

# Write the table without the DataFrame index.
p_df[export_columns].to_excel(export_path, index=False)

### P-Value TFT without groundwater level

In [13]:
def _horizon1_values(model_type, metric):
    """Return column `metric` of `metrics_interpol_agg` for `model_type` at horizon 1 as an array."""
    rows = (metrics_interpol_agg['horizon'] == 1) & (metrics_interpol_agg['model_type'] == model_type)
    return np.array(metrics_interpol_agg[rows][metric])


# NSE: one-sided test with alternative 'greater' (full vs. dyn).
nse_tft_wogwl = _horizon1_values('tft_full_interpol', 'NSE')
nse_tft_wogwl_dyn = _horizon1_values('tft_dyn_interpol', 'NSE')

U_tft_wogwl_nse, p_tft_wogwl_nse = mannwhitneyu(nse_tft_wogwl, nse_tft_wogwl_dyn, alternative='greater')
print('P-val NSE:', p_tft_wogwl_nse)

# RMSE: one-sided test with alternative 'less' (full vs. dyn).
rmse_tft_wogwl = _horizon1_values('tft_full_interpol', 'RMSE')
rmse_tft_wogwl_dyn = _horizon1_values('tft_dyn_interpol', 'RMSE')

U_tft_wogwl_rmse, p_tft_wogwl_rmse = mannwhitneyu(rmse_tft_wogwl, rmse_tft_wogwl_dyn, alternative='less')
print('P-val RMSE', p_tft_wogwl_rmse)

P-val NSE: 1.6903462053755731e-94
P-val RMSE 2.631872546119451e-21
