In [1]:
import pandas as pd
import re
from post_processing import error_estimation, merge_estimates_true

# Utils

In [2]:
pretty_methods = {'Holdout': 'Holdout', 
                'Repeated_Holdout': 'Repeated Holdout', 
                'RepeatedHoldout': 'Repeated Holdout', 
                'Growing_Window': 'Growing Window',
                'GrowingWindow': 'Growing Window', 
                'Rolling_Window': 'Rolling Window',
                'RollingWindow': 'Rolling Window',
                'Block_CV': 'Block CV',
                'BlockCV': 'Block CV', 
                'AdaptedhvBlockCV': 'Adapted hv-Block CV', 
                'MarkovCV': 'Markov CV',
                'BlockCV_with_weights_paper': 'Weighted Block CV var.1', 
                'BlockCV_with_weights': 'Weighted Block CV var.2',
                'GrowingWindow_with_weights': 'Weighted Growing Window', 
                'RollingWindow_with_weights': 'Weighted Rolling Window',
                'Block_CV_with_weights_paper': 'Weighted Block CV var.1', 
                'Block_CV_with_weights': 'Weighted Block CV var.2',
                'Growing_Window_with_weights': 'Weighted Growing Window', 
                'Rolling_Window_with_weights': 'Weighted Rolling Window'}

In [3]:
def clean_filename(filepath, st_split = '/'):
    filename = filepath.split(st_split)[-1]
    
    filename = filename.rsplit('.', 1)[0]
    
    # Check if the filename ends with a date in the format DDMMYYYY or similar
    # and remove the date if it exists
    filename = re.sub(r'_\d{8}$', '', filename)
    
    return filename

def clean_method(method_str):
    parts = method_str.split('.')
    if len(parts) > 3:
        method_name = parts[3].split()[0]
    else:
        method_name = parts[-1].split()[0]
    return method_name

def prettify(df, pretty_methods: dict = pretty_methods):
    copy = df.copy()
    copy['method'] = copy['method'].map(pretty_methods)
    return copy

# Upload

In [4]:
real_data_test = pd.read_csv('results/real_data_test.csv')
real_data_val = pd.read_csv('results/real_data_val.csv')
syn_data_test = pd.read_csv('results/syn_data_test.csv')
syn_data_val = pd.read_csv('results/syn_data_val.csv')
stats = pd.read_csv('results/time_series_stats.csv')

# Friedman Test

In [5]:
data_val, data_test = real_data_val, real_data_test

In [6]:
est = error_estimation(syn_data_val)
estimates = merge_estimates_true(est, syn_data_test)

In [7]:
estimates['PAE'] = estimates['mse_estimate'] - estimates['mse_true']
for_friedman = estimates[estimates['model'] == 'ARMA'][['Field', 'series', 'method', 'PAE']].copy()
for_friedman

Unnamed: 0,Field,series,method,PAE
0,s1,s1_0,Adapted hv-Block CV,-8.922074e-01
3,s1,s1_0,Block CV,-8.806334e-01
6,s1,s1_0,Growing Window,-9.177595e-01
9,s1,s1_0,Holdout,-9.789598e-01
12,s1,s1_0,Markov CV,-3.865877e-01
...,...,...,...,...
98985,s3,s3_999,Rolling Window,3.648251e+06
98988,s3,s3_999,Weighted Block CV var.1,-2.349323e+06
98991,s3,s3_999,Weighted Block CV var.2,-2.770557e+06
98994,s3,s3_999,Weighted Growing Window,-1.387184e+06


In [26]:
import pandas as pd
import numpy as np
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp

def apply_friedman_test(data):
    pivot_table = data.pivot(index='series', columns='method', values='PAE')

    # Perform the Friedman test
    friedman_stat, friedman_p = friedmanchisquare(*[pivot_table[method] for method in pivot_table.columns])

    return friedman_stat, friedman_p

def apply_nemenyi(data):
    pivot_table = data.pivot(index='series', columns='method', values='PAE')

    # Perform the Nemenyi test
    nemenyi_results = sp.posthoc_nemenyi_friedman(pivot_table.to_numpy())
    return nemenyi_results

def process_by_field(df):
    results = {}
    for field in df['Field'].unique():
        field_data = df[df['Field'] == field]
        friedman_stat, friedman_p = apply_friedman_test(field_data)

        if friedman_p < 0.05:
            nemenyi_results = apply_nemenyi(field_data)
            results[field] = {
                'friedman_stat': friedman_stat,
                'friedman_p': friedman_p,
                'nemenyi_results': nemenyi_results
            }
        else:
            results[field] = {
                'friedman_stat': friedman_stat,
                'friedman_p': friedman_p,
                'nemenyi_results': None
            }
    return results


In [27]:
results = process_by_field(for_friedman)

In [28]:
results

{'s1': {'friedman_stat': 1806.3441818181818,
  'friedman_p': 0.0,
  'nemenyi_results':            0         1         2         3      4         5      6         7  \
  0   1.000000  0.003465  0.405315  0.001000  0.001  0.001000  0.001  0.014851   
  1   0.003465  1.000000  0.861752  0.681431  0.001  0.798848  0.001  0.900000   
  2   0.405315  0.861752  1.000000  0.017532  0.001  0.033067  0.001  0.900000   
  3   0.001000  0.681431  0.017532  1.000000  0.001  0.900000  0.001  0.438120   
  4   0.001000  0.001000  0.001000  0.001000  1.000  0.001000  0.001  0.001000   
  5   0.001000  0.798848  0.033067  0.900000  0.001  1.000000  0.001  0.559821   
  6   0.001000  0.001000  0.001000  0.001000  0.001  0.001000  1.000  0.001000   
  7   0.014851  0.900000  0.900000  0.438120  0.001  0.559821  0.001  1.000000   
  8   0.008438  0.900000  0.900000  0.538854  0.001  0.656271  0.001  0.900000   
  9   0.001000  0.900000  0.299887  0.900000  0.001  0.900000  0.001  0.900000   
  10  0.00100

In [29]:
(results['s1'])

{'friedman_stat': 1806.3441818181818,
 'friedman_p': 0.0,
 'nemenyi_results':            0         1         2         3      4         5      6         7  \
 0   1.000000  0.003465  0.405315  0.001000  0.001  0.001000  0.001  0.014851   
 1   0.003465  1.000000  0.861752  0.681431  0.001  0.798848  0.001  0.900000   
 2   0.405315  0.861752  1.000000  0.017532  0.001  0.033067  0.001  0.900000   
 3   0.001000  0.681431  0.017532  1.000000  0.001  0.900000  0.001  0.438120   
 4   0.001000  0.001000  0.001000  0.001000  1.000  0.001000  0.001  0.001000   
 5   0.001000  0.798848  0.033067  0.900000  0.001  1.000000  0.001  0.559821   
 6   0.001000  0.001000  0.001000  0.001000  0.001  0.001000  1.000  0.001000   
 7   0.014851  0.900000  0.900000  0.438120  0.001  0.559821  0.001  1.000000   
 8   0.008438  0.900000  0.900000  0.538854  0.001  0.656271  0.001  0.900000   
 9   0.001000  0.900000  0.299887  0.900000  0.001  0.900000  0.001  0.900000   
 10  0.001000  0.001000  0.00100