In [56]:
"""Ranks candidates by the Schulze method.
For more information read http://en.wikipedia.org/wiki/Schulze_method.
"""

__author__ = "Michael G. Parker"
__contact__ = "http://omgitsmgp.com/"


from collections import defaultdict


def _add_remaining_ranks(d, candidate_name, remaining_ranks, weight):
    for remaining_rank in remaining_ranks:
        for other_candidate_name in remaining_rank:
            d[candidate_name, other_candidate_name] += weight


def _add_ranks_to_d(d, ranks, weight):
    """Adds one weighted ranking to the pairwise preference table d.
    Every candidate in a tie group is credited with weight against all candidates that
    appear in any later tie group; candidates within the same group do not beat each other.
    """
    for next_position, tie_group in enumerate(ranks, start=1):
        # Everything after the current group is ranked strictly lower.
        lower_groups = ranks[next_position:]
        for candidate_name in tie_group:
            _add_remaining_ranks(d, candidate_name, lower_groups, weight)


def _compute_d(weighted_ranks):
    """Builds the pairwise preference table d of the Schulze method.
    d[V,W] is the total weight of the rankings that place candidate V above W.
    Pairs that never occur read as 0 because the table is a defaultdict(int).
    """
    pairwise = defaultdict(int)
    for ballot, ballot_weight in weighted_ranks:
        _add_ranks_to_d(pairwise, ballot, ballot_weight)
    return pairwise


def _compute_p(d, candidate_names):
    """Computes the p array in the Schulze method.
    p[V,W] is the strength of the strongest path from candidate V to W.
    """
    p = {}
    for candidate_name1 in candidate_names:
        for candidate_name2 in candidate_names:
            if candidate_name1 != candidate_name2:
                strength = d.get((candidate_name1, candidate_name2), 0)
                if strength > d.get((candidate_name2, candidate_name1), 0):
                    p[candidate_name1, candidate_name2] = strength

    for candidate_name1 in candidate_names:
        for candidate_name2 in candidate_names:
            if candidate_name1 != candidate_name2:
                for candidate_name3 in candidate_names:
                    if (candidate_name1 != candidate_name3) and (candidate_name2 != candidate_name3):
                        curr_value = p.get((candidate_name2, candidate_name3), 0)
                        new_value = min(
                                p.get((candidate_name2, candidate_name1), 0),
                                p.get((candidate_name1, candidate_name3), 0))
                        if new_value > curr_value:
                            p[candidate_name2, candidate_name3] = new_value

    return p


def _rank_p(candidate_names, p):
    """Ranks the candidates by p."""
    candidate_wins = defaultdict(list)

    for candidate_name1 in candidate_names:
        num_wins = 0

        # Compute the number of wins this candidate has over all other candidates.
        for candidate_name2 in candidate_names:
            if candidate_name1 == candidate_name2:
                continue
            candidate1_score = p.get((candidate_name1, candidate_name2), 0)
            candidate2_score = p.get((candidate_name2, candidate_name1), 0)
            if candidate1_score > candidate2_score:
                num_wins += 1

        candidate_wins[num_wins].append(candidate_name1)

    sorted_wins = sorted(candidate_wins.keys(), reverse=True)
    return [candidate_wins[num_wins] for num_wins in sorted_wins]


def compute_ranks(candidate_names, weighted_ranks):
    """Returns the candidates ranked by the Schulze method.
    See http://en.wikipedia.org/wiki/Schulze_method for details.
    Parameter candidate_names is an iterable containing all the candidate names. It may be
    a one-shot iterable (for example a generator); it is copied once before use.
    Parameter weighted_ranks is a sequence of (ranks, weight) pairs.
    The first element, ranks, is a ranking of the candidates. It is an array of arrays so that we
    can express ties. For example, [[a, b], [c], [d, e]] represents a = b > c > d = e.
    The second element, weight, is typically the number of voters that chose this ranking.
    Returns a list of lists of candidate names, best group first; candidates in the same
    inner list are tied.
    """
    # The helpers below walk the candidates repeatedly (nested loops and a second pass for
    # the final ranking). Snapshot them so an iterator is not exhausted after the first pass,
    # which would silently yield a wrong (empty) result.
    candidate_names = tuple(candidate_names)
    d = _compute_d(weighted_ranks)
    p = _compute_p(d, candidate_names)
    return _rank_p(candidate_names, p)

In [89]:
import numpy as np
# Candidates are identified by the integers 1..11.
candidate_names = range(1, 12)

# One ballot per row: candidate ids ordered from most preferred to least preferred.
ranks = [
    [7, 6, 10, 8, 1, 4, 11, 9, 5, 3, 2],
    [11, 9, 7, 2, 10, 8, 6, 5, 3, 1, 4],
    [6, 8, 10, 4, 7, 11, 2, 9, 3, 5, 1],
    [6, 8, 10, 7, 4, 11, 9, 1, 2, 3, 5],
    [6, 8, 10, 7, 11, 4, 9, 2, 3, 1, 5],
    [6, 7, 10, 11, 8, 4, 9, 1, 3, 2, 5],
]

# compute_ranks expects tie groups, so wrap each candidate in its own single-element
# group (no ties) and give every ballot a weight of 1.
weighted_ranks = [
    ([[candidate] for candidate in ballot], 1)
    for ballot in ranks
]

compute_ranks(candidate_names, weighted_ranks)

[[6], [7, 8, 10], [4, 11], [9], [2], [1, 3], [5]]

In [75]:
# Display whatever is currently bound to weighted_ranks (debugging aid).
# NOTE(review): this cell's execution count is lower than that of the cell that builds
# weighted_ranks above, so the saved output reflects an earlier value — re-run or remove.
weighted_ranks

[([4, 2, 3, 6, 1, 11, 10, 7, 5, 9, 8], 1)]

In [20]:
# Imports and plotting configuration for the forecast-evaluation cells.
from scipy.stats import spearmanr
# NOTE(review): star import hides where names come from. Other cells call m4_parser and
# glob without importing them explicitly, so they presumably arrive through this line —
# confirm and import them by name.
from utils.data import *
from utils.analysis import evaluate_prediction_owa
import seaborn as sns
import numpy as np
import pandas as pd
# Applies the seaborn "whitegrid" theme to subsequent plots.
sns.set_theme(style="whitegrid")
import matplotlib.pyplot as plt
from copy import deepcopy
# NOTE(review): not referenced in any visible cell; presumably an upper clip applied to
# OWA values before plotting — confirm or remove.
max_owa_clip = 3.5

In [116]:
# Preview the first ten rows of _y_test_df.
# NOTE(review): in the visible cells _y_test_df is only assigned inside the seasonality
# loop of a cell further down, so on a fresh kernel run top-to-bottom this name is
# undefined here unless it is created elsewhere — confirm.
_y_test_df.head(10)

Unnamed: 0.1,Unnamed: 0,unique_id,ds,y,mdl_ARIMA,mdl_Comb,mdl_Damped,mdl_ESRNN,mdl_naive2,mdl_Theta,...,mf_diff1_acf1,mf_diff1_acf10,mf_diff2_acf1,mf_diff2_acf10,mf_seas_acf1,OWA_mdl_ARIMA,OWA_mdl_Comb,OWA_mdl_Damped,OWA_mdl_ESRNN,OWA_mdl_Theta
0,0,Q1,1976-06-30,6790.643698,6793.580724,6840.473668,6832.048777,6910.539,6818.166736,6749.707636,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
1,1,Q1,1976-09-30,6598.952202,6805.382298,6888.341814,6880.86113,6777.863,6818.166736,6725.755618,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
2,2,Q1,1976-12-31,6608.527173,6799.717404,6932.955804,6919.911015,6900.975,6818.166736,6701.8036,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
3,3,Q1,1977-03-31,6765.914014,6802.43662,6974.966469,6951.150924,6894.792,6818.166736,6677.851582,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
4,4,Q1,1977-06-30,6992.949003,6801.131364,7014.894474,6976.142853,7011.891,6818.166736,6653.899564,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
5,5,Q1,1977-09-30,6931.539,6801.757902,7053.15635,6996.136398,6883.856,6818.166736,6629.947546,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
6,6,Q1,1977-12-31,7161.240869,6801.457157,7090.085324,7012.131234,6995.383,6818.166736,6605.995527,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
7,7,Q1,1978-03-31,6981.304105,6801.601518,7125.947976,7024.927104,6979.321,6818.166736,6582.043509,...,-0.514035,0.948458,-0.789694,1.794706,0.561899,0.299359,0.283988,0.252424,0.220371,0.449563
8,8,Q2,1976-06-30,7081.709669,7072.776923,7064.662584,7058.55866,7111.784,7072.776923,7009.144658,...,-0.291751,0.242721,-0.666926,0.709541,0.537885,0.237362,0.223323,0.237582,0.216353,0.389824
9,9,Q2,1976-09-30,6789.414723,7072.776923,7107.251443,7109.479262,7071.705,7072.776923,6985.494405,...,-0.291751,0.242721,-0.666926,0.709541,0.537885,0.237362,0.223323,0.237582,0.216353,0.389824


In [123]:
# Frequencies analysed together in this run (the alternative group is kept for reference).
seasonalities = ['Hourly', 'Daily', 'Weekly']  # ["Monthly","Yearly","Quarterly"]

# Load every frequency and stack the pieces with a single concat per frame rather than
# re-concatenating the growing frames on every iteration.
X_train_parts, y_train_parts, X_test_parts, y_test_parts = [], [], [], []
for seasonality in seasonalities:
    _X_train_df, _y_train_df, _X_test_df, _y_test_df = m4_parser(
        seasonality, 'data', 'forecasts', load_existing_dataframes=True)
    X_train_parts.append(_X_train_df)
    y_train_parts.append(_y_train_df)
    X_test_parts.append(_X_test_df)
    y_test_parts.append(_y_test_df)

X_train_df = pd.concat(X_train_parts)
y_train_df = pd.concat(y_train_parts)
X_test_df = pd.concat(X_test_parts)
y_test_df = pd.concat(y_test_parts)

# Keep one row per unique_id and only the OWA_* columns of the base models.
base_errors = (
    y_test_df
    .drop_duplicates('unique_id')
    .set_index('unique_id')
    .filter(regex='^OWA_', axis=1)
)
# Remove the literal "OWA_" and "mdl_" prefixes. str.lstrip() would be wrong here: it strips
# any leading characters from the given set, so a model name starting with 'm', 'd', 'l',
# 'O', 'W', 'A' or '_' would be truncated.
base_errors.columns = (
    base_errors.columns
    .str.replace(r'^OWA_', '', regex=True)
    .str.replace(r'^mdl_', '', regex=True)
)

# Copy with the display name used in plots; base_errors itself keeps the raw model names.
plot_base_errors = base_errors.rename(columns={'ESRNN': 'ES-RNN'})

# Report the mean OWA of each base model.
for mdl, avg_owa in base_errors.mean().items():
    print(f"{mdl} average : {avg_owa}")

# Collect the ensemble OWA arrays saved as results/<model name>_<frequency initial>.npy:
# one frame per frequency with one column per model, stacked row-wise afterwards.
ensemble_frames = []
for seasonality in seasonalities:
    ensemble_owas = None
    for file in glob.glob(f"results/*{seasonality[0]}.npy"):
        model_name = file.rsplit(f"_{seasonality[0]}.npy")[0].split("/")[1]
        try:
            owas = np.load(file)
        except ValueError:
            # Best effort: report the unreadable file once and carry on with the rest.
            print(f"Skipping unreadable results file: {file}")
            continue
        if ensemble_owas is None:
            ensemble_owas = pd.DataFrame(owas, columns=[model_name])
        else:
            ensemble_owas[model_name] = owas
    # A frequency without any readable result file contributes nothing.
    if ensemble_owas is not None:
        ensemble_frames.append(ensemble_owas)

ensemble_owas_ = pd.concat(ensemble_frames) if ensemble_frames else pd.DataFrame()

# Map the file-derived model names to the labels used in the report.
ensemble_owas_ = ensemble_owas_.rename(columns={'Model Averaging': 'AVG',
                                                'Neural Averaging 2': 'FFORMA-N',
                                                'Neural Stacking': 'NN-STACK',
                                                'OLD_FFORMS': 'FFORMS-R',
                                                'FFORMS': 'FFORMS-G',
                                                })

# Exclude these variants from the summary; absent columns are ignored.
plot_ensemble_errors = ensemble_owas_.drop(columns=["Neural Averaging",
                                                    "Neural Stacking 2",
                                                    "nbeats"], errors='ignore')
plot_ensemble_errors.describe()

ARIMA average : 0.9996073751920922
Comb average : 1.0258061705641983
Damped average : 1.0063590552061679
ESRNN average : 0.9827885788814025
Theta average : 0.9964581368297244


Unnamed: 0,FFORMA,FFORMS-G,AVG,FFORMA-N,NN-STACK,FFORMS-R
count,25000.0,25000.0,25000.0,25000.0,25000.0,25000.0
mean,0.917094,0.920263,0.964534,0.91474,0.960158,0.917495
std,1.22642,1.250609,1.239415,1.22599,1.680363,1.220629
min,0.013589,0.016506,0.025388,0.015617,0.027531,0.037898
25%,0.404952,0.405402,0.381146,0.401684,0.392074,0.398797
50%,0.671758,0.673569,0.663707,0.667684,0.647007,0.666144
75%,1.093473,1.091259,1.101415,1.090782,1.079395,1.082034
max,25.903416,28.923804,24.710538,26.025149,79.151492,25.061488


(1.033092037228542, 0.987487969201155, 1.000995024875622, 1.068478260869565)

In [68]:
# Normalise six raw counts into weights that sum to 1.
# Presumably ordered as: Hourly, Daily, Weekly, Monthly, Yearly, Quarterly — TODO confirm.
raw_weights = np.array((2070, 21135, 1795, 240000, 115000, 120000))
w2 = raw_weights / raw_weights.sum()
w2

array([0.00414, 0.04227, 0.00359, 0.48   , 0.23   , 0.24   ])

In [122]:
# Unweighted mean of three scores.
# NOTE(review): the three values are hard-coded with no visible source — presumably copied
# from per-frequency results; record where they come from.
(0.415+0.983+0.725)/3

0.7076666666666666

In [121]:
# Weighted mean of (0.758, 0.800, 0.819) using weights (0.23, 0.24, 0.48) renormalised to sum to 1.
# NOTE(review): weights and scores are hard-coded; the weights presumably correspond to
# three of the frequency weights computed for w2 — confirm the pairing and order.
(np.array((0.23,0.24,0.48))/np.array((0.23,0.24,0.48)).sum()*(0.758,0.800,0.819)).sum()

0.7994315789473685