### Imports

In [1]:
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px

import plotly.graph_objects as go
from plotly.colors import n_colors

### Constants

In [2]:
M5_RES = "results/evaluation/"
FOZZY_RES = "results/fozzy-evaluation/"
FAV_RES = "results/favorita-evaluation/"

### Functions

In [3]:
def read_data_to_single_df(folder: str) -> pd.DataFrame:
    result_df = None
    key_columns = ["unique_id", "cutoff", "metric"]
    
    for path in os.listdir(folder):
        print(f"Reading {path}...")

        data = pd.read_csv(os.path.join(folder, path))

        if result_df is None:
            result_df = data
        else:
            result_df = pd.merge(result_df, data, on=key_columns, how="inner")
    return result_df

In [4]:
def prepare_data(df: pd.DataFrame, metric: str) -> pd.DataFrame:
    # remove unused columns
    df = df[np.setdiff1d(df.columns , ['unique_id', 'cutoff'])]
    
    # select metric for analysis
    data = df.loc[(df["metric"] == metric)]
    data = data.drop(columns=["metric"])
    data = data.melt(var_name='model', value_name='value')
    return data

In [6]:
def build_ridgeline_plot(df: pd.DataFrame, metric: str):
    # prepare data
    data = prepare_data(df, metric)
    
    # filter outliers
    data = data.loc[data["value"] < data["value"].quantile(0.9)]
    
    # sort models
    # Calculate the standard deviation for each group
    group_std = data.groupby('model')['value'].mean()

    # Sort the groups based on their standard deviation values
    sorted_groups = group_std.sort_values().index.tolist()
    
    # plot
    colors = n_colors(
        'rgb(5, 200, 200)', 
        'rgb(200, 10, 10)', 
        data["model"].nunique(), 
        colortype='rgb'
    )

    fig = go.Figure()
    for model, color in zip(sorted_groups, colors):
        x_data = data["value"].loc[data["model"] == model].to_numpy()
        fig.add_trace(go.Violin(x=x_data, line_color=color, name=model))

    fig.update_traces(orientation='h', side='positive', width=3, points=False)
    fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
    fig.update_layout(
        width=1000, 
        height=800,
        title=f"Violin Plot of {metric.upper()}"
    )
    fig.show()

### Read data

In [7]:
df = read_data_to_single_df(folder=FOZZY_RES)
df.head()

Reading SimpleExponentialSmoothing.csv...
Reading SeasonalWindowAverage.csv...
Reading WindowAverage.csv...
Reading RandomWalkWithDrift.csv...
Reading ARCH.csv...
Reading HistoricAverage.csv...
Reading ARIMA.csv...
Reading SeasonalNaive.csv...
Reading AutoRegressive.csv...
Reading Theta.csv...
Reading Holt.csv...
Reading IMAPA.csv...
Reading CrostonClassic.csv...
Reading TSB.csv...
Reading Naive.csv...
Reading MSTL.csv...
Reading ADIDA.csv...
Reading CrostonSBA.csv...


Unnamed: 0,unique_id,cutoff,metric,SES,SeasWA,WindowAverage,RWD,ARCH(1),HistoricAverage,ARIMA,...,AutoRegressive,Theta,Holt,IMAPA,CrostonClassic,TSB,Naive,MSTL,ADIDA,CrostonSBA
0,1241-32485,2021-07-05,mae,1.867407e-31,0.001531,0.0,0.011538,0.031223,0.001531,0.001531,...,0.001531,0.0001750516,2.679493e-23,1.867407e-31,0.3,3.7989949999999996e-20,0.0,0.0008765727,1.867407e-31,0.285
1,1241-32485,2021-07-05,mape,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1241-32485,2021-07-05,mse,0.0,3.3e-05,0.0,0.000172,0.001394,2e-06,2e-06,...,2e-06,3.94717e-08,1e-45,0.0,0.09,1.4432370000000001e-39,0.0,7.683936e-07,0.0,0.081225
3,1241-32485,2021-07-05,rmse,0.0,0.005727,0.0,0.0131,0.03734,0.001531,0.001531,...,0.001531,0.0001986748,3.7433920000000004e-23,0.0,0.3,3.7989949999999996e-20,0.0,0.0008765806,0.0,0.285
4,1241-32485,2021-07-05,smape,200.0,14.285715,0.0,200.0,200.0,200.0,200.0,...,200.0,200.0,200.0,200.0,200.0,200.0,0.0,200.0,200.0,200.0


### Analytics

In [8]:
df.head()

Unnamed: 0,unique_id,cutoff,metric,SES,SeasWA,WindowAverage,RWD,ARCH(1),HistoricAverage,ARIMA,...,AutoRegressive,Theta,Holt,IMAPA,CrostonClassic,TSB,Naive,MSTL,ADIDA,CrostonSBA
0,1241-32485,2021-07-05,mae,1.867407e-31,0.001531,0.0,0.011538,0.031223,0.001531,0.001531,...,0.001531,0.0001750516,2.679493e-23,1.867407e-31,0.3,3.7989949999999996e-20,0.0,0.0008765727,1.867407e-31,0.285
1,1241-32485,2021-07-05,mape,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1241-32485,2021-07-05,mse,0.0,3.3e-05,0.0,0.000172,0.001394,2e-06,2e-06,...,2e-06,3.94717e-08,1e-45,0.0,0.09,1.4432370000000001e-39,0.0,7.683936e-07,0.0,0.081225
3,1241-32485,2021-07-05,rmse,0.0,0.005727,0.0,0.0131,0.03734,0.001531,0.001531,...,0.001531,0.0001986748,3.7433920000000004e-23,0.0,0.3,3.7989949999999996e-20,0.0,0.0008765806,0.0,0.285
4,1241-32485,2021-07-05,smape,200.0,14.285715,0.0,200.0,200.0,200.0,200.0,...,200.0,200.0,200.0,200.0,200.0,200.0,0.0,200.0,200.0,200.0


In [9]:
for metric in df.metric.unique():
    res = prepare_data(df, metric)
    
    print(f"Analytics of metric: {metric.upper()}")
    print(res["value"].describe().round(2))
    
    # todo: add stat per model
    
    print("\n")

Analytics of metric: MAE
count    1.628801e+06
mean     2.789006e+05
std      3.548232e+08
min      0.000000e+00
25%      0.000000e+00
50%      6.000000e-02
75%      3.600000e-01
max      4.528393e+11
Name: value, dtype: float64


Analytics of metric: MAPE
count    1656504.00
mean           8.65
std         3020.11
min            0.00
25%            0.00
50%            0.00
75%            6.54
max      3263154.00
Name: value, dtype: float64


Analytics of metric: MSE
count    1.628801e+06
mean     1.286807e+18
std      1.642274e+21
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e-02
75%      3.300000e-01
max      2.095944e+24
Name: value, dtype: float64


Analytics of metric: RMSE
count    1.628801e+06
mean     8.906913e+05
std      1.134375e+09
min      0.000000e+00
25%      0.000000e+00
50%      8.000000e-02
75%      5.800000e-01
max      1.447738e+12
Name: value, dtype: float64


Analytics of metric: SMAPE
count    1656504.00
mean         157.75
std           75.57
min

### Visualizations

In [10]:
df["metric"].value_counts()

mae      92028
mape     92028
mse      92028
rmse     92028
smape    92028
Name: metric, dtype: int64

In [11]:
# build_ridgeline_plot(df, "mae")

### Experimenting

In [14]:
M5_METRICS = "results/metrics/"
FOZZY_METRICS = "results/fozzy-metrics/"
FAV_METRICS = "results/favorita-metrics/"

In [25]:
path = M5_METRICS
metric_names = ["mae", "mape", "mse", "rmse", "smape"]
data = pd.DataFrame()

for file in os.listdir(path):
    
    df = pd.read_csv(os.path.join(path, file))
    df["metrics"] = metric_names
    
    if data.empty:
        data = df
    else:
        data = data.merge(df, how='inner', on='metrics')

data = pd.melt(data, id_vars=['metrics'], var_name='model_name', value_name='metrics_value')
data

Unnamed: 0,metrics,model_name,metrics_value
0,mae,SES,1.087095
1,mape,SES,29.250298
2,mse,SES,4.988080
3,rmse,SES,1.374435
4,smape,SES,142.377790
...,...,...,...
90,mae,SeasWA,1.072436
91,mape,SeasWA,27.349192
92,mse,SeasWA,5.015110
93,rmse,SeasWA,1.376366


# Summary Statistics

In [57]:
M5_RES = "results/evaluation/"
FOZZY_RES = "results/fozzy-evaluation/"
FAV_RES = "results/favorita-evaluation/"

In [58]:
df = read_data_to_single_df(folder=M5_RES)
df.head()

Reading SimpleExponentialSmoothing-Holt.csv...
Reading ADIDA-CrostonClassic-CrostonSBA-IMAPA-TSB.csv...
Reading MSTL-Theta-ARCH.csv...
Reading AutoARIMA.csv...
Reading ARIMA-AutoRegressive.csv...
Reading HistoricAverage-Naive-RandomWalkWithDrift-SeasonalNaive-WindowAverage-SeasonalWindowAverage.csv...


Unnamed: 0,unique_id,cutoff,metric,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,...,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
0,FOODS_1_001_CA_1,2016-04-10,mse,1.193014,1.145749,1.152618,1.152664,1.1448,1.169181,1.203514,...,6.001423,1.153829,1.189459,1.428748,1.189471,2.142857,2.134372,2.214286,1.270408,1.392128
1,FOODS_1_001_CA_1,2016-04-10,mae,0.842261,0.744192,0.770742,0.770873,0.739472,0.806998,0.85502,...,1.942738,0.774141,0.775964,0.999062,0.775972,1.285714,1.282892,0.928571,0.918367,0.887755
2,FOODS_1_001_CA_1,2016-04-10,rmse,1.092252,1.070396,1.0736,1.073622,1.069953,1.081287,1.097048,...,2.44978,1.074164,1.090623,1.195302,1.090629,1.46385,1.460949,1.488048,1.127124,1.179885
3,FOODS_1_001_CA_1,2016-04-10,mape,20.09854,14.279613,15.852062,15.859879,13.995455,18.004808,20.856115,...,104.52639,16.053923,23.679249,27.65133,23.680695,46.42857,46.25734,35.714287,24.617348,31.632656
4,FOODS_1_001_CA_1,2016-04-10,smape,94.529144,90.83461,91.90121,91.9064,90.62074,93.282524,94.958435,...,177.68495,92.035286,103.88027,99.00221,103.882324,104.76191,104.70302,92.85714,96.93432,116.06454


In [64]:
df["metric"].unique()

array(['mse', 'mae', 'rmse', 'mape', 'smape'], dtype=object)

In [63]:
df.loc[df["metric"] == "mse"].describe()

Unnamed: 0,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,MSTL,Theta,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
count,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91276.0
mean,4.98808,4.725869,4.619286,4.779842,4.728084,4.625645,4.629081,5.702272,4.738752,110.7278,4.514935,6.036349,4.308738,6.036355,8.400985,8.446992,7.541581,4.536812,5.015109
std,33.984351,34.423007,31.979067,31.055682,30.928797,32.040294,30.343928,39.06517,34.3911,11532.86,29.345472,42.672787,25.79217,42.672733,58.695431,59.024496,47.099872,29.441786,35.393271
min,0.0,0.0,0.0,8.4e-05,7.6e-05,0.0,0.0,0.0,5.205866e-14,0.02125937,0.0,0.000109,1.819407e-07,0.000109,0.0,2e-05,0.0,0.0,0.0
25%,0.2971,0.253336,0.252624,0.275085,0.270834,0.254608,0.278553,0.381227,0.2653536,1.130024,0.262108,0.289559,0.2781746,0.289563,0.428571,0.431365,0.5,0.265306,0.297012
50%,0.85881,0.766238,0.7649,0.804206,0.798968,0.772213,0.809251,1.05894,0.7832222,3.283733,0.785714,0.822812,0.7927491,0.822808,1.142857,1.150394,1.5,0.785714,0.830904
75%,2.507803,2.231375,2.224282,2.318875,2.306537,2.234782,2.318184,2.959328,2.252761,9.982304,2.2638,2.484151,2.24456,2.484151,3.857143,3.87535,4.071429,2.25,2.42684
max,3167.0725,2765.889,2136.19,2562.978,2599.7092,2019.6334,1847.338,4804.0503,2730.083,3325161.0,2365.5493,2659.8025,1775.579,2659.8025,4564.5713,4601.3853,3733.2856,2047.6787,3229.355


In [65]:
df.loc[df["metric"] == "mae"].describe()

Unnamed: 0,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,MSTL,Theta,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
count,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91276.0
mean,1.087095,1.038411,1.033333,1.077581,1.062102,1.035718,1.049493,1.156551,1.043163,2.517262,1.018826,1.150873,1.035746,1.150872,1.345297,1.353526,1.250565,1.031242,1.072436
std,1.441369,1.403145,1.371717,1.40404,1.379039,1.373153,1.360295,1.478564,1.404112,7.534472,1.325902,1.661361,1.259097,1.661362,2.069091,2.074269,1.658929,1.327146,1.406353
min,0.0,0.0,0.0,0.009163,0.008705,0.0,0.0,3.408346e-31,2.250606e-07,0.121541,0.0,0.01046,0.000414,0.01046,0.0,0.003894,0.0,0.0,0.0
25%,0.419891,0.410932,0.410019,0.434998,0.426245,0.412216,0.423444,0.4425449,0.4157214,0.846358,0.386267,0.444367,0.427114,0.444367,0.285714,0.298516,0.428571,0.418367,0.413265
50%,0.73539,0.703594,0.702412,0.714286,0.714286,0.7054,0.714286,0.7861608,0.7121178,1.460121,0.696796,0.723075,0.715019,0.723075,0.785714,0.817161,0.857143,0.714286,0.714286
75%,1.264526,1.176379,1.175031,1.214286,1.198818,1.179644,1.214286,1.335934,1.187266,2.567393,1.177836,1.237769,1.184176,1.237769,1.571428,1.582021,1.5,1.183674,1.214286
max,53.09783,48.79318,40.586636,42.97513,43.10851,41.625942,40.135426,61.01122,48.39952,944.9587,35.222485,48.507362,34.793026,48.507362,64.14286,64.332344,51.857143,38.867344,46.882652


In [68]:
df.loc[df["metric"] == "rmse"].describe()

Unnamed: 0,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,MSTL,Theta,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
count,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91276.0
mean,1.374435,1.311249,1.306135,1.346552,1.339174,1.308334,1.327559,1.506866,1.31744,3.073506,1.310085,1.435069,1.312324,1.43507,1.720832,1.726687,1.749777,1.312924,1.376366
std,1.76041,1.733934,1.706848,1.722403,1.713105,1.707026,1.693133,1.852475,1.732956,10.063919,1.672914,1.994234,1.608283,1.994235,2.332334,2.337863,2.11658,1.677222,1.766568
min,0.0,0.0,0.0,0.009163,0.008705,0.0,0.0,0.0,2.281637e-07,0.145806,0.0,0.01046,0.000427,0.01046,0.0,0.004421,0.0,0.0,0.0
25%,0.545069,0.503325,0.502617,0.524486,0.520418,0.504587,0.527781,0.617436,0.5151248,1.063026,0.511965,0.538107,0.527423,0.53811,0.654654,0.656784,0.707107,0.515079,0.544988
50%,0.92672,0.87535,0.874586,0.896776,0.89385,0.878756,0.899584,1.029048,0.8849984,1.812107,0.886405,0.90709,0.890365,0.907088,1.069045,1.072564,1.224745,0.886405,0.911539
75%,1.583604,1.493779,1.491403,1.522785,1.518729,1.494919,1.522558,1.72027,1.50092,3.159478,1.504593,1.576119,1.498186,1.576119,1.963961,1.968591,2.017778,1.5,1.557832
max,56.27675,52.59172,46.218933,50.625862,50.987343,44.94033,42.98067,69.31126,52.25019,1823.5024,48.636913,51.573273,42.137623,51.573273,67.56161,67.83351,61.10062,45.251286,56.827415


In [67]:
df.loc[df["metric"] == "mape"].describe()

Unnamed: 0,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,MSTL,Theta,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
count,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0
mean,29.250298,26.868514,26.774539,26.48459,26.184173,26.847736,27.460527,32.450371,27.006792,67.993334,27.355012,27.010148,25.977578,27.010266,40.528393,40.771968,38.057276,26.771885,27.349192
std,29.527754,25.654895,25.244482,23.874044,22.455125,25.464791,26.48025,28.26255,25.972803,158.804249,23.872042,25.683396,23.270962,25.683478,50.257492,50.442009,33.1038,24.34992,22.968398
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,11.965025,12.444713,12.444029,12.322231,12.581241,12.374462,11.942952,14.314177,12.22929,21.88119,12.95484,12.23242,12.237921,12.23242,11.904762,11.904762,14.285715,12.244898,13.265306
50%,22.316526,21.370598,21.392586,21.317484,21.798831,21.355099,21.425909,27.037155,21.369303,54.767173,22.35267,22.113335,21.252986,22.113335,28.57143,28.730465,30.952381,21.428572,23.308865
75%,37.194465,33.294332,33.175658,33.306864,33.326197,33.374597,34.925354,43.109061,33.800328,103.620857,35.107004,34.702516,33.103801,34.702516,50.0,50.970096,52.380955,34.251698,35.79932
max,1004.9638,829.04443,826.8097,612.2939,580.2507,826.8097,816.71893,782.7985,842.79663,44204.92,669.8195,654.47235,622.79785,654.4519,1953.1461,1961.3436,766.18896,771.9373,697.3653


In [66]:
df.loc[df["metric"] == "smape"].describe()

Unnamed: 0,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,MSTL,Theta,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
count,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0,91470.0
mean,142.377788,140.677284,140.619712,140.636898,141.386592,140.585354,141.00236,148.834535,140.736819,178.164729,137.805966,142.422946,140.404243,142.423165,84.07169,157.073128,82.295831,126.161218,132.071585
std,50.74304,50.34477,50.351497,49.952667,49.96027,50.354938,50.6157,46.182703,50.409526,16.717319,53.510392,47.799815,50.040658,47.79987,50.598082,52.373477,37.961806,57.654995,44.781098
min,0.0,0.0,0.0,10.859313,11.360459,0.0,0.0,11.329002,10.850495,131.90668,0.0,10.587035,10.649645,10.587035,0.0,11.142449,0.0,0.0,0.0
25%,100.072637,98.249977,98.105013,98.973732,99.868981,98.117598,98.555802,115.619923,98.317187,164.637305,93.391572,103.371602,98.959779,103.371602,42.857143,111.90476,57.14286,81.878376,99.26342
50%,153.273095,152.140465,152.112455,152.209435,153.662355,151.94415,151.549945,161.166435,151.9109,179.404415,150.009805,152.947965,151.196025,152.948095,85.71429,200.0,85.71429,134.95796,140.95238
75%,191.12199,187.827365,187.756587,187.049522,187.56083,187.790757,189.13659,189.394322,188.172793,192.189295,188.232207,186.37158,187.209843,186.37158,121.904755,200.0,110.0,179.344,168.88889
max,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0


# ANOVA

In [48]:
import pandas as pd
from scipy.stats import f_oneway

In [49]:
M5_RES = "results/evaluation/"
FOZZY_RES = "results/fozzy-evaluation/"
FAV_RES = "results/favorita-evaluation/"

In [50]:
df = read_data_to_single_df(folder=M5_RES)
df.head()

Reading SimpleExponentialSmoothing-Holt.csv...
Reading ADIDA-CrostonClassic-CrostonSBA-IMAPA-TSB.csv...
Reading MSTL-Theta-ARCH.csv...
Reading AutoARIMA.csv...
Reading ARIMA-AutoRegressive.csv...
Reading HistoricAverage-Naive-RandomWalkWithDrift-SeasonalNaive-WindowAverage-SeasonalWindowAverage.csv...


Unnamed: 0,unique_id,cutoff,metric,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,...,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
0,FOODS_1_001_CA_1,2016-04-10,mse,1.193014,1.145749,1.152618,1.152664,1.1448,1.169181,1.203514,...,6.001423,1.153829,1.189459,1.428748,1.189471,2.142857,2.134372,2.214286,1.270408,1.392128
1,FOODS_1_001_CA_1,2016-04-10,mae,0.842261,0.744192,0.770742,0.770873,0.739472,0.806998,0.85502,...,1.942738,0.774141,0.775964,0.999062,0.775972,1.285714,1.282892,0.928571,0.918367,0.887755
2,FOODS_1_001_CA_1,2016-04-10,rmse,1.092252,1.070396,1.0736,1.073622,1.069953,1.081287,1.097048,...,2.44978,1.074164,1.090623,1.195302,1.090629,1.46385,1.460949,1.488048,1.127124,1.179885
3,FOODS_1_001_CA_1,2016-04-10,mape,20.09854,14.279613,15.852062,15.859879,13.995455,18.004808,20.856115,...,104.52639,16.053923,23.679249,27.65133,23.680695,46.42857,46.25734,35.714287,24.617348,31.632656
4,FOODS_1_001_CA_1,2016-04-10,smape,94.529144,90.83461,91.90121,91.9064,90.62074,93.282524,94.958435,...,177.68495,92.035286,103.88027,99.00221,103.882324,104.76191,104.70302,92.85714,96.93432,116.06454


In [51]:
df = df.drop(columns="cutoff")

In [52]:
df.dtypes

unique_id           object
metric              object
SES                float64
Holt               float64
ADIDA              float64
CrostonClassic     float64
CrostonSBA         float64
IMAPA              float64
TSB                float64
MSTL               float64
Theta              float64
ARCH(1)            float64
AutoARIMA          float64
ARIMA              float64
AutoRegressive     float64
HistoricAverage    float64
Naive              float64
RWD                float64
SeasonalNaive      float64
WindowAverage      float64
SeasWA             float64
dtype: object

In [53]:
models = [
    'SES', 'Holt', 'ADIDA', 'CrostonClassic',
       'CrostonSBA', 'IMAPA', 'TSB', 'MSTL', 'Theta', 'ARCH(1)', 'AutoARIMA',
       'ARIMA', 'AutoRegressive', 'HistoricAverage', 'Naive', 'RWD',
       'SeasonalNaive', 'WindowAverage', 'SeasWA'
]

print("Shape before", df.shape)
for col in models:
    df = df.loc[df[col] != 0]
    
df.shape

Shape before (457350, 21)


(420762, 21)

In [54]:
df.describe()

Unnamed: 0,SES,Holt,ADIDA,CrostonClassic,CrostonSBA,IMAPA,TSB,MSTL,Theta,ARCH(1),AutoARIMA,ARIMA,AutoRegressive,HistoricAverage,Naive,RWD,SeasonalNaive,WindowAverage,SeasWA
count,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420762.0,420204.0
mean,35.801365,34.755012,34.699906,34.619771,34.7034,34.714691,34.979839,37.999677,34.813433,75.26298,34.44595,35.399269,34.400714,35.399342,29.491424,42.404477,28.035761,33.206887,33.94741
std,60.958851,59.974898,59.638798,59.290922,59.477084,59.649196,59.741154,62.968459,60.024265,5378.113,59.03988,60.948835,58.722854,60.948923,53.752495,72.374276,45.059526,57.272029,57.09666
min,0.004864,9e-06,0.00038,0.000243,0.000219,0.000721,0.001843,0.00594,0.009746,0.011393,3.091789e-19,0.000643,0.000781,0.000643,0.071429,0.071522,0.071429,0.005102,8.51495e-07
25%,0.745171,0.692152,0.691031,0.69443,0.688433,0.695253,0.716285,0.82715,0.70395,1.60181,0.6967192,0.706762,0.706273,0.70676,0.886405,0.886405,1.0,0.707107,0.7101923
50%,2.275407,2.107412,2.10234,2.113351,2.096468,2.108404,2.156013,2.516106,2.119705,5.999426,2.124794,2.234501,2.097758,2.23456,3.214286,3.223191,3.142857,2.116505,2.199708
75%,39.776062,35.911821,35.714287,35.925228,35.740129,35.904807,37.329921,45.659102,36.316314,116.6179,36.43812,37.868915,35.51968,37.870436,42.857143,54.233922,44.642857,34.82143,38.42651
max,3167.0725,2765.889,2136.19,2562.978,2599.7092,2019.6334,1847.338,4804.0503,2730.0828,3325161.0,2365.549,2659.8025,1775.5791,2659.8025,4564.5713,4601.3853,3733.2856,2047.6787,3229.355


In [55]:
# df = df[["unique_id", "metric", "ARIMA", "AutoARIMA"]]

In [56]:
# List of metrics (replace with your actual metric names)
metric_names = ['mae', 'mape', 'mse', 'rmse', 'smape']

# Loop through each metric
for metric_name in metric_names:
    metric_data = []

    # Loop through each model column (excluding 'unique_id' and 'metric')
    for column in df.columns:
        if column not in ['unique_id', 'metric']:
            model_metric_data = df[df['metric'] == metric_name][column]
            metric_data.append(model_metric_data)

    # Perform ANOVA
    f_statistic, p_value = f_oneway(*metric_data)

    print(f"Metric: {metric_name}")
    print("ANOVA results:")
    print("F-statistic:", f_statistic)
    print("P-value:", p_value)

    if p_value < 0.05:
        print("Reject null hypothesis: There is a significant difference among at least one group.\n")
    else:
        print("Fail to reject null hypothesis: No significant difference.\n")


Metric: mae
ANOVA results:
F-statistic: nan
P-value: nan
Fail to reject null hypothesis: No significant difference.

Metric: mape
ANOVA results:
F-statistic: 4478.199551920638
P-value: 0.0
Reject null hypothesis: There is a significant difference among at least one group.

Metric: mse
ANOVA results:
F-statistic: nan
P-value: nan
Fail to reject null hypothesis: No significant difference.

Metric: rmse
ANOVA results:
F-statistic: nan
P-value: nan
Fail to reject null hypothesis: No significant difference.

Metric: smape
ANOVA results:
F-statistic: 13952.621034078395
P-value: 0.0
Reject null hypothesis: There is a significant difference among at least one group.

