In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Benchmark sizes (number of time series); one timing CSV exists per size.
n_ts = [10_000, 100_000, 1_000_000, 5_000_000, 10_000_000]

# NOTE(review): presumably the hourly price of the cluster in dollars — confirm.
CLUSTER_DOLLARS_PER_HOUR = 26.688

# Load every timing file and stack them into a single frame. The
# 'Unnamed: 0' column is discarded before concatenation.
times = pd.concat(
    [
        pd.read_csv(f'../results/time_{size}.csv').drop(columns='Unnamed: 0')
        for size in n_ts
    ]
)

# Take an explicit copy of the column subset so that adding a column below
# writes to an independent frame and cannot raise SettingWithCopyWarning.
times = times[['length', 'time', 'cpus']].copy()

# 'time' is later labelled 'Time (mins)', hence the division by 60 to get hours.
times['Cost (Dollars)'] = times['time'] * CLUSTER_DOLLARS_PER_HOUR / 60

In [None]:
# Build one single-row frame of per-model mean squared errors for each
# benchmark size, then stack the rows into `evaluation`.
evaluation_frames = []
for size in n_ts:
    fcast = pd.read_parquet(f'../results/forecasts_{size}.parquet')
    # Every column other than 'ds', 'cutoff' and 'y' is scored as a model.
    models = fcast.drop(columns=['ds', 'cutoff', 'y']).columns
    eval_models = {
        name: np.mean((fcast['y'] - fcast[name]) ** 2)
        for name in models
    }
    # Distinct row counts across the (unique_id, cutoff) groups.
    # NOTE(review): confirm this is the intended meaning of 'n_windows';
    # if groups differ in size, the one-row DataFrame below will raise.
    group_sizes = fcast.groupby(['unique_id', 'cutoff']).size()
    eval_models.update(length=size, n_windows=np.unique(group_sizes))
    evaluation_frames.append(pd.DataFrame(eval_models, index=[0]))
evaluation = pd.concat(evaluation_frames).reset_index(drop=True)

In [None]:
# Raw column identifiers mapped to the short labels used in the report tables.
display_names = {
    'croston_classic': 'Croston',
    'seasonal_naive_season_length-7': 'SeasNaive',
    'naive': 'Naive',
    'adida': 'ADIDA',
    'historic_average': 'HistoricAverage',
    'seasonal_window_average_season_length-7_window_size-4': 'SeasWindowAverage',
    'imapa': 'iMAPA',
    'window_average_window_size-7': 'WindowAverage',
    'seasonal_exponential_smoothing_season_length-7_alpha-0.9': 'SeasExpSmooth',
    'n_windows': 'CVWindows',
}
evaluation = evaluation.rename(columns=display_names)

In [None]:
# Attach the accuracy metrics to each timing row; the left join keeps every
# timing row even if no evaluation exists for its 'length'.
results = pd.merge(times, evaluation, how='left', on=['length'])

In [None]:
# Swap the raw timing column names for their presentation headers.
timing_headers = dict(
    zip(
        ('length', 'time', 'cpus'),
        ('N time series', 'Time (mins)', 'N cpus'),
    )
)
results = results.rename(columns=timing_headers)

In [None]:
# Columns holding one error metric per forecasting model.
model_cols = [
    'Croston',
    'SeasNaive',
    'Naive',
    'ADIDA',
    'HistoricAverage',
    'SeasWindowAverage',
    'iMAPA',
    'WindowAverage',
    'SeasExpSmooth',
]

# Columns describing the run itself (size, duration, resources, cost).
extra_cols = [
    'N time series',
    'Time (mins)',
    'N cpus',
    'CVWindows',
    'Cost (Dollars)',
]

In [None]:
# Render every model error as a string with four decimals.
# `DataFrame.applymap` is deprecated (pandas >= 2.1), so the formatter is
# applied column by column through `Series.map`, which gives the same
# element-wise result on both old and new pandas versions.
results[model_cols] = results[model_cols].apply(
    lambda column: column.map('{:.4f}'.format)
)

In [None]:
# Render the run time and CV-window columns as strings with two decimals.
# `DataFrame.applymap` is deprecated (pandas >= 2.1), so the formatter is
# applied column by column through `Series.map`, which gives the same
# element-wise result on both old and new pandas versions.
two_decimal_cols = ['Time (mins)', 'CVWindows']
results[two_decimal_cols] = results[two_decimal_cols].apply(
    lambda column: column.map('{:.2f}'.format)
)

In [None]:
# Show the series counts with thousands separators (the values become strings).
with_thousands_sep = '{:,}'.format
results['N time series'] = results['N time series'].map(with_thousands_sep)

In [None]:
# Show the cost as a dollar amount with two decimals (the values become strings).
as_dollars = '${:.2f}'.format
results['Cost (Dollars)'] = results['Cost (Dollars)'].map(as_dollars)

In [None]:
# Keep only the report columns: run metadata first, then the model errors.
report_cols = [*extra_cols, *model_cols]
results = results[report_cols]

In [None]:
# Emit the run-metadata table as Markdown text, without the index column.
run_table_md = results[extra_cols].to_markdown(index=False)
print(run_table_md)

In [None]:
# Emit the per-model error table as Markdown text, keyed by the series count.
accuracy_cols = ['N time series', *model_cols]
accuracy_table_md = results[accuracy_cols].to_markdown(index=False)
print(accuracy_table_md)