In [1]:
import pickle
import pandas as pd
import numpy as np
from modelling import (autoets, autoarima, linear_svr, stl_linear_svr,
    bayesian_ridge, stl_bayesian_ridge)
from load import load_dataset
from data import path
from sklearn.metrics import mean_absolute_error
from statsmodels.tsa.seasonal import STL
from statsmodels.stats.diagnostic import acorr_ljungbox

In [2]:
def store_predictions(model:str, n_datasets:int=98)->None:
    """Run one forecasting model over the datasets and pickle its predictions.

    ``model`` is resolved to one of the forecasting functions imported from
    ``modelling``. That function is called on ``load_dataset(i)`` for every
    ``i`` in ``range(n_datasets)`` and the resulting list is written to
    ``./predictions/{model}.pkl``.

    Parameters
    ----------
    model : str
        One of "autoets", "autoarima", "linear_svr", "stl_linear_svr",
        "bayesian_ridge" or "stl_bayesian_ridge".
    n_datasets : int, optional
        Number of datasets to process (indices ``0 .. n_datasets - 1``).
        Defaults to 98, the value that used to be hard-coded here.

    Raises
    ------
    KeyError
        If ``model`` is not one of the names listed above.
    """
    import os  # local import: only needed to prepare the output directory

    map_to_f = {
        "autoets":autoets,
        "autoarima":autoarima,
        "linear_svr":linear_svr,
        "stl_linear_svr":stl_linear_svr,
        "bayesian_ridge":bayesian_ridge,
        "stl_bayesian_ridge":stl_bayesian_ridge
    }

    # Unknown model names fail here, before any forecasting work is done.
    forecast = map_to_f[model]

    # Make sure the output directory exists *before* running the forecasting
    # loop, so a missing folder cannot make the run fail only after every
    # prediction has already been computed.
    out_path = f"./predictions/{model}.pkl"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    predictions = []
    for i in range(n_datasets):
        predictions.append(forecast(load_dataset(i)))
        print(i)  # progress indicator: index of the dataset just processed

    # A dedicated handle name keeps the forecasting function from being
    # shadowed by the file object.
    with open(out_path, "wb") as out_file:
        pickle.dump(predictions, out_file)

In [4]:
# store_predictions("autoets")
# store_predictions("autoarima")
# store_predictions("linear_svr")
# store_predictions("stl_linear_svr")
# store_predictions("bayesian_ridge")
# store_predictions("stl_bayesian_ridge")

In [5]:
def _read_predictions(pickle_path):
    """Return the object stored in the pickle file at ``pickle_path``."""
    # NOTE: unpickling can execute arbitrary code, so only read files that
    # were produced locally (e.g. by store_predictions).
    with open(pickle_path, "rb") as handle:
        return pickle.load(handle)


# Stored predictions of each model, loaded in the same order as before.
r_autoets = _read_predictions("./predictions/autoets.pkl")
r_autoarima = _read_predictions("./predictions/autoarima.pkl")
r_linear_svr = _read_predictions("./predictions/linear_svr.pkl")
r_stl_linear_svr = _read_predictions("./predictions/stl_linear_svr.pkl")
r_bayesian_ridge = _read_predictions("./predictions/bayesian_ridge.pkl")
r_stl_bayesian_ridge = _read_predictions("./predictions/stl_bayesian_ridge.pkl")

In [6]:
def check_residuals_independence(ts:tuple, alpha:float=0.05)->"bool | float":
    """Ljung-Box check on the STL remainder of a log-transformed series.

    Parameters
    ----------
    ts : tuple
        Dataset tuple; ``ts[0]`` is the series that gets decomposed and
        ``ts[2]`` is used as the seasonal period.
    alpha : float, optional
        Significance level the Ljung-Box p-values are compared against.
        Defaults to 0.05, the value that used to be hard-coded here.

    Returns
    -------
    bool or float
        ``np.nan`` when the period is <= 1 (no decomposition or test is run);
        otherwise ``True`` if the largest Ljung-Box p-value exceeds ``alpha``
        and ``False`` if it does not.
    """
    x = ts[0]
    max_p = ts[2]

    # Without a period above 1 no STL decomposition is attempted, so there
    # are no residuals to test; NaN marks "not applicable".
    if max_p<=1:
        return np.nan

    # Series whose minimum is <= 1 are shifted upward by |min| + 1 before the
    # log transform; all other series are transformed as they are.
    m = np.min(x)
    if m<=1:
        y = np.log1p(x + np.abs(m) + 1)
    else:
        y = np.log1p(x)

    stl = STL(y, period=max_p).fit()
    test = acorr_ljungbox(stl.resid, return_df=True, auto_lag=True, period=max_p)

    # Ljung-Box null hypothesis: no autocorrelation up to the tested lag, so a
    # p-value above alpha means independence is not rejected.
    # NOTE(review): taking the maximum over all returned lags reports
    # independence as soon as a single lag is not rejected -- confirm this is
    # the intended criterion rather than "all lags" or "the final lag".
    # bool() turns the numpy boolean into a plain Python bool.
    return bool(np.max(test["lb_pvalue"])>alpha)

In [7]:
# Result table under construction: one (initially empty) list per output
# column, in the order the columns should appear.
df = {
    column: []
    for column in (
        "category",
        "autoets",
        "autoarima",
        "linear_svr",
        "stl_linear_svr",
        "bayesian_ridge",
        "stl_bayesian_ridge",
        "residuals_are_independent",
    )
}

# Number of datasets to evaluate (dataset indices 0 .. N-1).
N = 98

In [8]:
def get_category(x):
    """Return the fourth "/"-separated component (index 3) of the string ``x``."""
    parts = x.split("/")
    return parts[3]

In [9]:
# Each model's stored predictions, paired with the result column that
# receives its mean absolute error.
_model_predictions = (
    ("autoets", r_autoets),
    ("autoarima", r_autoarima),
    ("linear_svr", r_linear_svr),
    ("stl_linear_svr", r_stl_linear_svr),
    ("bayesian_ridge", r_bayesian_ridge),
    ("stl_bayesian_ridge", r_stl_bayesian_ridge),
)

for i in range(N):
    ts = load_dataset(i)
    df["category"].append(get_category(path[i]))

    # ts[1] holds the values every model's predictions are scored against.
    xx = ts[1]
    for _column, _preds in _model_predictions:
        df[_column].append(mean_absolute_error(xx, _preds[i]))

    df["residuals_are_independent"].append(check_residuals_independence(ts))

In [10]:
# Turn the dict of per-column lists into a DataFrame. This rebinds `df`,
# which is a plain dict up to this point.
df = pd.DataFrame(df)

In [11]:
# Write the results table to results.csv in the current working directory
# (pandas also writes the row index as the first column by default).
df.to_csv("results.csv")