In [1]:
from sktime.datasets import load_airline
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing # Holt-Winters
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.sarimax import SARIMAX
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.compose import BaggingForecaster
from sktime.forecasting.trend import TrendForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.forecasting.trend import STLForecaster
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.compose import EnsembleForecaster
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from tqdm import tqdm
from tsai.basics import *

  from .autonotebook import tqdm as notebook_tqdm


In [53]:
from sklearn.model_selection import train_test_split
import numpy as np
from tsai.basics import *
# Names of the benchmark datasets this notebook works with. get_dataset() sends
# names containing "ETT" to the long-term-forecasting loader and names containing
# "m4", "nn5", "tourism" or "electricity" to the Monash loader.
dataset_names = [
    "ETTh1", "ETTh2", "ETTm1", "ETTm2",
    'm4_yearly_dataset',
    'm4_quarterly_dataset',
    'm4_monthly_dataset',
    'm4_weekly_dataset',
    'm4_daily_dataset',
    'm4_hourly_dataset',
    "nn5_weekly_dataset",
    "nn5_daily_dataset_without_missing_values",
    'electricity_hourly_dataset',
    # The comma after the next entry used to be missing, so Python's implicit string
    # concatenation fused it with 'tourism_yearly_dataset' into one bogus name.
    'electricity_weekly_dataset',
    'tourism_yearly_dataset',
    'tourism_quarterly_dataset',
    'tourism_monthly_dataset',
]
def get_dataset(datasetname):
    """Load a benchmark dataset and cut it into sliding-window train/test samples.

    Parameters
    ----------
    datasetname : str
        Names containing "ETT" are loaded with ``get_long_term_forecasting_data``;
        names containing "m4", "nn5", "tourism" or "electricity" are loaded with
        ``get_Monash_forecasting_data``.

    Returns
    -------
    (train, test, fh, test_cat)
        ``train`` / ``test`` hold the windowed inputs at index 0 and the windowed
        targets at index 1. For ETT they are the direct ``SlidingWindow`` outputs;
        for Monash data they are lists of per-series windows concatenated on axis 0.
        ``fh`` is the forecasting horizon in steps and ``test_cat`` is the
        un-windowed test data.

    Raises
    ------
    ValueError
        If ``datasetname`` belongs to none of the supported families (the function
        used to fall through and return ``None``).
    """
    # Local import so the function does not depend on a later notebook cell.
    from sklearn.preprocessing import StandardScaler

    def _with_batch_axis(pair):
        # Same rule as the notebook's tuple-based adapt(): a 2-D target gets a leading
        # axis. Inlined because a later cell rebinds the name `adapt` to a helper with
        # a different signature.
        target = pair[1]
        return (pair[0], target[None]) if len(target.shape) == 2 else pair

    if "ETT" in datasetname:
        ts = get_long_term_forecasting_data(datasetname)
        # Drop the first column and keep the remaining columns as floats.
        ts = ts.values[:, 1:].astype(float)
        # NOTE(review): the scaler is fitted on the whole series, i.e. before the
        # train/test split below - confirm that this is intended.
        ts = StandardScaler().fit_transform(ts)
        train_, _, test_, _ = train_test_split(ts, ts, test_size=0.2, shuffle=False)
        train_sd = SlidingWindow(100-8, horizon=8)
        test_sd = SlidingWindow(100-8, horizon=8, stride=100)
        return train_sd(train_), test_sd(test_), 8, test_

    if any(dname in datasetname for dname in ['m4', 'nn5', 'tourism', 'electricity']):
        ts = get_Monash_forecasting_data(datasetname)
        value_col = ts.columns[2]
        print("transforming ...")
        # Standardise the value column as ONE feature with one row per observation.
        # The previous code fitted the scaler on a (1, N) array, which scikit-learn
        # reads as a single sample with N features, and stored the result in
        # `ts.values`, which for a mixed-dtype frame is a temporary copy - so the
        # scaling never reached the data.
        scaled = StandardScaler().fit_transform(
            ts[value_col].to_numpy(dtype=float).reshape(-1, 1))[:, 0]
        ts = ts.copy()
        ts[value_col] = scaled
        # One array per series, in order of first appearance. A single groupby pass
        # replaces the per-series boolean masks, which rescanned the whole frame once
        # for every series.
        sample_datasets = [group.values[:, 2:]
                           for _, group in ts.groupby('series_name', sort=False, observed=True)]
        # Split by series (not by time): the last 20% of the series form the test set.
        train_, _, test_, _ = train_test_split(sample_datasets, sample_datasets, test_size=0.2, shuffle=False)
        test_cat = np.concatenate(test_, axis=0)
        # Window + horizon never exceeds the shortest series, capped at 100 steps.
        min_len = min([len(sd) for sd in sample_datasets] + [100])
        fh = min(8, min_len//3)
        print("sliding ...")
        # NOTE(review): 13837 is an unexplained constant; for frames with fewer rows
        # this evaluates to a stride of 0 - confirm how SlidingWindow treats that.
        stride = min(min_len, len(ts) // 13837)
        sd = SlidingWindow(min_len - fh, horizon=fh, stride=stride)
        print("stride: ", stride)
        print("min_len: ", min_len)
        test_sd = SlidingWindow(min_len - fh, horizon=fh)
        train = [np.concatenate(item, axis=0).astype(float)
                 for item in zip(*[sd(x) for x in train_])]
        # Only the last `min_len` steps of each test series are windowed.
        test = [np.concatenate(item, axis=0).astype(float)
                for item in zip(*[_with_batch_axis(test_sd(x[-min_len:])) for x in test_])]
        return train, test, fh, test_cat

    raise ValueError(f"Unsupported dataset name: {datasetname!r}")
def adapt(p):
    """Give the target of an ``(X, y)`` pair a leading axis when the target is 2-D.

    ``p[1]`` is inspected; if it has exactly two dimensions a new 2-tuple
    ``(p[0], p[1][None])`` is returned, otherwise ``p`` itself is returned untouched.
    """
    target = p[1]
    if len(target.shape) != 2:
        return p
    return (p[0], target[None])

In [41]:
# Names of the benchmark datasets this notebook works with (same list as in the
# cell above; this cell re-assigns it).
dataset_names = [
    "ETTh1", "ETTh2",
    "ETTm1", "ETTm2",
    'm4_yearly_dataset',
    'm4_quarterly_dataset',
    'm4_monthly_dataset',
    'm4_weekly_dataset',
    'm4_daily_dataset',
    'm4_hourly_dataset',
    "nn5_weekly_dataset",
    "nn5_daily_dataset_without_missing_values",
    'electricity_hourly_dataset',
    # The comma after the next entry used to be missing, so the two adjacent string
    # literals were silently concatenated into a single invalid name.
    'electricity_weekly_dataset',
    'tourism_yearly_dataset',
    'tourism_quarterly_dataset',
    'tourism_monthly_dataset',
]
def get_dataset(datasetname):
    """Load a benchmark dataset and cut it into sliding-window train/test samples.

    NOTE(review): this is a second definition of ``get_dataset``; the notebook also
    defines a variant one cell earlier, and whichever cell ran last is the one in
    effect. Compared with that variant, this one does not cap the window at 100
    steps, uses the default stride, and windows train and test series with the
    same ``SlidingWindow``.

    Parameters
    ----------
    datasetname : str
        Names containing "ETT" are loaded with ``get_long_term_forecasting_data``;
        names containing "m4", "nn5", "tourism" or "electricity" are loaded with
        ``get_Monash_forecasting_data``.

    Returns
    -------
    (train, test, fh, test_cat)
        ``train`` / ``test`` hold windowed inputs at index 0 and windowed targets at
        index 1, ``fh`` is the forecasting horizon in steps and ``test_cat`` is the
        un-windowed test data.

    Raises
    ------
    ValueError
        If ``datasetname`` belongs to none of the supported families (the function
        used to fall through and return ``None``).
    """
    # Local import so the function does not depend on a later notebook cell.
    from sklearn.preprocessing import StandardScaler

    if "ETT" in datasetname:
        ts = get_long_term_forecasting_data(datasetname)
        # Drop the first column and keep the remaining columns as floats.
        ts = ts.values[:, 1:].astype(float)
        # NOTE(review): the scaler is fitted on the whole series, i.e. before the
        # train/test split below - confirm that this is intended.
        ts = StandardScaler().fit_transform(ts)
        train_, _, test_, _ = train_test_split(ts, ts, test_size=0.2, shuffle=False)
        train_sd = SlidingWindow(100-8, horizon=8)
        test_sd = SlidingWindow(100-8, horizon=8, stride=100)
        return train_sd(train_), test_sd(test_), 8, test_

    if any(dname in datasetname for dname in ['m4', 'nn5', 'tourism', 'electricity']):
        ts = get_Monash_forecasting_data(datasetname)
        value_col = ts.columns[2]
        # Standardise the value column as ONE feature with one row per observation.
        # The previous code fitted the scaler on a (1, N) array (one sample with N
        # features for scikit-learn) and stored the result in `ts.values`, which for
        # a mixed-dtype frame is a temporary copy - so the data was never scaled.
        scaled = StandardScaler().fit_transform(
            ts[value_col].to_numpy(dtype=float).reshape(-1, 1))[:, 0]
        ts = ts.copy()
        ts[value_col] = scaled
        # One array per series, in order of first appearance; a single groupby pass
        # instead of one full-frame boolean mask per series.
        sample_datasets = [group.values[:, 2:]
                           for _, group in ts.groupby('series_name', sort=False, observed=True)]
        # Split by series (not by time): the last 20% of the series form the test set.
        train_, _, test_, _ = train_test_split(sample_datasets, sample_datasets, test_size=0.2, shuffle=False)
        test_cat = np.concatenate(test_, axis=0)
        min_len = min([len(sd) for sd in sample_datasets])
        fh = min(8, min_len//3)
        # The input window leaves out a third of the shortest series; since
        # fh <= min_len//3, window + horizon always fits inside `min_len` steps.
        sd = SlidingWindow(min_len - (min_len//3), horizon=fh)
        train = [np.concatenate(item, axis=0).astype(float)
                 for item in zip(*[sd(x) for x in train_])]
        # Only the last `min_len` steps of each test series are windowed.
        test = [np.concatenate(item, axis=0).astype(float)
                for item in zip(*[sd(x[-min_len:]) for x in test_])]
        return train, test, fh, test_cat

    raise ValueError(f"Unsupported dataset name: {datasetname!r}")

In [3]:
def calc_epoch(batchs, total_samples=6918000):
    """Return how many epochs are needed to process a fixed budget of samples.

    Parameters
    ----------
    batchs : int
        Number of samples seen per epoch (the notebook passes the number of
        training windows, ``train[0].shape[0]``).
    total_samples : int, optional
        Overall sample budget. Defaults to 6918000, the value that used to be
        hard-coded. NOTE(review): another cell evaluates 13837*500 = 6918500 -
        confirm which of the two numbers is the intended budget.

    Returns
    -------
    int
        ``total_samples // batchs + 1``; always at least 1 for positive inputs.
    """
    return total_samples // batchs + 1

In [4]:
# (name, estimator) pairs kept at hand for an EnsembleForecaster.
forecasters = [("trend", PolynomialTrendForecaster()), ("naive", NaiveForecaster())]

# Short alias -> sktime forecaster class. Values are classes, not instances;
# test_sktime_method() instantiates them on demand.
algs = dict([
    ("exp", ExponentialSmoothing),
    ("ari", ARIMA),
    ("sari", SARIMAX),
    ("a-ets", AutoETS),
    ("bag", BaggingForecaster),
    ("tre", TrendForecaster),
    ("poly", PolynomialTrendForecaster),
    ("stl", STLForecaster),
    ("pro", Prophet),
    ("ens", EnsembleForecaster),
])

# Aliases in insertion order.
algs_names = [*algs]
def test_sktime_method(alg, test, fh, alg_args=()):
    """Fit one sktime forecaster per test window and return the mean MAPE.

    Parameters
    ----------
    alg : str
        Key into the module-level ``algs`` mapping selecting the forecaster class.
    test : sequence
        ``test[0]`` holds the history windows and ``test[1]`` the matching future
        windows. Each window is transposed before use.
        # assumes windows are (n_vars, n_steps) so the transpose is (n_steps, n_vars) - TODO confirm
    fh : int
        Number of steps to forecast; steps ``1..fh`` relative to the end of the
        history window are predicted.
    alg_args : tuple, optional
        Positional arguments forwarded to the forecaster constructor.

    Returns
    -------
    float
        Mean of the per-window MAPE values.
    """
    fh_ = ForecastingHorizon(range(1, fh + 1), is_relative=True)
    histories, futures = test[0], test[1]
    mapes = []
    for i in tqdm(range(len(histories))):
        history = histories[i].transpose()
        actual = futures[i].transpose()
        # A fresh estimator per window: nothing is shared between windows.
        model = algs[alg](*alg_args)
        model.fit(pd.DataFrame(history))
        predicted = model.predict(fh_)
        # sktime metrics take (y_true, y_pred). MAPE divides by |y_true|, so passing
        # the prediction first (as this code used to) normalises by the wrong series.
        mapes.append(mean_absolute_percentage_error(actual, predicted))
    return float(np.mean(mapes))

In [33]:
# tsai architecture names (passed as the `arch` string of TSForecaster) that this
# notebook benchmarks.
tsai_algs = [
    'InceptionTimePlus', 'InceptionTimePlus62x62', 'InceptionTimeXLPlus',
    'MultiInceptionTimePlus',
    'MiniRocketPlus',
    'RNNPlus', 'LSTMPlus', 'GRUPlus',
    'TSTPlus', 'MultiTSTPlus',
    'XCM', 'XCMPlus',
    'mWDN',
]
def adapt(x, name="X"):
    """Debug helper: print a label followed by ``x.shape`` and return ``x`` unchanged.

    NOTE(review): this shares its name with the tuple-based ``adapt(p)`` defined in
    an earlier cell of the notebook; whichever cell ran last is the one in effect.
    """
    label = name + ": "
    print(label)
    print(x.shape)
    return x
def test_tsai_methods(alg, train, test, epoch=10, lr=1e-3, truc=0):
    """Train a tsai architecture on the train windows and score it on the test windows.

    Parameters
    ----------
    alg : str
        Architecture name handed to ``TSForecaster`` as ``arch``.
    train, test : sequence
        ``[X, y]`` pairs of windowed inputs and targets; train and test are
        concatenated along axis 0 and separated again through ``splits``.
    epoch : int, optional
        Number of epochs for ``fit_one_cycle``.
    lr : float, optional
        Maximum learning rate for ``fit_one_cycle``.
    truc : int, optional
        If non-zero, only the first ``truc`` train windows and the first ``truc``
        test windows are used (handy for smoke tests). Values larger than the
        available number of windows are clamped.

    Returns
    -------
    The last entry of ``fcst.final_record``.
    # presumably the validation value of the MAPE metric below - confirm against tsai/fastai
    """
    def batch_mape(preds, targets):
        # fastai calls metrics as func(predictions, targets). sktime expects
        # (y_true, y_pred), so the arguments are swapped when calling it. The batch
        # score is the MEAN over samples: the previous lambda returned the sum,
        # which made the reported value scale with the batch size.
        preds = preds.detach().cpu().permute(0, 2, 1).numpy()
        targets = targets.detach().cpu().permute(0, 2, 1).numpy()
        scores = [mean_absolute_percentage_error(t, p) for p, t in zip(preds, targets)]
        return sum(scores) / len(scores)

    train_len, test_len = len(train[0]), len(test[0])
    # Clamp the truncation so the split indices can never run past the data.
    n_train = min(truc, train_len) if truc else train_len
    n_valid = min(truc, test_len) if truc else test_len
    # Test windows sit right after the train windows in the concatenated arrays.
    splits = [list(range(n_train)),
              list(range(train_len, train_len + n_valid))]
    X = np.concatenate([train[0], test[0]], axis=0)
    y = np.concatenate([train[1], test[1]], axis=0)
    tfms = [None, TSForecasting()]
    batch_tfms = TSStandardize()
    fcst = TSForecaster(X, y, splits=splits, path='models', tfms=tfms,
                        batch_tfms=batch_tfms, bs=512, arch=alg, metrics=batch_mape)
    fcst.fit_one_cycle(epoch, lr)
    return fcst.final_record[-1]

In [None]:
# Train InceptionTimePlus with an epoch count derived from the number of training
# windows (see calc_epoch).
# NOTE(review): relies on `train` / `test` already existing in the kernel; the cells
# that assign them via get_dataset(...) appear further down the notebook.
mape_ = test_tsai_methods("InceptionTimePlus", train, test, epoch=calc_epoch(train[0].shape[0]), lr=1e-3)

In [15]:
# Load the ETTh1 windows; the fourth return value (un-windowed test data) is discarded.
train, test, fh, _ = get_dataset('ETTh1')

In [11]:
# Scratch arithmetic. 13837 is the constant get_dataset() divides len(ts) by when
# choosing a stride. NOTE(review): calc_epoch() hard-codes 6918000, which differs from
# this product - confirm which value is intended.
13837*500

6918500

In [55]:
# Inspect the shape of the windowed test targets.
test[1].shape

(256, 1, 8)

In [59]:
# Reference list of dataset names. This is a bare expression: it is evaluated and
# discarded, nothing is assigned.
[
    "ETTh1", "ETTh2", "ETTm1", "ETTm2",
    'm4_yearly_dataset',
    'm4_quarterly_dataset',
    'm4_monthly_dataset',
    'm4_weekly_dataset',
    'm4_daily_dataset',
    'm4_hourly_dataset',
    "nn5_weekly_dataset",
    "nn5_daily_dataset_without_missing_values",
    'electricity_hourly_dataset',
    # A comma was missing after the next entry, which fused it with
    # 'tourism_yearly_dataset' through implicit string concatenation.
    'electricity_weekly_dataset',
    'tourism_yearly_dataset',
    'tourism_quarterly_dataset',
    'tourism_monthly_dataset'
    ]
# NOTE(review): the fourth value returned by get_dataset() is the un-windowed TEST
# data, even though it is bound to the name `train_cat` here.
train, test, fh, train_cat = get_dataset("m4_quarterly_dataset")

Dataset: m4_quarterly_dataset
converting data to dataframe...
...done

freq                   : quarterly
forecast_horizon       : 8
contain_missing_values : False
contain_equal_length   : False

exploding dataframe...
...done


data.shape: (2406108, 3)
transforming ...
sliding ...
stride:  24
min_len:  24


In [None]:
# Mean MAPE of ExponentialSmoothing (alias "exp") over the current test windows.
test_sktime_method("exp", test, fh)

In [None]:
# Display the architectures that passed the smoke test.
# NOTE(review): `new_arch_names` is only assigned by the sweep cell that follows, so
# this cell fails on a fresh top-to-bottom run.
new_arch_names

In [None]:
# Smoke-test every tsai architecture: one epoch on 10 train / 10 test windows.
# Architectures that train without raising go to `new_arch_names`, the rest to
# `fail_arch`.
# Imported here so this cell does not depend on a later cell having been run first.
from tsai.learner import all_archs_names

new_arch_names = []
fail_arch = []
for arch in tqdm(all_archs_names):
    try:
        test_tsai_methods(arch, train, test, 1, truc=10)
    except Exception as e:
        # Best-effort sweep: keep going, but report WHY the architecture failed
        # instead of discarding the exception.
        fail_arch.append(arch)
        print("fail: ", arch, "-", repr(e))
    else:
        new_arch_names.append(arch)
print(new_arch_names, fail_arch)

In [None]:
from tsai.learner import all_archs_names


In [None]:
# Mean MAPE of AutoETS (alias "a-ets") over the current test windows.
# NOTE(review): this binds the global name `mape` to a float; a later cell passes
# `mape` as a metric to TSForecaster.
mape = test_sktime_method("a-ets", test, fh)
print(mape)

In [None]:
# NOTE(review): `exp_mape` is not assigned anywhere in the visible notebook; this
# cell presumably relies on leftover kernel state - confirm or remove.
exp_mape

In [None]:
from tsai.basics import *
from sklearn.preprocessing import StandardScaler


import os
# Point both proxy variables of this process at the local proxy on port 7890.
# NOTE(review): hard-coded, machine-specific address.
os.environ.update({
    "http_proxy": "http://127.0.0.1:7890",
    "https_proxy": "http://127.0.0.1:7890",
})
# ts = get_forecasting_time_series("Sunspots").values
# ts = get_long_term_forecasting_data("ETTh1")
# ts = ts[ts.columns[1:]]
# ts = get_Monash_forecasting_data("tourism_monthly_dataset")
# ts = ts[ts.columns[2:]]

# scaler = StandardScaler()
# scaler.fit(ts)
# ts = scaler.transform(ts)

In [None]:
# List the distinct series names of the loaded frame.
# NOTE(review): every assignment to a global `ts` in the cell above is commented out,
# so this presumably relies on leftover kernel state - confirm.
ts.series_name.unique()

In [None]:
# X, y = SlidingWindow(60, horizon=1)(ts)
import pandas as pd
# Wrap the series in a DataFrame and hold out the last 20% as the test set.
ts_df = pd.DataFrame(ts)
y_train, y_test = temporal_train_test_split(ts_df, test_size=0.2)

In [None]:
# Absolute forecasting horizon covering exactly the test index.
# NOTE(review): this rebinds `fh`, which other cells use as an integer horizon
# returned by get_dataset().
fh = ForecastingHorizon(y_test.index, is_relative=False)

In [None]:
# Fit a single sktime forecaster on y_train, predict the test index and score it
# with MAPE. The commented-out lines are alternative forecasters to swap in.
# NOTE(review): requires `y_train` / `y_test` from an earlier split cell.
# y = load_airline()
# y_train, y_test = temporal_train_test_split(y)
fh = ForecastingHorizon(y_test.index, is_relative=False)
forecaster = ExponentialSmoothing()
# forecaster = ARIMA()
# forecaster = SARIMAX()
# forecaster = AutoETS()
# forecaster = Prophet() # no
# forecaster = BaggingForecaster()
# forecaster = TrendForecaster()
# forecaster = PolynomialTrendForecaster()
# forecaster = STLForecaster()

# Members for the (currently disabled) EnsembleForecaster below; same pairs as the
# `forecasters` list defined next to `algs`.
forecasters = [
    ("trend", PolynomialTrendForecaster()),
    ("naive", NaiveForecaster())
]
# forecaster = EnsembleForecaster(forecasters=forecasters)
print(y_train.shape)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
# Arguments are in (y_true, y_pred) order.
# NOTE(review): this binds the global name `mape` to the resulting score.
mape = mean_absolute_percentage_error(y_test, y_pred)

In [None]:
# Inspect the shape of the training split.
y_train.shape

In [None]:
# Load the airline series and split it with temporal_train_test_split's default
# test size. This overwrites the `y_train` / `y_test` produced by the earlier split.
y = load_airline()
y_train, y_test = temporal_train_test_split(y)


In [None]:
# Train TSTPlus on the Sunspots series: 60-step input windows, 1-step horizon,
# TimeSplitter(0.2) for the split, 50 epochs at lr 1e-3.
ts = get_forecasting_time_series("Sunspots").values
X, y = SlidingWindow(60, horizon=1)(ts)
print(X.shape, y.shape)
splits = TimeSplitter(0.2)(y)
tfms = [None, TSForecasting()]
batch_tfms = TSStandardize()
# NOTE(review): other cells assign a float to `mape` (e.g. the result of
# mean_absolute_percentage_error(...)); if one of them ran in this kernel,
# `metrics=mape` receives that float instead of a metric function - confirm which
# `mape` is intended here.
fcst = TSForecaster(X, y, splits=splits, path='models', tfms=tfms,
    batch_tfms= batch_tfms, bs=512, arch="TSTPlus", metrics=mape, cbs=ShowGraph())
fcst.fit_one_cycle(50, 1e-3)

In [None]:
# AirQualityUCI handcraft 
# Print the dataset-name lists; the trailing comments note which benchmark
# families each list is consulted for.
print(long_term_forecasting_list) # ETT
print(Monash_forecasting_list) # M4, NN5, tourism
print(UCR_multivariate_list) # PEMS-SF
# Candidate datasets not covered by the lists above:
# Weather2K
# VISUELLE2.0

In [None]:

def get_dataset(datasetname):
    """Draft loader: fetch the raw frame for ``datasetname`` and keep its value columns.

    NOTE(review): this is another definition of ``get_dataset``; the notebook defines
    fuller versions in earlier cells, and whichever cell ran last is the one in
    effect. The visible body has no ``return`` statement.

    Parameters
    ----------
    datasetname : str
        Names containing "m4" keep the columns from position 2 on; names containing
        "ETT" keep the columns from position 1 on.
    """
    if "m4" in datasetname:
        # m4 data is served by the Monash loader (as in the notebook's other
        # get_dataset variants), not by the long-term-forecasting loader.
        ts = get_Monash_forecasting_data(datasetname)
        ts = ts[ts.columns[2:]]
    if "ETT" in datasetname:
        ts = get_long_term_forecasting_data(datasetname)
        # Was `st.columns`, an undefined name that raised NameError.
        ts = ts[ts.columns[1:]]
    