In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
rcParams['figure.figsize'] = 18, 7

Here's just a couple of attempts with AutoML.

In [2]:
# Prerocessing for FEDOT
from fedot.core.data.data import InputData
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams

# FEDOT 
from fedot.core.data.data_split import train_test_data_setup
from fedot.api.main import Fedot
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.node import PrimaryNode, SecondaryNode

In [19]:
from autots import AutoTS
from autots.evaluator.auto_ts import fake_regressor

### FEDOT

In [3]:
X_train = pd.read_excel('data/X_train_extern.xlsx', index_col=0)
y_train = pd.read_excel('data/y_train_feat_eng.xlsx', index_col=0)
X_test = pd.read_excel('data/X_test_extern.xlsx', index_col=0)
y_test = pd.read_excel('data/y_test_feat_eng.xlsx', index_col=0)

In [4]:
X_train['company'] = X_train['company'].astype('category')
X_test['company'] = X_test['company'].astype('category')

In [5]:
def smape(y_true, y_pred, order=2):
    if not isinstance(y_true, np.ndarray):
        y_true = np.asarray(y_true)
    if not isinstance(y_pred, np.ndarray):
        y_pred = np.asarray(y_pred)
    return 100*np.mean(2*np.linalg.norm(y_true-y_pred, ord=order))/(np.linalg.norm(y_true, ord=order)+np.linalg.norm(y_pred, ord=order))

In [6]:
X_train.shape, y_train.shape

((1816, 64), (1816, 2))

In [8]:
task = Task(TaskTypesEnum.ts_forecasting,
            TsForecastingParams(forecast_length=10))

train_idx = X_train.index
X_train_values = X_train.values

train_input = InputData(idx=train_idx,
                        features=X_train_values,
                        target=y_train,
                        task=task,
                        data_type=DataTypesEnum.ts)
train_data, test_data = train_test_data_setup(train_input, validation_blocks=4)

In [9]:
model = Fedot(problem='ts_forecasting',
              task_params=task.task_params,
              timeout=5,
              n_jobs=1,
              cv_folds=4, validation_blocks=4, preset='fast_train')

In [10]:
pipeline = model.fit(train_data)

Traceback (most recent call last):
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/fedot/api/api_utils/assumptions/assumptions_handler.py", line 60, in fit_assumption_and_check_correctness
    pipeline.fit(data_train)
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/fedot/core/pipelines/pipeline.py", line 146, in fit
    train_predicted = self._fit(input_data=copied_input_data)
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/fedot/core/pipelines/pipeline.py", line 110, in _fit
    train_predicted = self.root_node.fit(input_data=input_data)
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/fedot/core/pipelines/node.py", line 397, in fit
    secondary_input = self._input_from_parents(input_data=input_data, parent_operation='fit')
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/fedot/core/pipelines/node.py", line 437, i

ValueError: Initial pipeline fit was failed due to: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 1747 and the array at index 1 has size 1696. Check pipeline structure and the correctness of the data

As it works as a black-box, I don't know what is the underlying reason.

The problem here is also that we couldn't simply take lagged values because of data specifics (we have the number of time serieses for different companies). That's why this framework suits only for individual forecasts for each of our companies.

### LightAutoML

It seems that there's no specific functionality for time series forecasting in this framework.

### AutoTS

Frequency parameter infered from fake regressor, None specification is not allowed here.

In [24]:
model = AutoTS(
    forecast_length=10,
    frequency='Q-DEC',
    model_list = 'multivariate',
    validation_method='backwards',
    max_generations=3,
    num_validations=3,
    n_jobs='auto')

Using 4 cpus for n_jobs.


In [25]:
future_regressor_train2d, future_regressor_forecast2d = fake_regressor(
    X_train,
    forecast_length=10,
    dimensions=X_train.shape[1],
    drop_most_recent=model.drop_most_recent,
    aggfunc=model.aggfunc,
    verbose=model.verbose,
)

Inferred frequency is: Q-DEC


In [26]:
model = model.fit(
    y_train,
    future_regressor=future_regressor_train2d,
)

Too many training validations for length of data provided, decreasing num_validations to 2
Model Number: 1 with model GluonTS in generation 0 of 3
Template Eval Error: ImportError('GluonTS installation not found or installed version is incompatible with AutoTS.') in model 1: GluonTS
Model Number: 2 with model GluonTS in generation 0 of 3
Template Eval Error: ImportError('GluonTS installation not found or installed version is incompatible with AutoTS.') in model 2: GluonTS
Model Number: 3 with model GluonTS in generation 0 of 3
Template Eval Error: ImportError('GluonTS installation not found or installed version is incompatible with AutoTS.') in model 3: GluonTS
Model Number: 4 with model GluonTS in generation 0 of 3
Template Eval Error: ImportError('GluonTS installation not found or installed version is incompatible with AutoTS.') in model 4: GluonTS
Model Number: 5 with model GluonTS in generation 0 of 3
Template Eval Error: ImportError('GluonTS installation not found or installed ver

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.2s
[Parallel(n_jobs=4)]: Done 200 out of 200 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 200 out of 200 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 200 out of 200 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      |

Model Number: 13 with model MultivariateRegression in generation 0 of 3
You can set `force_col_wise=true` to remove the overhead.
Model Number: 14 with model VECM in generation 0 of 3
Model Number: 15 with model MultivariateMotif in generation 0 of 3
Model Number: 16 with model MultivariateMotif in generation 0 of 3
Model Number: 17 with model SectionalMotif in generation 0 of 3
Template Eval Error: ValueError('kth(=20) out of bounds (16)') in model 17: SectionalMotif
Model Number: 18 with model SectionalMotif in generation 0 of 3
Model Number: 19 with model MultivariateRegression in generation 0 of 3


Model Number: 20 with model NVAR in generation 0 of 3
Model Number: 21 with model RollingRegression in generation 0 of 3
Template Eval Error: ValueError("Input contains NaN, infinity or a value too large for dtype('float32').") in model 21: RollingRegression
Model Number: 22 with model DynamicFactor in generation 0 of 3
Model Number: 23 with model RollingRegression in generation 0 of 3
Template Eval Error: ValueError("Input contains NaN, infinity or a value too large for dtype('float32').") in model 23: RollingRegression
Model Number: 24 with model Cassandra in generation 0 of 3
Template Eval Error: Exception("unrecognized dates: Index([], dtype='object', name='date')") in model 24: Cassandra
Model Number: 25 with model VECM in generation 0 of 3
Model Number: 26 with model DynamicFactor in generation 0 of 3
Template Eval Error: AttributeError("'numpy.ndarray' object has no attribute 'values'") in model 26: DynamicFactor
Model Number: 27 with model GluonTS in generation 0 of 3
Template 

  slope * self.t_train[..., None] + intercept,


Model Number: 46 with model TMF in generation 0 of 3
Template Eval Error: ValueError('Model TMF returned NaN for one or more series. fail_on_forecast_nan=True') in model 46: TMF
Model Number: 47 with model SectionalMotif in generation 0 of 3
Template Eval Error: ValueError('kth(=5) out of bounds (1)') in model 47: SectionalMotif
Model Number: 48 with model MultivariateMotif in generation 0 of 3
Model Number: 49 with model LATC in generation 0 of 3
Model Number: 50 with model LATC in generation 0 of 3
Model Number: 51 with model TMF in generation 0 of 3
Template Eval Error: ValueError('Model TMF returned NaN for one or more series. fail_on_forecast_nan=True') in model 51: TMF
Model Number: 52 with model WindowRegression in generation 0 of 3
Model Number: 53 with model SectionalMotif in generation 0 of 3
Template Eval Error: ValueError('kth(=10) out of bounds (8)') in model 53: SectionalMotif
Model Number: 54 with model VARMAX in generation 0 of 3
Model Number: 55 with model PytorchForec



Model Number: 63 with model Cassandra in generation 0 of 3
Dropping zero variance feature columns Index(['month_1', 'month_2', 'month_4', 'month_5', 'month_7', 'month_8',
       'month_10', 'month_11', 'weekdayofmonth_1', 'weekdayofmonth_2',
       'weekdayofmonth_3', 'weekdayofmonth_4', 'weekdayofmonth_5'],
      dtype='object')
Dropping multi-colinear feature columns Index(['randomwalk_8', 'randomwalk_9'], dtype='object')
Template Eval Error: ValueError('operands could not be broadcast together with shapes (114,2) (124,2) ') in model 63: Cassandra
Model Number: 64 with model MultivariateMotif in generation 0 of 3
Template Eval Error: ValueError('kth(=100) out of bounds (32)') in model 64: MultivariateMotif
Model Number: 65 with model SectionalMotif in generation 0 of 3
Template Eval Error: ValueError('XA and XB must have the same number of columns (i.e. feature dimension.)') in model 65: SectionalMotif
Model Number: 66 with model DynamicFactorMQ in generation 0 of 3
Template Eval Err

Model Number: 77 with model MultivariateMotif in generation 0 of 3
Model Number: 78 with model VARMAX in generation 0 of 3
Model Number: 79 with model MultivariateRegression in generation 0 of 3
Model Number: 80 with model Cassandra in generation 0 of 3
Dropping multi-colinear feature columns Index(['seasonality365.25_10', 'seasonality365.25_11', 'seasonality365.25_12',
       'seasonality365.25_13', 'seasonality365.25_14', 'seasonality365.25_15',
       'seasonality365.25_16', 'seasonality365.25_17', 'seasonality365.25_18',
       'seasonality365.25_19', 'randnorm_0', 'randnorm_1', 'randnorm_2',
       'randnorm_3'],
      dtype='object')
Template Eval Error: ValueError('window shape cannot be larger than input array shape') in model 80: Cassandra
Model Number: 81 with model DynamicFactor in generation 0 of 3
Template Eval Error: Exception('Transformer Detrend failed on fit') in model 81: DynamicFactor
Model Number: 82 with model MAR in generation 0 of 3
Model Number: 83 with model VE

Model Number: 97 with model VARMAX in generation 0 of 3
Model Number: 98 with model DynamicFactorMQ in generation 0 of 3
Template Eval Error: ValueError('Number of factors (4) cannot be greater than the number of monthly endogenous variables (2).') in model 98: DynamicFactorMQ
Model Number: 99 with model NVAR in generation 0 of 3
Model Number: 100 with model VAR in generation 0 of 3
Template Eval Error: ValueError('maxlags is too large for the number of observations and the number of equations. The largest model cannot be estimated.') in model 100: VAR
Model Number: 101 with model MAR in generation 0 of 3
Model Number: 102 with model MultivariateRegression in generation 0 of 3
Template Eval Error: ValueError("Input contains NaN, infinity or a value too large for dtype('float64').") in model 102: MultivariateRegression
Model Number: 103 with model PytorchForecasting in generation 0 of 3
Template Eval Error: ImportError('pytorch, pytorch lighting, or pytorch-forecasting not present') in 

Model Number: 134 with model LATC in generation 1 of 3
Model Number: 135 with model RRVAR in generation 1 of 3
Model Number: 136 with model VARMAX in generation 1 of 3
Model Number: 137 with model SectionalMotif in generation 1 of 3
Model Number: 138 with model RRVAR in generation 1 of 3
Model Number: 139 with model MultivariateMotif in generation 1 of 3
Template Eval Error: ValueError('kth(=100) out of bounds (24)') in model 139: MultivariateMotif
Model Number: 140 with model NVAR in generation 1 of 3
Model Number: 141 with model DynamicFactor in generation 1 of 3
Template Eval Error: ZeroDivisionError('integer division or modulo by zero') in model 141: DynamicFactor
Model Number: 142 with model WindowRegression in generation 1 of 3
Model Number: 143 with model MultivariateMotif in generation 1 of 3
Model Number: 144 with model SectionalMotif in generation 1 of 3
Template Eval Error: ValueError('zero-size array to reduction operation fmax which has no identity') in model 144: Sectiona

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 200 out of 200 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 200 out of 200 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 200 out of 200 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      |

Model Number: 179 with model DynamicFactor in generation 1 of 3
Model Number: 180 with model RRVAR in generation 1 of 3
Model Number: 181 with model LATC in generation 1 of 3
Model Number: 182 with model MultivariateMotif in generation 1 of 3
Model Number: 183 with model MultivariateRegression in generation 1 of 3
You can set `force_col_wise=true` to remove the overhead.
Model Number: 184 with model Cassandra in generation 1 of 3
Template Eval Error: TypeError('can only concatenate str (not "int") to str') in model 184: Cassandra
Model Number: 185 with model LATC in generation 1 of 3


Model Number: 186 with model VARMAX in generation 1 of 3
Model Number: 187 with model MAR in generation 1 of 3
Model Number: 188 with model RRVAR in generation 1 of 3
Model Number: 189 with model DynamicFactor in generation 1 of 3
Template Eval Error: ValueError('Number of factors must be less than the number of endogenous variables.') in model 189: DynamicFactor
Model Number: 190 with model MultivariateMotif in generation 1 of 3
Model Number: 191 with model VAR in generation 1 of 3
Model Number: 192 with model MAR in generation 1 of 3
Model Number: 193 with model SectionalMotif in generation 1 of 3
Model Number: 194 with model SectionalMotif in generation 1 of 3
Model Number: 195 with model WindowRegression in generation 1 of 3
Model Number: 196 with model VARMAX in generation 1 of 3
Model Number: 197 with model VECM in generation 1 of 3
Model Number: 198 with model MultivariateRegression in generation 1 of 3
You can set `force_col_wise=true` to remove the overhead.


Model Number: 199 with model VECM in generation 1 of 3
Model Number: 200 with model MultivariateRegression in generation 1 of 3
Model Number: 201 with model VARMAX in generation 1 of 3
Template Eval Error: LinAlgError('1-dimensional array given. Array must be two-dimensional') in model 201: VARMAX
Model Number: 202 with model MultivariateRegression in generation 1 of 3
Model Number: 203 with model MultivariateMotif in generation 1 of 3
Model Number: 204 with model WindowRegression in generation 1 of 3


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 300 out of 300 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 192 tasks      |

New Generation: 2 of 3
Model Number: 205 with model NVAR in generation 2 of 3
Template Eval Error: ValueError('Model NVAR returned NaN for one or more series. fail_on_forecast_nan=True') in model 205: NVAR
Model Number: 206 with model SectionalMotif in generation 2 of 3
Model Number: 207 with model VECM in generation 2 of 3
Template Eval Error: Exception('Transformer Detrend failed on fit') in model 207: VECM
Model Number: 208 with model VAR in generation 2 of 3
Model Number: 209 with model Cassandra in generation 2 of 3
Template Eval Error: TypeError('can only concatenate str (not "int") to str') in model 209: Cassandra
Model Number: 210 with model MultivariateRegression in generation 2 of 3


  slope * self.t_train[..., None] + intercept,


Model Number: 211 with model RRVAR in generation 2 of 3
Model Number: 212 with model MultivariateRegression in generation 2 of 3
Model Number: 213 with model WindowRegression in generation 2 of 3
Model Number: 214 with model VECM in generation 2 of 3
Model Number: 215 with model Cassandra in generation 2 of 3
Template Eval Error: TypeError('can only concatenate str (not "int") to str') in model 215: Cassandra
Model Number: 216 with model MultivariateRegression in generation 2 of 3
Model Number: 217 with model RRVAR in generation 2 of 3
Model Number: 218 with model WindowRegression in generation 2 of 3
Template Eval Error: ValueError('Some value(s) of y are out of the valid range for family PoissonDistribution') in model 218: WindowRegression
Model Number: 219 with model VECM in generation 2 of 3
Model Number: 220 with model VARMAX in generation 2 of 3
Model Number: 221 with model RRVAR in generation 2 of 3
Model Number: 222 with model LATC in generation 2 of 3
Model Number: 223 with mo

Model Number: 306 with model WindowRegression in generation 3 of 3
Model Number: 307 with model VECM in generation 3 of 3
Model Number: 308 with model VARMAX in generation 3 of 3
Model Number: 309 with model VARMAX in generation 3 of 3
Model Number: 310 with model VAR in generation 3 of 3
Model Number: 311 with model MultivariateMotif in generation 3 of 3
Model Number: 312 with model MultivariateRegression in generation 3 of 3
Model Number: 313 with model VECM in generation 3 of 3
Model Number: 314 with model MultivariateRegression in generation 3 of 3
Model Number: 315 with model MultivariateMotif in generation 3 of 3
Model Number: 316 with model VARMAX in generation 3 of 3
Model Number: 317 with model WindowRegression in generation 3 of 3
Model Number: 318 with model WindowRegression in generation 3 of 3
Model Number: 319 with model MultivariateMotif in generation 3 of 3
Model Number: 320 with model SectionalMotif in generation 3 of 3
Model Number: 321 with model VECM in generation 3

Traceback (most recent call last):
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/evaluator/auto_model.py", line 1112, in model_forecast
    model_count=model_count,
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/evaluator/auto_model.py", line 1198, in model_forecast
    model_count=model_count,
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/evaluator/auto_model.py", line 702, in ModelPrediction
    forecast_length=forecast_length, future_regressor=future_regressor_forecast
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/models/basics.py", line 1895, in predict
    res_idx = np.argpartition(res_sum, num_top, axis=0)[0:num_top]
  File "<__array_function__ internals>", line 6, in argpartition
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/numpy/core/fromnumeric.py", line 837, in argpa

40 - LATC with avg smape 102.14: 
Model Number: 41 of 59 with model DynamicFactor for Validation 1
41 - DynamicFactor with avg smape 142.86: 
Model Number: 42 of 59 with model Cassandra for Validation 1
Dropping zero variance feature columns Index(['weekdayofmonth_1', 'weekdayofmonth_2', 'weekdayofmonth_3',
       'weekdayofmonth_4', 'weekdayofmonth_5'],
      dtype='object')
Dropping multi-colinear feature columns Index(['seasonalitycommonfourier_24', 'seasonalitycommonfourier_25',
       'randomwalk_0', 'randomwalk_1', 'randomwalk_2', 'randomwalk_3',
       'randomwalk_4', 'randomwalk_5', 'randomwalk_6', 'randomwalk_7',
       'randomwalk_8', 'randomwalk_9'],
      dtype='object')
📈 42 - Cassandra with avg smape 98.76: 
Model Number: 43 of 59 with model MultivariateMotif for Validation 1
Template Eval Error: Exception('Transformer AlignLastValue failed on fit') in model 43: MultivariateMotif
Model Number: 44 of 59 with model Cassandra for Validation 1
No anomalies detected.
Dropping 

📈 3 - Ensemble with avg smape 139.22: 
Model Number: 4 of 59 with model VECM for Validation 2
4 - VECM with avg smape 157.54: 
Model Number: 5 of 59 with model VECM for Validation 2
5 - VECM with avg smape 157.54: 
Model Number: 6 of 59 with model Ensemble for Validation 2
Traceback (most recent call last):
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/evaluator/auto_model.py", line 1112, in model_forecast
    model_count=model_count,
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/evaluator/auto_model.py", line 1198, in model_forecast
    model_count=model_count,
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/evaluator/auto_model.py", line 702, in ModelPrediction
    forecast_length=forecast_length, future_regressor=future_regressor_forecast
  File "/Users/danilculkov/opt/anaconda3/envs/practice/lib/python3.7/site-packages/autots/models/basics.py", line 1919

44 - Cassandra with avg smape 187.41: 
Model Number: 45 of 59 with model LATC for Validation 2
45 - LATC with avg smape 198.94: 
Model Number: 46 of 59 with model NVAR for Validation 2
46 - NVAR with avg smape 120.31: 
Model Number: 47 of 59 with model SectionalMotif for Validation 2
Template Eval Error: ValueError('kth(=10) out of bounds (1)') in model 47: SectionalMotif
Model Number: 48 of 59 with model SectionalMotif for Validation 2
Template Eval Error: ValueError('kth(=10) out of bounds (1)') in model 48: SectionalMotif
Model Number: 49 of 59 with model SectionalMotif for Validation 2
Template Eval Error: ValueError('zero-size array to reduction operation fmax which has no identity') in model 49: SectionalMotif
Model Number: 50 of 59 with model WindowRegression for Validation 2
Template Eval Error: ValueError('Found array with 0 sample(s) (shape=(0, 0)) while a minimum of 2 is required.') in model 50: WindowRegression
Model Number: 51 of 59 with model SectionalMotif for Validation

In [38]:
future_regressor_test2d, future_regressor_forecast2d = fake_regressor(
    X_test,
    frequency='Q-DEC',
    forecast_length=X_test.shape[0],
    dimensions=X_test.shape[1],
    drop_most_recent=model.drop_most_recent,
    aggfunc=model.aggfunc,
    verbose=model.verbose,
)

In [43]:
prediction = model.predict(forecast_length=future_regressor_forecast2d.shape[0], future_regressor=future_regressor_forecast2d, verbose=0)
forecasts_df = prediction.forecast

In [44]:
forecasts_df

Unnamed: 0,INCOME STATEMENT///Operating Income,INCOME STATEMENT///Revenue
2020-09-30,49.680602,240.973290
2020-12-31,47.803773,266.620800
2021-03-31,46.473687,270.234800
2021-06-30,45.627805,270.110938
2021-09-30,45.204502,269.352237
...,...,...
2071-06-30,60.908818,115.499076
2071-09-30,60.908818,114.822686
2071-12-31,60.908818,114.147155
2072-03-31,60.908818,113.475162


In [45]:
smape(y_test, forecasts_df)

164.72483701188014

SMAPE has significantly increased. 

__Apparent outcome__: our data is quite specific so it requires carefull processing and modeling.