In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'c:\\Users\\jaesc2\\GitHub\\skforecast'

In [2]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

from skforecast.ForecasterAutoregMultiVariate import ForecasterAutoregMultiVariate
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import random_search_forecaster_multiseries

In [3]:
# Data download
# ==============================================================================
url = (
       'https://raw.githubusercontent.com/JoaquinAmatRodrigo/skforecast/master/'
       'data/guangyuan_air_pollution.csv'
)
data = pd.read_csv(url, sep=',')

# Data preparation
# ==============================================================================
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
data = data.set_index('date')
data = data.asfreq('D')
data = data.sort_index()
data = data[['CO', 'SO2', 'PM2.5']]
data.head()

Unnamed: 0_level_0,CO,SO2,PM2.5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-03-01,9600.0,204.0,181.0
2013-03-02,20198.0,674.0,633.0
2013-03-03,47195.0,1661.0,1956.0
2013-03-04,15000.0,485.0,438.0
2013-03-05,59594.0,2001.0,3388.0


In [4]:
# Create and fit forecaster MultiVariate
# ==============================================================================
forecaster = ForecasterAutoregMultiVariate(
                 regressor          = Ridge(random_state=123),
                 level              = 'CO',
                 lags               = {'CO': [1], 'SO2': [2], 'PM2.5': []},
                 steps              = 7,
                 transformer_series = None,
                 transformer_exog   = None,
                 weight_func        = None,
                 n_jobs             = 'auto'
             )

forecaster.fit(series=data)
forecaster

ValueError: min() arg is an empty sequence

In [16]:
forecaster = ForecasterAutoregMultiVariate(Ridge(), level='l1', 
                                           lags={'l1': None, 'l2': 5}, steps=2)

assert forecaster.max_lag == 5
assert forecaster.window_size == 5

In [17]:
forecaster.lags_

{'l1': None, 'l2': array([1, 2, 3, 4, 5])}

In [18]:
forecaster.lags

{'l1': None, 'l2': array([1, 2, 3, 4, 5])}

In [32]:
np.hstack([np.ones(5), np.zeros(5)])

array([1., 1., 1., 1., 1., 0., 0., 0., 0., 0.])

In [72]:
series_col_names = ['l1', 'l2']
level = 'l2'
other_dict = {'l1': 3, 'l2': None}

In [73]:
cols_to_matrix = {
    col: ('both' if col == level else 'X')
    for col in series_col_names
    if col == level or other_dict.get(col) is not None
}

# Ajustamos el valor de 'level' en caso de que su valor en other_dict sea None
if other_dict.get(level) is None:
    cols_to_matrix[level] = 'y'

cols_to_matrix

{'l1': 'X', 'l2': 'y'}

In [42]:
result_dict = {
    col: 'X' if col != level else ('both' if other_dict.get(level) is not None else 'y')
    for col in series_col_names
}
result_dict

{'l1': 'y', 'l2': 'X'}

In [37]:
other_dict.get(level)

In [44]:
X_train_col_names_list = []

In [45]:
X_train_col_names_list.extend([1, 2, 3])
X_train_col_names_list

[1, 2, 3]

In [46]:
X_train_col_names_list.extend([4, 5, 6])
X_train_col_names_list

[1, 2, 3, 4, 5, 6]

In [74]:
series = pd.DataFrame({'l1': pd.Series(np.arange(10)), 
                           'l2': pd.Series(np.arange(100, 110))})
exog = None

forecaster = ForecasterAutoregMultiVariate(Ridge(), level='l2',
                                            lags={'l1': 3, 'l2': None}, 
                                            steps=2, transformer_series=None)
results = forecaster.create_train_X_y(series=series, exog=exog)

In [75]:
results[0]

Unnamed: 0,l1_lag_1,l1_lag_2,l1_lag_3
4,2.0,1.0,0.0
5,3.0,2.0,1.0
6,4.0,3.0,2.0
7,5.0,4.0,3.0
8,6.0,5.0,4.0
9,7.0,6.0,5.0


In [76]:
results[1]

{1: 3    103.0
 4    104.0
 5    105.0
 6    106.0
 7    107.0
 8    108.0
 Name: l2_step_1, dtype: float64,
 2: 4    104.0
 5    105.0
 6    106.0
 7    107.0
 8    108.0
 9    109.0
 Name: l2_step_2, dtype: float64}

In [57]:
forecaster.transformer_series

In [58]:
forecaster.transformer_series_

{'l1': StandardScaler(), 'l2': StandardScaler()}

In [59]:
forecaster.regressor

In [60]:
forecaster.lags

{'l1': None, 'l2': array([1, 2, 3, 4])}

In [61]:
forecaster.series_col_names