[Reference](https://towardsdatascience.com/6-methods-for-multi-step-forecasting-823cbde4127a)

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Build a toy supervised-learning table from a univariate series: each row
# holds `n_lags` past observations (t-3 ... t-0) followed by the `horizon`
# future values to forecast (t+1 ... t+4).
n_lags = 4   # lagged observations used as explanatory variables
horizon = 4  # future steps used as targets

# dummy time series from 0 to 19
tseries = pd.Series(range(20))

# time series to matrix format: the largest shift yields the oldest lag and
# shift(0), i.e. the series itself, yields the furthest-ahead target
window = n_lags + horizon
tseries_df = pd.concat(
    [tseries.shift(i) for i in range(window - 1, -1, -1)], axis=1
)
# setting up column names and dropping the leading rows that lack a full window
tseries_df.columns = [f't-{i}' for i in reversed(range(n_lags))] + \
                     [f't+{i}' for i in range(1, horizon + 1)]
tseries_df = tseries_df.dropna().reset_index(drop=True)

# splitting explanatory variables from target variables; '+' is matched as a
# literal character (regex=False), so no backslash escaping is needed
target_columns = tseries_df.columns.str.contains('+', regex=False)
X = tseries_df.iloc[:, ~target_columns]
Y = tseries_df.iloc[:, target_columns]

# chronological hold-out: shuffling is disabled, so the final 20% of the rows
# become the test set and temporal order is preserved
X_tr, X_ts, Y_tr, Y_ts = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=False,
)

# 1. Recursive (a.k.a. Iterated)


In [2]:
from sklearn.linear_model import LinearRegression
# Recursive strategy: a single one-step-ahead model is fitted and then fed its
# own forecasts as the newest lag in order to reach the further horizons.
# A linear regression keeps the example simple; any regressor could be used.
recursive = LinearRegression()
# the model only ever learns to predict the next value of the series (t+1)
recursive.fit(X_tr, Y_tr['t+1'])

# placeholder frame with one column per forecast horizon
predictions = pd.DataFrame(np.zeros(Y_ts.shape), columns=Y_ts.columns)

# first step: predict t+1 from the observed lags as they are
yh = recursive.predict(X_ts)
predictions['t+1'] = yh

# remaining steps: slide the lag window one position to the left, place the
# latest forecast in the most recent slot (t-0), and predict again
X_ts_aux = X_ts.copy()
n_steps = Y_tr.shape[1]
for i in range(2, n_steps + 1):
    shifted_lags = X_ts_aux.iloc[:, 1:].values
    X_ts_aux.iloc[:, :-1] = shifted_lags
    X_ts_aux['t-0'] = yh

    yh = recursive.predict(X_ts_aux)
    predictions[f't+{i}'] = yh

# 2. Direct


In [3]:
from sklearn.multioutput import MultiOutputRegressor

# Direct strategy: MultiOutputRegressor fits one separate regressor per target
# column, i.e. one model for each forecast horizon.
direct = MultiOutputRegressor(LinearRegression()).fit(X_tr, Y_tr)
# last expression of the cell: forecasts for every horizon on the test rows
direct.predict(X_ts)

array([[14., 15., 16., 17.],
       [15., 16., 17., 18.],
       [16., 17., 18., 19.]])

# 3. DirectRecursive


In [4]:
from sklearn.multioutput import RegressorChain

# DirectRecursive strategy: RegressorChain fits one regressor per horizon, and
# each model in the chain also receives the earlier targets as extra features.
dirrec = RegressorChain(LinearRegression()).fit(X_tr, Y_tr)
# last expression of the cell: forecasts for every horizon on the test rows
dirrec.predict(X_ts)

array([[14., 15., 16., 17.],
       [15., 16., 17., 18.],
       [16., 17., 18., 19.]])

# 4. Data as Demonstrator (DaD)
No code is shown for this method in this notebook; see https://github.com/arunvenk/DaD for an implementation.

# 5. Dynamic Factor Machine Learning (DFML)


In [5]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# DFML combined with a direct approach: the targets are standardized and
# compressed with PCA, a multi-output model forecasts the components, and the
# forecasts are mapped back to the original target space.

# target-side transformers; 2 components is chosen for simplicity here and the
# right number depends on the data
scaler = StandardScaler()
pca = PCA(n_components=2)

# standardize the training targets, then project them onto the components
Y_tr_t = scaler.fit_transform(Y_tr)
Y_tr_pca = pca.fit_transform(Y_tr_t)

# one linear regressor per component, fitted on the transformed targets
model = MultiOutputRegressor(LinearRegression()).fit(X_tr, Y_tr_pca)

# forecast in component space, then undo the PCA projection and the scaling
predictions_raw = model.predict(X_ts)
predictions_scaled = pca.inverse_transform(predictions_raw)
predictions = scaler.inverse_transform(predictions_scaled)

# 6. Multi output


In [6]:
# knn is one of the algorithms which handles multiple targets directly
from sklearn.neighbors import KNeighborsRegressor

# a single estimator is fitted on the whole target matrix at once (no wrapper)
model = KNeighborsRegressor().fit(X_tr, Y_tr)
# last expression of the cell: forecasts for every horizon on the test rows
model.predict(X_ts)

array([[11., 12., 13., 14.],
       [11., 12., 13., 14.],
       [11., 12., 13., 14.]])