In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

In [2]:
def aggregation(dataframe: pd.DataFrame, key: str = 'D') -> pd.DataFrame:
    agg_dict_ohlc = {
        'open_': 'first',
        'high_': 'max',
        'low_': 'min',
        'close_': 'last',
        'volume_': 'sum'
    }

    return dataframe.resample(key).agg(agg_dict_ohlc).dropna()

def resample_index(index: pd.DatetimeIndex, freq: str = 'D'):
    assert isinstance(index, pd.DatetimeIndex)
    start_date = index.min()
    end_date = index.max() + pd.DateOffset(days=1)
    resampled_index = pd.date_range(start_date, end_date, periods=freq)[:-1]
    series = pd.Series(resampled_index, resampled_index.floor('D'))
    
    
    
def _calc_wma(dataframe: pd.DataFrame, n: int = 14, _key: str = 'close_') -> pd.DataFrame:
    data = dataframe.copy()

    wma = np.zeros(data.shape[0]) + np.nan

    for i in range(n):
        wma[i] = data[_key].iloc[:i].mean()

    weights = np.arange(1, n + 1, 1)
    for i in range(n - 1, data.shape[0]):
        vector = data[_key].iloc[i - n + 1: i + 1]
        vector = weights * vector
        wma[i] = (vector.sum())/(weights.sum())

    return wma

def calc_wma(dataframe: pd.DataFrame, n: int = 14, _key: str = 'close_') -> pd.DataFrame:

    data = dataframe.copy()
    wma = np.zeros(data.shape[0]) + np.nan

    for i in range(n):
        wma[i] = data[_key].iloc[:i].mean()

    weights = np.arange(1, n + 1, 1)
    for i in range(n - 1, data.shape[0]):
        vector = data[_key].iloc[i - n + 1: i + 1]
        vector = weights * vector
        wma[i] = (vector.sum())/(weights.sum())

    data[f'WMA_{_key[0]}_{str(n)}'] = wma
    return data


def __calc_ema(x, last_ema, n):
    return (2 / (n + 1) * (x - last_ema)) + last_ema

def _calc_ema(vector: np.ndarray, n: int = 14) -> np.ndarray:
    ema = np.zeros(vector.shape[0]) + np.nan

    for i in range(1, n):
        ema[i] = vector[:i].mean()

    for i in range(n, vector.shape[0]):
        ema[i] = __calc_ema(vector[i], ema[i-1], n)

    return ema

def calc_ema(dataframe: pd.DataFrame, n: int = 14, _key: str = 'close_') -> pd.DataFrame:
    data = dataframe.copy()

    ema = np.zeros(data.shape[0]) + np.nan

    for i in range(n):
        ema[i] = data[_key].iloc[:i].mean()

    for i in range(n, data.shape[0]):
        ema[i] = __calc_ema(data[_key].iloc[i], ema[i-1], n)

    data[f'EMA_{_key[0]}_{str(n)}'] = ema

    return data

def calc_rsi(dataframe: pd.DataFrame, n: int = 14, _key: str = 'close_') -> pd.DataFrame:

    data = dataframe.copy()

    change = data[_key].diff(1)
    u = np.where(change > 0, change, 0)
    d = np.where(change < 0, -change, 0)

    ema_u = _calc_ema(u, n)
    ema_d = _calc_ema(d, n)
    ema_d = np.where((ema_d == 0), np.nan, ema_d)

    rs = ema_u/ema_d
    rsi = 100 - 100 / (1 + rs)
    data[f'RSI_{_key[0]}_{str(n)}'] = rsi

    return data

def calc_macd(dataframe: pd.DataFrame, n_fast: int = 12, n_slow: int = 26, _key: str = 'close_'):
    assert n_slow > n_fast

    data = dataframe.copy()

    ema_fast = _calc_ema(data[_key].to_numpy(), n_fast)
    ema_slow = _calc_ema(data[_key].to_numpy(), n_slow)

    data['MACD'] = ema_fast - ema_slow

    return data


def lag_features(dataframe: pd.DataFrame, columns: list[str] = None, depth: int = 1):
    data = dataframe.copy()
    
    if columns is None:
        columns = data.columns
    for col in columns:
        for i in np.arange(1, depth + 1):
            data[col+'_lag'+str(i)] = data[col].shift(i)

    return data

def calc_techical_metrics(dataframe: pd.DataFrame, *args, **kwargs) -> pd.DataFrame:
    data = dataframe.copy()
    data = calc_wma(data, **kwargs['wma'])
    data = calc_ema(data, **kwargs['ema'])
    data = calc_rsi(data, **kwargs['rsi'])
    data = calc_macd(data, **kwargs['macd'])
    return data.dropna()

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
from sklego.preprocessing import RepeatingBasisFunction

class DateTimeTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, n_periods: int = 12, input_range: tuple[int, int] = (1, 365)):
        super().__init__()
        self.n_periods = n_periods
        self.column = 'day_of_year'
        self.remainder = 'passthrough'
        self.input_range = input_range
        self.rbf_estimator = RepeatingBasisFunction(n_periods=self.n_periods, column=self.column,
                                                    input_range=self.input_range, remainder=self.remainder)

    
    @staticmethod
    def _make_day_of_year(dataframe: pd.DataFrame) -> pd.DataFrame:
        data = dataframe.copy()
        data['day_of_year'] = data.index.day_of_year
        return data
    
    def fit(self, X, y=None):
        assert isinstance(X.index, pd.DatetimeIndex), 'Index must be datetime'        
        self.X_ = X
        data = self._make_day_of_year(X)
        self.rbf_estimator.fit(data)
        return self
    
    def transform(self, X, y=None):
        assert isinstance(X.index, pd.DatetimeIndex), 'Index must be datetime'
        check_is_fitted(self)
        data = self._make_day_of_year(X)
        transformed_month = self.rbf_estimator.transform(data)
        return transformed_month
    
    def get_feature_names_out(self) -> list[str]:
        return ['rbf_{}'.format(i) for i in range(self.rbf_estimator.n_periods)] + self.X_.columns.to_list()
    


In [4]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn import set_config

set_config(transform_output='pandas')


std_cols = ['volume_']
norm_cols = ['open_', 'high_', 'low_', 'close_', 'EMA_c_14', 'WMA_c_14', 'RSI_c_14', 'MACD']

techical_metrics_params = {
    'wma': {
        'n': 14,
        '_key': 'close_'
        },
    'ema': {
        'n': 14,
        '_key': 'close_'
        },
    'rsi': {
        'n': 14,
        '_key': 'close_'
        },
    'macd': {
        'n_fast': 12,
        'n_slow': 26,
        '_key': 'close_'
        }
}
agg_params = {
    'key': 'D'
}   

scaling_transformer = ColumnTransformer([
    ('std_scaler', StandardScaler(), std_cols),
    ('mm_scaler', MinMaxScaler(), norm_cols)
    ], remainder='passthrough'
                                        )

preprocessing_pipeline = Pipeline([
    # ('agregate_days', FunctionTransformer(aggregation, kw_args=agg_params)),
    ('metrics_append', FunctionTransformer(calc_techical_metrics, kw_args=techical_metrics_params)),
    ('date_transformer', DateTimeTransformer(n_periods=12, input_range=(1, 365))),
    ('scalling', scaling_transformer)
])

# preprocessing_pipeline.fit(train)

In [5]:
df = pd.read_pickle('data/df_TSLA.pkl')
df.sample(5)

Unnamed: 0_level_0,open_,high_,low_,close_,volume_
timestamp_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-09-07 04:00:00,245.12,246.3333,245.12,245.78,7113
2023-08-14 16:15:00,239.7,239.76,233.76,236.7,21302
2021-01-27 08:40:00,288.8833,289.84,288.8333,289.82,37023
2021-08-04 08:00:00,238.15,238.3333,236.8333,237.3333,61683
2021-09-10 15:20:00,248.4,248.4603,247.43,247.4617,561408


In [6]:
df = aggregation(df, 'D')
df

Unnamed: 0_level_0,open_,high_,low_,close_,volume_
timestamp_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-04,236.3333,248.1633,236.3333,244.5333,100740699
2021-01-05,243.3767,251.4667,239.7333,250.9667,64163466
2021-01-06,249.3333,258.0000,248.8867,254.5333,92799093
2021-01-07,256.3333,278.2400,255.7333,276.5000,103405713
2021-01-08,281.6667,294.9633,279.4633,289.1667,151570050
...,...,...,...,...,...
2023-12-22,253.7000,258.2200,249.0350,252.5700,94545651
2023-12-26,253.9800,273.4309,240.8791,256.9500,89369912
2023-12-27,257.4500,277.1995,249.5321,262.6150,108466950
2023-12-28,263.0200,265.1300,252.2900,254.8500,115267688


In [7]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df, test_size=0.25, random_state=1)

In [8]:
print(train.shape, test.shape)

(564, 5) (189, 5)


In [9]:
preprocessing_pipeline.fit(train)
train_eda = preprocessing_pipeline.transform(train)

In [10]:
train_eda.sample(5)

Unnamed: 0_level_0,std_scaler__volume_,mm_scaler__open_,mm_scaler__high_,mm_scaler__low_,mm_scaler__close_,mm_scaler__EMA_c_14,mm_scaler__WMA_c_14,mm_scaler__RSI_c_14,mm_scaler__MACD,remainder__rbf_0,...,remainder__rbf_2,remainder__rbf_3,remainder__rbf_4,remainder__rbf_5,remainder__rbf_6,remainder__rbf_7,remainder__rbf_8,remainder__rbf_9,remainder__rbf_10,remainder__rbf_11
timestamp_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-09,1.084111,0.286369,0.38579,0.292301,0.409187,0.536939,0.463389,0.698066,0.245972,0.007606451,...,0.9573428,0.5347204,0.04042007,0.0004135027,5.724937e-07,1.07269e-10,2.720123e-15,1.647922e-12,2.027406e-08,3.375638e-05
2023-05-19,0.860018,0.23465,0.222269,0.239147,0.24379,0.546901,0.534135,0.585521,0.530641,1.0261e-09,...,0.001503893,0.09064568,0.739415,0.8162825,0.1219562,0.002465914,6.747817e-06,2.498962e-09,1.252469e-13,4.219809e-14
2022-09-23,-0.774712,0.600013,0.573764,0.559303,0.562203,0.568116,0.530351,0.773186,0.403795,2.365409e-05,...,9.269104e-13,5.124612e-15,1.810597e-10,8.657526e-07,0.0005602434,0.04906488,0.5815349,0.9328071,0.2024972,0.00594918
2023-03-15,1.042769,0.255624,0.234562,0.238218,0.245585,0.41694,0.40261,0.584521,0.243891,0.003052813,...,0.8476239,0.7031877,0.07894968,0.001199611,2.466844e-06,6.865211e-10,2.585694e-14,2.018499e-13,3.688428e-09,9.121479e-06
2021-01-15,0.006192,0.584841,0.569136,0.56001,0.558345,0.720053,0.662495,0.743014,0.567732,0.8081418,...,0.09377435,0.001590373,3.650268e-06,1.133866e-09,4.766609e-14,1.110937e-13,2.265828e-09,6.254255e-06,0.002336336,0.1181152


In [11]:
train_eda.shape

(562, 21)

In [12]:
test_eda = preprocessing_pipeline.transform(test)
test_eda.sample(5)

Unnamed: 0_level_0,std_scaler__volume_,mm_scaler__open_,mm_scaler__high_,mm_scaler__low_,mm_scaler__close_,mm_scaler__EMA_c_14,mm_scaler__WMA_c_14,mm_scaler__RSI_c_14,mm_scaler__MACD,remainder__rbf_0,...,remainder__rbf_2,remainder__rbf_3,remainder__rbf_4,remainder__rbf_5,remainder__rbf_6,remainder__rbf_7,remainder__rbf_8,remainder__rbf_9,remainder__rbf_10,remainder__rbf_11
timestamp_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-11,-1.166732,0.513783,0.504346,0.520092,0.523794,0.644716,0.640978,0.728967,0.589998,0.0008001906,...,3.366494e-10,1.087143e-14,4.609966e-13,7.222641e-09,1.531459e-05,0.004394666,0.17067,0.8970152,0.6380479,0.06142114
2021-06-14,-0.837922,0.320636,0.310231,0.327885,0.328035,0.71812,0.727647,0.603432,0.522451,2.018499e-13,...,9.121479e-06,0.003052813,0.1382758,0.8476239,0.7031877,0.07894968,0.001199611,2.466844e-06,6.865211e-10,2.585694e-14
2021-09-14,-0.98009,0.461809,0.452689,0.468364,0.471584,0.545118,0.483748,0.727321,0.408148,3.122958e-06,...,3.734833e-14,1.411691e-13,2.755417e-09,7.278571e-06,0.002602051,0.1258916,0.8243061,0.7304515,0.08760023,0.001421773
2021-03-03,-0.601349,0.417348,0.395288,0.37205,0.37008,0.459044,0.438602,0.671777,0.268039,0.01752588,...,0.9998792,0.3760088,0.01913637,0.000131805,1.228614e-07,1.549923e-11,2.646158e-16,1.244116e-11,1.030519e-07,0.0001155214
2021-05-28,-0.75625,0.340654,0.321386,0.342482,0.337457,0.492515,0.477402,0.667487,0.384261,6.316902e-11,...,0.0003033596,0.03309795,0.4887147,0.9766093,0.2641176,0.009666845,4.788321e-05,3.20991e-08,2.912148e-12,1.435139e-15


In [13]:
test_eda.shape

(187, 21)

После первичной обработки данных, рассчета технических метрик и выделения даты (индекса) в качетсве признака. Стоит разбить данные на тестовую и тренировочную выборки.

Как это сделать?

Пусть у нас есть датасет размером $m * n$.
Есть такое понятие как lookback window. Мы выбираем определенный период пусть это будет $n$ и далее учитывая этот период мы делим наш датасет на тренировочную и тестовую выборки. Т. е. датасет разбивается на $\frac{m}{n}$ или $m-n$ выборок, где от 0 до $n - 1$ - тренировочные значения. А n -  тестовое значение.

Если логически предположить, то существует два подхода:
1. Разбить датасет с неповторяющимися элементами. (Overlapping)
2. Разбить датасет с повторяющимися элементами (Whitout overlapping)

В первом случае мы разбиваем датасет на $n$ выборок и не используем соседние элементы.

Во втором случае мы разбиваем датасет на $m-n$ выборок и у нас пристуствуют повторяющиеся элементы

Стоит ли использовать второй случай с повторяющимися элементами? - ответ Да, это даст больше тренировочных значений

In [14]:
from torch.utils.data import Dataset
class TSDataset(Dataset):
    def __init__(self, X: pd.DataFrame, lookback: int = 14):
        # assert  X.shape[0] == y.shape[0], "X and y must have the same number of lines"
        self.X = X.iloc[lookback:, :].to_numpy('float32')
        self.y = X.shift(-lookback).iloc[:-lookback, :5].to_numpy('float32')
            
        self.X = torch.from_numpy(self.X)
        self.y = torch.from_numpy(self.y)
        self.lookback = lookback
        
    def __len__(self):
        return self.X.shape[0]
        
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
    # @staticmethod
    # def create_dataset(dataset: pd.ndarray, loockback_val: int = 14) -> tuple[torch.Tensor, torch.Tensor]:
    #     X, y = [], []
    #     for i in range(loockback_val, dataset.shape[0]):
    #         X.append(dataset[i-loockback_val:i, :].to_numpy())
    #         y.append(dataset[i - loockback_val + 1:i+1, 0:5].to_numpy())
    #     return torch.Tensor(X), torch.Tensor(y)  

In [15]:
train_dataset = TSDataset(train_eda, lookback=1)
test_dataset = TSDataset(test_eda, lookback=1)

In [16]:
test_dataset.X.shape

torch.Size([186, 21])

In [17]:
test_dataset.y.shape

torch.Size([186, 5])

In [18]:
from torch.utils.data import DataLoader

batch_size = 32

train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True
)

for i, (X_batch, y_batch) in enumerate(train_dataloader):
    print(f'Batch {i}: X_batch: {X_batch.shape}, y_batch {y_batch.shape}')

Batch 0: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 1: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 2: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 3: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 4: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 5: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 6: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 7: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 8: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 9: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 10: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 11: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 12: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 13: X_batch: torch.Size([32, 21]), y_batch torch.Size([32, 5])
Batch 14: X_batch: torch.Size([32, 21]), y_b

In [19]:
class Kleopatra(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=21, hidden_size=100, num_layers=2, batch_first=True)
        self.linear = nn.Linear(100, 5)
    
    def forward(self, X):
        X, _ = self.lstm(X)
        X = self.linear(X)
        return X

Датасет - используем функцию lookback window с определенным шагом, далее батчим датасет

In [20]:
model = Kleopatra()
print(model)

Kleopatra(
  (lstm): LSTM(21, 100, num_layers=2, batch_first=True)
  (linear): Linear(in_features=100, out_features=5, bias=True)
)


In [21]:
optim = torch.optim.Adam(model.parameters())
loss_fn = torch.nn.MSELoss()

n_epochs = 200

for epoch in range(n_epochs+1):
    model.train()
    for X_batch, y_batch in train_dataloader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optim.zero_grad()
        loss.backward()
        optim.step()
    if epoch != 100:
        continue
    model.eval()
    with torch.no_grad():
        y_pred = model(train_dataset.X)
        train_mse = loss_fn(y_pred, train_dataset.y)
        y_pred = model(test_dataset.X)
        test_mse = loss_fn(y_pred, test_dataset.y)
    print("Epoch %d: train MSE %.4f, test MSE %.4f" % (epoch, train_mse, test_mse))

tensor(0.3204, grad_fn=<MseLossBackward0>)
tensor(0.3652, grad_fn=<MseLossBackward0>)
tensor(0.3996, grad_fn=<MseLossBackward0>)
tensor(0.2999, grad_fn=<MseLossBackward0>)
tensor(0.2845, grad_fn=<MseLossBackward0>)
tensor(0.2613, grad_fn=<MseLossBackward0>)
tensor(0.3539, grad_fn=<MseLossBackward0>)
tensor(0.1993, grad_fn=<MseLossBackward0>)
tensor(0.2940, grad_fn=<MseLossBackward0>)
tensor(0.2595, grad_fn=<MseLossBackward0>)
tensor(0.2304, grad_fn=<MseLossBackward0>)
tensor(0.2894, grad_fn=<MseLossBackward0>)
tensor(0.1724, grad_fn=<MseLossBackward0>)
tensor(0.2177, grad_fn=<MseLossBackward0>)
tensor(0.3065, grad_fn=<MseLossBackward0>)
tensor(0.2410, grad_fn=<MseLossBackward0>)
tensor(0.2392, grad_fn=<MseLossBackward0>)
tensor(0.2105, grad_fn=<MseLossBackward0>)
tensor(0.1601, grad_fn=<MseLossBackward0>)
tensor(0.1693, grad_fn=<MseLossBackward0>)
tensor(0.3075, grad_fn=<MseLossBackward0>)
tensor(0.2331, grad_fn=<MseLossBackward0>)
tensor(0.1712, grad_fn=<MseLossBackward0>)
tensor(0.19

In [22]:
with torch.no_grad():
    y_pred = model(train_dataset.X)
    train_mse = loss_fn(y_pred, train_dataset.y)
    y_pred = model(test_dataset.X)
    test_mse = loss_fn(y_pred, test_dataset.y)
print("train MSE %.4f, test MSE %.4f" % (train_mse, test_mse))

train MSE 0.0002, test MSE 0.0002


In [23]:
for y_pred_el, y_test_el in zip(y_pred, test_dataset.y):
    print(y_pred_el, y_test_el)

tensor([-1.1517,  0.4431,  0.4126,  0.4355,  0.4332]) tensor([-1.2372,  0.4300,  0.4093,  0.4312,  0.4281])
tensor([0.1813, 0.5320, 0.5369, 0.5025, 0.5238]) tensor([0.1918, 0.4803, 0.5248, 0.4814, 0.5423])
tensor([0.4802, 0.4604, 0.4449, 0.4609, 0.4517]) tensor([0.4659, 0.4407, 0.4234, 0.4297, 0.4412])
tensor([-0.7065,  0.5795,  0.5681,  0.5545,  0.5608]) tensor([-0.7079,  0.5624,  0.5656,  0.5401,  0.5664])
tensor([-0.2257,  0.4138,  0.3898,  0.4090,  0.4022]) tensor([-0.2244,  0.3985,  0.3857,  0.3930,  0.3868])
tensor([-0.2435,  0.2652,  0.2387,  0.2524,  0.2491]) tensor([-0.2422,  0.2577,  0.2386,  0.2472,  0.2451])
tensor([0.4894, 0.2706, 0.2445, 0.2674, 0.2537]) tensor([0.4925, 0.2557, 0.2463, 0.2652, 0.2657])
tensor([-0.5820,  0.7311,  0.7195,  0.6836,  0.7041]) tensor([-0.5717,  0.6886,  0.7412,  0.6897,  0.7570])
tensor([-0.8763,  0.4565,  0.4301,  0.4433,  0.4342]) tensor([-0.8671,  0.4318,  0.4266,  0.4387,  0.4422])
tensor([0.7085, 0.4495, 0.4339, 0.4277, 0.4303]) tensor([0

In [24]:
test_dataset.X[0]

tensor([-1.2372e+00,  4.3002e-01,  4.0934e-01,  4.3123e-01,  4.2813e-01,
         2.4885e-01,  2.3496e-01,  8.9506e-01,  3.7429e-01,  1.3977e-08,
         1.0405e-12,  4.5171e-15,  1.6314e-10,  7.9740e-07,  5.2748e-04,
         4.7222e-02,  5.7213e-01,  9.3812e-01,  2.0818e-01,  6.2519e-03,
         2.5410e-05])

Pipeline надо сделать для последовательной обработки ряда после предсказания - по сути это тот же preprocessing_pipeline только без scallers 

Для предсказания нам необходимо:
1. Дата для конвертации -> надо создать список будующих дат по которым мы будем предсказывать, с возможность последовательную дату в методе predict
2. Надо создать метод, который позволит рассчитать технические метрики на основании прошлых. Но в этом случае надо будет руководствоваться датой. Т. е. нам нужен будет временный массив, который "объединит" новые предсказанные значения с предыдущими для рассчета метрики.
3.  В методе predict необходимо будет создать временной массив, который будет содержать предсказанные даты

In [27]:
def prediction(days: int = 30):
    first_input = test_dataset.X[0].unsqueeze(-2)
    pred = []
    i = 0
    with torch.no_grad():
        pred.append(model(first_input))
        i += 1        
        while i < days:
            y_pred = model(pred[i-1].unsqueeze(-2))
            print(y_pred)
            i += 1
        # y_pred = model(test_dataset.X)
        # y_pred.

In [28]:
prediction()

RuntimeError: input.size(-1) must be equal to input_size. Expected 21, got 5