# IMPORT

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from matplotlib import pyplot as plt
import seaborn as sns
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, random_split
from torch import nn
import torch
from torch.optim.lr_scheduler import StepLR
import torchmetrics
from transformers import AutoTokenizer
import natasha
import mlflow
import json
from IPython.display import clear_output
from sklearn.preprocessing import LabelEncoder
import re
import psycopg2
from torchinfo import summary
from datetime import datetime
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss, f1_score, confusion_matrix, classification_report

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()
DB_HOST = os.environ.get("DB_HOST")
DB_PORT = os.environ.get("DB_PORT")
DB_USER = os.environ.get("DB_USER")
DB_PASS = os.environ.get("DB_PASS")
DB_NAME = os.environ.get("DB_NAME")

In [3]:
current_date = datetime.strptime("2024-01-09 00:00:00", "%Y-%d-%m %H:%M:%S")

# FUNC

In [4]:
def R2_torch(preds, target, eps=1e-8):
    ss_res = ((preds - target) ** 2).sum(dim=1)
    ss_tot = ((target - target.mean(dim=1, keepdim=True)) ** 2).sum(dim=1)
    
    # Если ss_tot == 0 и ss_res == 0 (предсказания точные) — вернуть 1.0
    # Если ss_tot == 0 и ss_res > 0 (ошибка есть) — вернуть 0.0 или NaN по желанию
    r2 = torch.where(ss_tot < eps,
                     torch.where(ss_res < eps, torch.ones_like(ss_res), torch.zeros_like(ss_res)),
                     1 - ss_res / ss_tot)
    return r2.mean().item()

In [5]:
def train_model(model_config, device, train_loader, test_loader,
                model = None, optimizer = None, scheduler = None,
                weight_decay = 1e-3,
                lr = 1e-3,
                start_epoch = 0,
                epochs = 10):
    try:
        if model == None:
            model = Forecaster(model_config).to(device)
        if optimizer == None:
            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
        if scheduler is not None:
            scheduler = StepLR(optimizer, step_size=scheduler["step_size"], gamma=scheduler["gamma"])
        with open("artifacts/model_summary.txt", "w", encoding="utf-8") as f:
            f.write(str(summary(model)))
        mlflow.log_artifacts("artifacts")
        l1_loss = nn.L1Loss(); l2_loss = nn.MSELoss()
        for ep in tqdm(range(start_epoch, start_epoch + epochs)):
            epoch_loss_train = 0; epoch_loss_test = 0
            # TRAIN
            for sample in tqdm(train_loader, desc = f"Epoch: {ep}"):
                X_time_ = sample[0].to(device); Add_feature_ts_ = sample[1].to(device); X_news_embedd_ = sample[2].to(device)
                X_news_mask_ = sample[3].to(device); TICK_ = sample[4].to(device); Y_ = sample[5].to(device)
                pred = model(
                    time = X_time_,
                    add_time = Add_feature_ts_,
                    ticker = TICK_,
                    news = X_news_embedd_,
                    news_mask = X_news_mask_
                )
                loss = l1_loss(pred, Y_) + l2_loss(pred, Y_)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                epoch_loss_train += loss.item() * len(pred)
            # TEST
            for sample in tqdm(test_loader, desc = f"Epoch: {ep}"):
                X_time_ = sample[0].to(device); Add_feature_ts_ = sample[1].to(device); X_news_embedd_ = sample[2].to(device)
                X_news_mask_ = sample[3].to(device); TICK_ = sample[4].to(device); Y_ = sample[5].to(device)
                with torch.no_grad():
                    pred = model(
                        time = X_time_,
                        add_time = Add_feature_ts_,
                        ticker = TICK_,
                        news = X_news_embedd_,
                        news_mask = X_news_mask_
                    )
                loss = l1_loss(pred, Y_) + l2_loss(pred, Y_)
                epoch_loss_test += loss.item() * len(pred)   
            mlflow.log_metrics({"train loss":epoch_loss_train, "test loss":epoch_loss_test}, step=ep)
            if scheduler is not None:
                scheduler.step()
            if ep % 10 == 0:
                clear_output()
        print(start_epoch, epochs, epoch_loss_test, epoch_loss_train)
    except KeyboardInterrupt:
        torch.save(model, "model.pt")
        torch.save(optimizer.state_dict(), "optimizer_state_dict.pt")
        print("save with error")
        return model, optimizer
    except Exception as e:
        print(e)
    finally:
        torch.save(model, "model.pt")
        torch.save(optimizer.state_dict(), "optimizer_state_dict.pt")
        print("done")
        return model, optimizer

In [6]:
def news_clean(text: str, segmenter:natasha.Segmenter, word: str = None):
    sentenses = []
    doc = natasha.Doc(text)
    doc.segment(segmenter)
    for sent in doc.sents:
        if word.lower() in sent.text.lower():
           sentenses.append(sent.text) 
    sentenses = ' '.join(sentenses)
    return sentenses

In [7]:
def pd_to_np_time_seties(df_pd:pd.DataFrame, index_column_time=None, x_step=50, y_step=10, columns_x=[1, 2, 3, 4, 5], columns_y=[2, 3]) -> np.array:   
    df_pd = df_pd.reset_index(drop=True)
    if index_column_time != None:
       time = df_pd.loc[:, index_column_time].copy()
    
    num_samples = df_pd.shape[0]
    num_samples = np.floor(num_samples / (x_step+y_step)).astype(int)
    y_features = len(columns_y)
    x_features = len(columns_x)

    

    X, Y = np.empty((num_samples, x_step, x_features)), np.empty((num_samples, y_step, y_features))
    if index_column_time != None:
        T = np.empty((num_samples, 2), dtype=pd.Timestamp)

    start_x = 0
    start_y = x_step
    for index in tqdm(range(0, num_samples)):
        if df_pd.loc[start_x:start_x + x_step - 1, columns_x].shape[0] == 0:
            print(df_pd.loc[start_x-60:start_x + x_step - 1, columns_x])
            print('start, end', start_x, start_y)
        X[index] = df_pd.loc[start_x:start_x + x_step - 1, columns_x].to_numpy().reshape((x_step, x_features))
        
        Y[index] = df_pd.loc[start_y:start_y + y_step - 1, columns_y].to_numpy().reshape((y_step, y_features))
        
        if index_column_time != None:
            T[index] = [time.loc[start_x:start_x + x_step].min(), time.loc[start_x:start_x + x_step].max()]

        start_x = start_x + x_step + y_step
        start_y = start_y + x_step + y_step
        
    if index_column_time != None:
        return X, Y, T
    else:
        return X, Y

In [8]:
def clean_text(text: str) -> str:
    # Заменяем управляющие символы (\n, \t и прочие) на пробел
    text = re.sub(r'\s+', ' ', text)

    # Удаляем ссылки (http, https, www)
    text = re.sub(r'https?:\/\/\S+|www\.\S+|https?:\S+', '', text)

    # Удаляем смайлики (оставляем только буквы, цифры и знаки препинания)
    text = re.sub(r'[^\w\s.,!?\"\'():;\-]', '', text)

    return text.strip()

In [9]:
def compute_rsi(prices: torch.Tensor, period: int = 14):
    """
    Вычисляет RSI для каждого временного ряда из батча.
    prices: тензор размера [batch, seq_len]
    Возвращает тензор RSI для последнего временного шага: [batch]
    """
    # Вычисляем изменения цены
    delta = prices[:, 1:] - prices[:, :-1]  # [batch, seq_len-1]
    # Вычисляем прирост и потери
    gain = torch.clamp(delta, min=0)
    loss = torch.clamp(-delta, min=0)
    
    # Если шагов меньше, чем период, возвращаем NaN
    if delta.shape[1] < period:
        raise ValueError("Длина временного ряда меньше, чем период RSI")
    
    # Усредняем приросты и потери по последнему периоду
    avg_gain = gain[:, -period:].mean(dim=1)
    avg_loss = loss[:, -period:].mean(dim=1)
    
    rs = avg_gain / (avg_loss + 1e-8)  # добавляем маленькое число, чтобы не делить на 0
    rsi = 100 - (100 / (1 + rs))
    return rsi  # [batch]

def compute_ema(prices: torch.Tensor, period: int):
    """
    Вычисляет EMA по заданному периоду для каждого временного ряда.
    prices: тензор размера [batch, seq_len]
    Возвращает EMA с той же размерностью [batch, seq_len].
    """
    alpha = 2 / (period + 1)
    ema = torch.zeros_like(prices)
    ema[:, 0] = prices[:, 0]  # инициализация первым значением
    # Итеративное вычисление EMA
    for t in range(1, prices.shape[1]):
        ema[:, t] = alpha * prices[:, t] + (1 - alpha) * ema[:, t-1]
    return ema

def compute_macd(prices: torch.Tensor, short_period: int = 5, long_period: int = 12, signal_period: int = 5):
    """
    Вычисляет MACD, сигнальную линию и гистограмму для каждого временного ряда.
    Из-за малого числа шагов в ряду (24 шага) стандартные периоды (12, 26, 9) могут быть не применимы.
    Поэтому можно использовать меньшие периоды.
    """
    ema_short = compute_ema(prices, short_period)
    ema_long  = compute_ema(prices, long_period)
    
    # MACD как разница EMA короткого и длинного периодов
    macd = ema_short - ema_long
    
    # Сигнальная линия – EMA от MACD
    signal_line = compute_ema(macd, signal_period)
    # Гистограмма – разница между MACD и сигнальной линией
    histogram = macd - signal_line
    return macd, signal_line, histogram

In [10]:
def text_to_list_float(row):
    embedding = [float(value) for value in row["embedding"][1:-1].split(",")]
    return embedding

# NEWS

In [11]:
model_name = "matvej-melikhov/ruBERT-finetuned-lenta"
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [12]:
query = f"""SELECT news.text, news.ticker, dsc.name, dsc.sector, news.date, emb.embedding FROM
            news_with_ticker news LEFT JOIN ticker_describe dsc ON news.ticker = dsc.ticker
            LEFT JOIN news_with_ticker_embedd emb on news.id = emb.id
            """

with psycopg2.connect(dbname = DB_NAME, user = DB_USER, password = DB_PASS, host = DB_HOST, port = DB_PORT) as conn:
    with conn.cursor() as cur:
        cur.execute(query)
        news = pd.DataFrame(cur.fetchall(), columns=["text", "ticker", "name", "sector", "date", "embedding"])

# query = f"""SELECT news.date, news.text, news.channel, emb.embedding FROM
#             news LEFT JOIN news_embedd emb ON news.id = emb.id
#             """

# with psycopg2.connect(dbname = DB_NAME, user = DB_USER, password = DB_PASS, host = DB_HOST, port = DB_PORT) as conn:
#     with conn.cursor() as cur:
#         cur.execute(query)
#         news = pd.DataFrame(cur.fetchall(), columns=["date", "text", "channel", "embedding"])

In [13]:
news["embedding"] = news.apply(text_to_list_float, axis=1)

# TS

In [14]:
query = f"""SELECT * FROM price_hour"""
with psycopg2.connect(dbname = DB_NAME, user = DB_USER, password = DB_PASS, host = DB_HOST, port = DB_PORT) as conn:
    with conn.cursor() as cur:
        cur.execute(query)
        time_series = pd.DataFrame(cur.fetchall(), columns=["open", "low", "high", "close", "volume", "date", "ticker"])

In [15]:
# # optional
time_series = time_series[time_series["date"] <= current_date]
news = news[news["date"] <= current_date]
tickers_ = np.array(['MGNT', 'PHOR', 'MAGN', 'VTBR', 'RUAL', 'ALRS', 'FLOT', 'TATN',
       'RTKM', 'MTSS', 'NVTK', 'SNGS', 'ROSN', 'CHMF', 'KMAZ', 'YDEX',
       'AFLT', 'SBER', 'MVID', 'MTLR', 'GAZP', 'LENT', 'BSPB', 'SIBN',
       'HYDR', 'NLMK', 'RASP', 'GMKN', 'AFKS', 'PLZL', 'LKOH'],
      dtype=object)


time_series = time_series[time_series["ticker"].isin(tickers_)]
if news.get("ticker") is not None:
    news = news[news["ticker"].isin(tickers_)]
time_series.reset_index(inplace=True, drop=True)
news.reset_index(inplace=True, drop=True)

In [16]:
time_series['weekday'] = time_series['date'].apply(lambda row: row.weekday()) + 1
time_series['hour'] = time_series['date'].apply(lambda row: row.hour)
time_series['hour_sin'] = np.sin(np.pi * 2 * time_series['hour'] / 24)
time_series['hour_cos'] = np.cos(np.pi * 2 * time_series['hour'] / 24)
time_series['weekday_sin'] = np.sin(np.pi * 2 * time_series['weekday'] / 7)
time_series['weekday_cos'] = np.cos(np.pi * 2 * time_series['weekday'] / 7)
time_series

Unnamed: 0,open,low,high,close,volume,date,ticker,weekday,hour,hour_sin,hour_cos,weekday_sin,weekday_cos
0,5601.0,5601.0,5601.0,5601.0,13,2021-08-30 09:00:00,MGNT,1,9,7.071068e-01,-0.707107,0.781831,0.62349
1,5601.0,5578.5,5623.5,5590.0,38026,2021-08-30 10:00:00,MGNT,1,10,5.000000e-01,-0.866025,0.781831,0.62349
2,5592.0,5558.0,5593.5,5577.5,26215,2021-08-30 11:00:00,MGNT,1,11,2.588190e-01,-0.965926,0.781831,0.62349
3,5577.5,5557.5,5577.5,5561.5,14164,2021-08-30 12:00:00,MGNT,1,12,1.224647e-16,-1.000000,0.781831,0.62349
4,5561.5,5544.0,5579.0,5560.0,18672,2021-08-30 13:00:00,MGNT,1,13,-2.588190e-01,-0.965926,0.781831,0.62349
...,...,...,...,...,...,...,...,...,...,...,...,...,...
380727,6131.0,6116.0,6132.0,6132.0,400,2024-08-31 19:00:00,LKOH,6,19,-9.659258e-01,0.258819,-0.781831,0.62349
380728,6132.0,6124.0,6139.5,6133.0,281,2024-08-31 20:00:00,LKOH,6,20,-8.660254e-01,0.500000,-0.781831,0.62349
380729,6133.0,6112.5,6144.0,6120.0,719,2024-08-31 21:00:00,LKOH,6,21,-7.071068e-01,0.707107,-0.781831,0.62349
380730,6120.0,6119.0,6133.5,6122.5,245,2024-08-31 22:00:00,LKOH,6,22,-5.000000e-01,0.866025,-0.781831,0.62349


In [17]:
x_step = 24
y_step = 12
x_column = ['open', 'low', 'high', 'close', 'volume', 'weekday_sin', 'weekday_cos', 'hour_sin', 'hour_cos']
y_column = ['close']
x_feature = len(x_column)
y_feature = len(y_column)

In [18]:
def return_batch(time_series):
    X_time = np.zeros((0, x_step, x_feature))
    Y = np.zeros((0, y_step, y_feature))
    T = np.zeros((0, 2))
    TICK = np.empty((0))

    for ticker in time_series['ticker'].unique():
        X_time_, Y_, T_ = pd_to_np_time_seties(time_series[time_series['ticker'] == ticker], 
                                            index_column_time='date', 
                                            x_step=x_step, 
                                            y_step=y_step, 
                                            columns_x=x_column, 
                                            columns_y=y_column)
        
        X_time = np.concatenate((X_time, X_time_), axis=0)
        Y = np.concatenate((Y, Y_), axis=0)
        T = np.concatenate((T, T_), axis=0)
        TICK = np.concatenate((TICK, np.array([ticker] * X_time_.shape[0])), axis=0)

    X_time = torch.from_numpy(X_time).float()
    Y = torch.from_numpy(Y).float()
    if Y.shape[-1] == 1:
        Y = Y.reshape([Y.shape[0], -1])

    X_time[:, :, :5] = torch.log10(X_time[:, :, :5])
    X_time[X_time == -torch.inf] = 0
    Y = torch.log10(Y)

    Add_feature_ts = torch.empty([X_time.shape[0], 5])
    # rsi
    Add_feature_ts[:, 0] = compute_rsi(X_time[:, :, 3])
    # mean
    Add_feature_ts[:, 1] = X_time[:, :, 3].mean(dim=1)
    Add_feature_ts[:, 2] = X_time[:, :, 4].mean(dim=1)
    # std
    Add_feature_ts[:, 3] = X_time[:, :, 3].std(dim=1)
    Add_feature_ts[:, 4] = X_time[:, :, 4].std(dim=1)

    # macd, signal_line, histogram
    macd, signal_line, histogram = compute_macd(X_time[:, :, 3], 3, 5, 4)
    X_time = torch.cat([X_time, macd.unsqueeze(-1), signal_line.unsqueeze(-1), histogram.unsqueeze(-1)], dim=2)
    
    return X_time, Add_feature_ts, T, TICK, Y

In [19]:
unique_date = np.sort(time_series["date"].unique())
sep_index = int(len(unique_date) * 0.85)

In [20]:
X_time_train, Add_feature_ts_train, T_train, TICK_train, Y_train = return_batch(time_series[time_series["date"].isin(unique_date[:sep_index])])
X_time_test, Add_feature_ts_test, T_test, TICK_test, Y_test = return_batch(time_series[time_series["date"].isin(unique_date[sep_index:])])

100%|██████████| 309/309 [00:00<00:00, 767.54it/s]
100%|██████████| 308/308 [00:00<00:00, 827.59it/s]
100%|██████████| 309/309 [00:00<00:00, 781.13it/s]
100%|██████████| 298/298 [00:00<00:00, 806.73it/s]
100%|██████████| 306/306 [00:00<00:00, 772.66it/s]
100%|██████████| 309/309 [00:00<00:00, 834.93it/s]
100%|██████████| 235/235 [00:00<00:00, 779.88it/s]
100%|██████████| 304/304 [00:00<00:00, 858.03it/s]
100%|██████████| 303/303 [00:00<00:00, 828.15it/s]
100%|██████████| 306/306 [00:00<00:00, 811.57it/s]
100%|██████████| 308/308 [00:00<00:00, 823.68it/s]
100%|██████████| 303/303 [00:00<00:00, 756.10it/s]
100%|██████████| 309/309 [00:00<00:00, 848.32it/s]
100%|██████████| 310/310 [00:00<00:00, 765.16it/s]
100%|██████████| 221/221 [00:00<00:00, 883.94it/s]
100%|██████████| 310/310 [00:00<00:00, 792.80it/s]
100%|██████████| 307/307 [00:00<00:00, 822.90it/s]
100%|██████████| 310/310 [00:00<00:00, 785.58it/s]
100%|██████████| 264/264 [00:00<00:00, 843.45it/s]
100%|██████████| 270/270 [00:00

In [21]:
torch.manual_seed(12312)
train_rand_index = torch.randperm(len(Y_train))
torch.manual_seed(12312)    
test_rand_index = torch.randperm(len(Y_test))

In [22]:
X_time_train, Add_feature_ts_train, T_train, TICK_train, Y_train = X_time_train[train_rand_index], Add_feature_ts_train[train_rand_index], T_train[train_rand_index], TICK_train[train_rand_index], Y_train[train_rand_index]
X_time_test, Add_feature_ts_test, T_test, TICK_test, Y_test = X_time_test[test_rand_index], Add_feature_ts_test[test_rand_index], T_test[test_rand_index], TICK_test[test_rand_index], Y_test[test_rand_index]

# FULL DATASET

In [23]:
# max_news = np.zeros(T.shape[0])
# for index in tqdm(range(T.shape[0])):
#     max_news[index] = news[(news.date >= T[index][0]) & (news.date <= T[index][1]) & (news.ticker == TICK[index])].shape[0]

# stat_table = pd.DataFrame({'stat':max_news, 'ticker':TICK})
# stat_table = stat_table.groupby(by='ticker').agg(['min', 'max', 'mean', 'std']).stat.sort_values(by='mean', ascending=False)

# stat_table.sort_values(by="mean", ascending=False)

In [24]:
max_n_news = 100
hidden_size = len(news['embedding'][0])

In [25]:
def get_news(n_sample, T, TICK):
    X_news_embedd = torch.full((n_sample, max_n_news, hidden_size), tokenizer.pad_token_id, dtype=torch.float32)
    X_news_mask = torch.zeros((n_sample, max_n_news))
    for index in tqdm(range(T.shape[0])):
        if news.get("ticker") is not None:
            ids_mask = news[(news.date >= T[index][0]) & (news.date <= T[index][1]) & (news.ticker == TICK[index])]
        else:
            ids_mask = news[(news.date >= T[index][0]) & (news.date <= T[index][1])]
        if ids_mask.shape[0] > max_n_news:
            np.random.seed(12312)
            ids_mask = ids_mask.sample(max_n_news)

        embedding = ids_mask.embedding.tolist()

        if embedding != []:
            embedding = torch.stack([torch.tensor(e) for e in embedding])
            X_news_embedd[index, :embedding.shape[0], :] = embedding
            X_news_mask[index, :embedding.shape[0]] = torch.ones((embedding.shape[0]))

    return X_news_embedd, X_news_mask

In [26]:
X_news_embedd_train, X_news_mask_train = get_news(X_time_train.shape[0], T_train, TICK_train)
X_news_embedd_test, X_news_mask_test = get_news(X_time_test.shape[0], T_test, TICK_test)

  0%|          | 0/8916 [00:00<?, ?it/s]

100%|██████████| 8916/8916 [00:30<00:00, 290.58it/s]
100%|██████████| 1625/1625 [00:05<00:00, 310.24it/s]


In [27]:
label_encoder = LabelEncoder().fit(tickers_)
TICK_train = torch.from_numpy(label_encoder.fit_transform(TICK_train))
TICK_test = torch.from_numpy(label_encoder.fit_transform(TICK_test))

In [28]:
X_news_mask_test = X_news_mask_test.int()
X_news_mask_train = X_news_mask_train.int()

In [29]:
TRAIN_LOADER = TensorDataset(X_time_train, Add_feature_ts_train, X_news_embedd_train, X_news_mask_train, TICK_train, Y_train)
TEST_LOADER = TensorDataset(X_time_test, Add_feature_ts_test, X_news_embedd_test, X_news_mask_test, TICK_test, Y_test)

batch_size = 50
TRAIN_LOADER = DataLoader(TRAIN_LOADER, batch_size=50)
TEST_LOADER = DataLoader(TEST_LOADER, batch_size=50)

# MODEL

In [30]:
class BLOCK_TS(nn.Module):
    def __init__(self, config_:dict = None):
        super().__init__()
        self.config = {
            "x_feature" : 4,
            "hidden_size" : 36,
            "num_layers" : 4,
            "output_size": 70
        }

        if config_ != None:
            for key in config_.keys():
                self.config[key] = config_[key]
        self.lstm = nn.LSTM(self.config["x_feature"], self.config["hidden_size"], self.config["num_layers"], batch_first=True)
        # self.fc = nn.Sequential(
        #     nn.LayerNorm(self.config["hidden_size"]),
        #     nn.Linear(self.config["hidden_size"], self.config["hidden_size"] * 4),
        #     nn.SiLU(),
        #     nn.Linear(self.config["hidden_size"] * 4, self.config["output_size"])
        # )
        self.fc = nn.Sequential(
            nn.Linear(self.config["hidden_size"], self.config["output_size"])
        )
    
    def forward(self, time:torch.Tensor):
        h0 = torch.zeros(self.config["num_layers"], time.size(0), self.config["hidden_size"]).to(time.device)
        c0 = torch.zeros(self.config["num_layers"], time.size(0), self.config["hidden_size"]).to(time.device)
        out, _ = self.lstm(time, (h0, c0))
        out = out[:, -1, :]
        out = self.fc(out)
        return out

In [31]:
class CrossAttention(torch.nn.Module):
    def __init__(self, hidden_size:int, num_heads:int):
        super().__init__()
        self.cross_attn = torch.nn.MultiheadAttention(embed_dim=hidden_size, num_heads=num_heads, batch_first=True)
        self.activate = nn.SiLU()
    def forward(self, time_series, news):
        attn_output, _ = self.cross_attn(time_series, news, news)
        return self.activate(attn_output)

In [38]:
class Forecaster(nn.Module):
    def __init__(self, config_, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.config = {
            "attention_layer" : 4,
            "hidden_size" : 64,
            "num_heads" : 4,
            "news_embedd" : 768,
            "n_add_feature_ts" : 5,
            "tick_count" : 0,
            "ticker_embedd_dim":0,
            "y_feature":12,
            "block_ts_config" : {
                "x_feature" : 4,
                "hidden_size" : 36,
                "num_layers" : 4,
                "output_size":70
            }
        }
        if config_ != None:
            for key in config_.keys():
                self.config[key] = config_[key]
        self.config["block_ts_config"]["output_size"] = self.config["hidden_size"]
        self.config["hidden_size"] = self.config["hidden_size"] + self.config["ticker_embedd_dim"]
        if self.config["ticker_embedd_dim"] != 0:
            self.ticker_embedd = nn.Embedding(self.config["tick_count"], self.config["ticker_embedd_dim"])
        self.block_ts = BLOCK_TS(self.config["block_ts_config"])
        self.news_proj = nn.Sequential(
            nn.Linear(self.config["news_embedd"], self.config["hidden_size"]),
            nn.LayerNorm(self.config["hidden_size"])
        )
        self.cross_attention = nn.ModuleList(CrossAttention(self.config["hidden_size"], self.config["num_heads"]) for _ in range(self.config["attention_layer"]))
        n_feature = self.config["hidden_size"] + self.config["n_add_feature_ts"]
        self.predict_fc = nn.Sequential(
                nn.LayerNorm(n_feature),
                nn.SiLU(),
                nn.Linear(n_feature, n_feature * 4),
                nn.LayerNorm(n_feature * 4),
                nn.Linear(n_feature * 4, self.config["y_feature"]),

        ) 
    def forward(self, time:torch.Tensor, add_time:torch.Tensor, news:torch.Tensor, news_mask:torch.Tensor=None, ticker:torch.Tensor=None):
        # FEATURE
        time = self.block_ts(time)
        if self.config["ticker_embedd_dim"] != 0:
            ticker_embedd = self.ticker_embedd(ticker)
            time = torch.cat([time, ticker_embedd], dim=1)
        news = self.news_proj(news)
        if news_mask is not None:
            news = news * news_mask.unsqueeze(-1)
        # attention
        time = time.unsqueeze(1)
        for layer in self.cross_attention:
            time = time + layer(time, news)
        time = time.squeeze(1)
        time = torch.cat([time, add_time], dim=1)
        # FC
        predict_price = self.predict_fc(time)
        return predict_price

# TRAIN

In [39]:
# name_experiment = "TRANSFORMER_classs"
# mlflow.set_experiment(name_experiment)

In [40]:
model_config = {
            "attention_layer" : 4,
            "hidden_size" : 64,
            "num_heads" : 4,
            "news_embedd" : 768,
            "n_add_feature_ts" : 5,
            "tick_count" : len(tickers_),
            "ticker_embedd_dim":4,
            "y_feature":y_step,
            "block_ts_config" : {
                "x_feature" : X_time_train.shape[-1],
                "hidden_size" : 36,
                "num_layers" : 4,
            },
            "tokenizer":tokenizer.name_or_path
        }

scheduler = {
    "step_size":400,
    "gamma":0.5
}

In [41]:
import shutil

In [42]:
Add_feature_ts_train

tensor([[6.6927e+01, 2.2423e+00, 4.7399e+00, 2.0693e-03, 7.8522e-01],
        [2.1473e+01, 1.8226e+00, 4.3458e+00, 2.0234e-03, 4.8217e-01],
        [5.2691e+01, 3.0205e+00, 3.7416e+00, 2.3734e-03, 1.0736e+00],
        ...,
        [3.7996e+01, 1.6177e+00, 5.0863e+00, 2.9990e-03, 6.8899e-01],
        [2.5276e+01, 2.1198e+00, 5.9749e+00, 4.9383e-03, 6.4870e-01],
        [6.6457e+01, 3.7763e+00, 4.5156e+00, 5.0502e-03, 6.4723e-01]])

In [43]:
# Первый запуск
try:
    run = mlflow.start_run()
    shutil.rmtree("artifacts/", ignore_errors=True)
    os.makedirs("artifacts/", exist_ok=True)
    with open("artifacts/model_config.json", "w") as f:
        json.dump(model_config, f)
    model, optimizer = train_model(model_config, device, TRAIN_LOADER, TEST_LOADER,
                                   epochs=1500, lr=3e-4, weight_decay=0.1, scheduler=scheduler)
finally:
    mlflow.end_run()

Epoch: 1491: 100%|██████████| 179/179 [00:02<00:00, 66.82it/s]
Epoch: 1491: 100%|██████████| 33/33 [00:00<00:00, 109.16it/s]
Epoch: 1492: 100%|██████████| 179/179 [00:02<00:00, 69.59it/s]
Epoch: 1492: 100%|██████████| 33/33 [00:00<00:00, 136.64it/s]
Epoch: 1493: 100%|██████████| 179/179 [00:02<00:00, 69.59it/s]
Epoch: 1493: 100%|██████████| 33/33 [00:00<00:00, 113.40it/s]
Epoch: 1494: 100%|██████████| 179/179 [00:02<00:00, 69.41it/s]
Epoch: 1494: 100%|██████████| 33/33 [00:00<00:00, 101.02it/s]
Epoch: 1495: 100%|██████████| 179/179 [00:02<00:00, 67.27it/s]
Epoch: 1495: 100%|██████████| 33/33 [00:00<00:00, 142.24it/s]
Epoch: 1496: 100%|██████████| 179/179 [00:02<00:00, 66.41it/s]
Epoch: 1496: 100%|██████████| 33/33 [00:00<00:00, 107.49it/s]
Epoch: 1497: 100%|██████████| 179/179 [00:02<00:00, 68.14it/s]
Epoch: 1497: 100%|██████████| 33/33 [00:00<00:00, 146.67it/s]
Epoch: 1498: 100%|██████████| 179/179 [00:02<00:00, 68.36it/s]
Epoch: 1498: 100%|██████████| 33/33 [00:00<00:00, 105.92it/s]


0 1500 32.253454299643636 67.40190029656515
done



