# Домашняя работа № 5

1. *Создать модель (торговую стратегию) на основе нейронных сетей для прогнозирования оптимального торгового действия. Можно использовать, как самостоятельно обученные архитектуры, так и использовать предобученные сети или фреймворки.*
2. *Провести тестирование разработанной стратегии на валидационном датасете.*
3. *Зафиксировать метрики модели для дальнейшего сравнения экспериментов.*
4. *Сформировать дашборд, показывающий эффективность различных торговых стратегий.*

In [1]:
#Подключаем библиотеки

import pandas as pd
import numpy as np
import yfinance as yf
import plotly.graph_objects as go
from scipy import stats
import plotly.express as px
import talib
import matplotlib.pyplot as plt
import pywt
import torch.nn as nn
import optuna

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, ConfusionMatrixDisplay,confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from datetime import date, datetime, timedelta
from plotly.subplots import make_subplots
from backtesting import Backtest, Strategy



In [2]:
# Загружаем данные
data = pd.read_csv('Investing_data.csv')
data['Date'] = pd.to_datetime(data['Date'])

In [3]:
data

Unnamed: 0,Date,tic,Open,High,Low,Close,Volume
0,2018-01-01,GAZP,130.56,130.93,128.86,130.50,1629536
1,2018-01-02,GAZP,130.56,130.93,128.86,130.50,1629536
2,2018-01-03,GAZP,131.03,132.20,130.63,132.20,1053392
3,2018-01-04,GAZP,132.50,136.20,132.30,135.89,3209651
4,2018-01-05,GAZP,135.60,137.12,135.08,137.12,1998116
...,...,...,...,...,...,...,...
57753,2025-05-13,SELG,79.20,79.30,78.00,79.00,2171
57754,2025-05-14,SELG,77.70,79.20,77.70,79.00,1123
57755,2025-05-15,SELG,78.80,79.10,77.70,78.50,1906
57756,2025-05-16,SELG,78.80,79.00,77.50,77.60,1614


#### Добавляем признаки

In [6]:
tickers = data['tic'].unique()
df = pd.DataFrame()
for i in tickers:
    data_temp = data.loc[data['tic'] == i]
    data_temp.loc[:,['sma_5']] = talib.SMA(data_temp["Close"], timeperiod=5)
    data_temp.loc[:,['sma_10']] = talib.SMA(data_temp["Close"], timeperiod=10)
    data_temp.loc[:,['sma_15']] = talib.SMA(data_temp["Close"], timeperiod=15)
    data_temp.loc[:,['sma_20']] = talib.SMA(data_temp["Close"], timeperiod=20)
    data_temp.loc[:,['sma_50']] = talib.SMA(data_temp["Close"], timeperiod=50)
    data_temp.loc[:,['sma_200']] = talib.SMA(data_temp["Close"], timeperiod=200)
    data_temp.loc[:,['sma_ratio_5_15']] = data_temp['sma_15'] / data_temp['sma_5']
    data_temp.loc[:,['sma_ratio_10_50']] = data_temp['sma_50'] / data_temp['sma_15']
    data_temp.loc[:,['sma_5_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=5)
    data_temp.loc[:,['sma_10_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=10)
    data_temp.loc[:,['sma_15_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=15)
    data_temp.loc[:,['sma_50_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=50)
    data_temp.loc[:,["sma_Volume_ratio_5_15"]] = data_temp['sma_5_Volume'] / data_temp['sma_15_Volume']
    data_temp.loc[:,["sma_Volume_ratio_10_50"]] = data_temp['sma_10_Volume'] / data_temp['sma_50_Volume']
    data_temp.loc[:,["ema_5"]] = talib.EMA(data_temp["Close"], timeperiod=5)
    data_temp.loc[:,["ema_10"]] = talib.EMA(data_temp["Close"], timeperiod=10)
    data_temp.loc[:,["ema_20"]] = talib.EMA(data_temp["Close"], timeperiod=20)
    data_temp.loc[:,["ema_50"]] = talib.EMA(data_temp["Close"], timeperiod=50)
    data_temp.loc[:,["ema_200"]] = talib.EMA(data_temp["Close"], timeperiod=200)
    
    # Calculate the Bollinger Bands
    data_temp.loc[:,["upper_band"]], data_temp.loc[:,["middle_band"]], data_temp.loc[:,["lower_band"]] = talib.BBANDS(data_temp["Close"], timeperiod=20)
    
    # Calculate the relative strength index (RSI)
    data_temp.loc[:,["RSI"]] = talib.RSI(data_temp["Close"], timeperiod=14)
    
    # Calculate the MACD
    data_temp.loc[:,["macd"]], data_temp.loc[:,["macd_signal"]], data_temp.loc[:,["macd_hist"]] = talib.MACD(data_temp["Close"], fastperiod=12, slowperiod=26, signalperiod=9)
    
    data_temp.loc[:,['ADX_5']] = talib.ADX(data_temp['High'], data_temp['Low'], data_temp['Close'], timeperiod=5)
    data_temp.loc[:,['ADX_15']] = talib.ADX(data_temp['High'], data_temp['Low'], data_temp['Close'], timeperiod=15)
    
    data_temp.loc[:,['RSI_5']] = talib.RSI(data_temp['Close'], timeperiod=5)
    data_temp.loc[:,['RSI_15']] = talib.RSI(data_temp['Close'], timeperiod=15)
    data_temp.loc[:,['RSI_ratio']] = data_temp['RSI_5']/data_temp['RSI_15']
    
    data_temp.loc[:,['ROC']] = talib.ROC(data_temp['Close'], timeperiod=15)

    data_temp.loc[:,['HAMMER']] = talib.CDLHAMMER(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['SHOOTING_STAR']] = talib.CDLSHOOTINGSTAR(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['ENGULFING']] = talib.CDLENGULFING(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['MORNING_STAR']] = talib.CDLMORNINGSTAR(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['EVENING_STAR']] = talib.CDLEVENINGSTAR(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['MARUBOZU']] = talib.CDLMARUBOZU(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['DOJI']] = talib.CDLDOJI(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['lag_1day']] = data_temp['Close'].shift(1)
    data_temp.loc[:,['day_of_week']] = data_temp['Date'].dt.dayofweek   
    data_temp.dropna(inplace=True)
    df = pd.concat([df, data_temp], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a s

#### Добавляем компоненты вейвлет разложения как признаки

In [7]:
# Функция по разложению на компоненты, добавим их дальше как отдельные признаки
def get_wavelet_features(history, wavelet='db4', level=5):
    coeffs = pywt.wavedec(history, wavelet, level=level)
    features = {}
    for i, name in enumerate(['cA5', 'cD5', 'cD4', 'cD3', 'cD2', 'cD1']):
        if i < len(coeffs):
            # Берём среднее последних 5 коэффициентов для стабильности
            features[name] = np.mean(coeffs[i][-5:])
    return features

# Функция вейвлет-прогнозирования на 1 день вперёд, также добавим предсказание как признак
def wavelet_forecast(history, wavelet='db4', level=5):
    coeffs = pywt.wavedec(history, wavelet, level=level)
    threshold = 0.2
    coeffs = [pywt.threshold(c, threshold, mode='soft') for c in coeffs]
    restored = pywt.waverec(coeffs, wavelet)
    
    # Возвращаем последнее значение как прогноз
    return restored[-1] if len(restored) > 0 else np.nan

# Используем окно в 128 значения
window_size = 128
wavelet_df = pd.DataFrame()
for tic in tickers:
    wavelet_features_list = []
    
    for i in range(window_size, len(data[data['tic']== tic])):
        window = data[data['tic']== tic]['Close'].values[i - window_size:i]
        features = get_wavelet_features(window)
        features['wavelet_forecast'] = wavelet_forecast(window)
        features['Date'] = data[data['tic']== tic]['Date'].iloc[i]
        features['tic'] = tic
        wavelet_features_list.append(features)
    
    wavelet_df = pd.concat([wavelet_df, pd.DataFrame(wavelet_features_list).fillna(0)], ignore_index=True)
# Объединяем основной DataFrame с вейвлетами
df = pd.merge(df, wavelet_df, on=['Date', 'tic'], how='inner')




In [8]:
#Определим признаки, с которыми будет работать модель
features = ['sma_5', 'sma_10', 'sma_15', 'sma_50', 'sma_200', 'sma_ratio_5_15', 'sma_ratio_10_50', 
            'sma_5_Volume', 'sma_10_Volume', 'sma_15_Volume', 'sma_50_Volume', 'sma_Volume_ratio_5_15', 'sma_Volume_ratio_10_50', 'upper_band',
            'middle_band', 'lower_band', 'RSI', 'macd', 'macd_signal', 'macd_hist','day_of_week','lag_1day','cA5', 'cD5', 'cD4', 'cD3', 'cD2', 'cD1','wavelet_forecast',
            'ADX_5', 'ADX_15', 'RSI_5', 'RSI_15', 'RSI_ratio', 'ROC','HAMMER','SHOOTING_STAR','ENGULFING','MORNING_STAR','EVENING_STAR','MARUBOZU','DOJI']
not_features = [col for col in df.columns if col not in features]

In [9]:
df.dropna(inplace=True)
df = df.sort_values(by = ['Date','tic'],ascending = True, ignore_index = True)

In [10]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, Sampler

data_dict = {}

for ticker in tickers:

    data_dict[ticker] = df[df['tic']==ticker]

#### Разбиваем на окна

In [11]:
def create_windows(data_norm, data_raw, window_size=30):
    X, y, original_X = [], [], []
    for i in range(len(data_norm) - window_size - 1):
        # Для обучения модели: нормализованные данные
        window_norm = data_norm.iloc[i:i + window_size].copy()
        X.append(window_norm.values)

        # Для сохранения результатов
        label_close_next = data_raw['Close'].iloc[i + window_size]
        label_close_current = data_raw['Close'].iloc[i + window_size - 1]

        if label_close_next > label_close_current * 1.02:
            y.append(2)  # Покупка. Учитываем комиссию 2%
        elif label_close_next < label_close_current * 0.98:
            y.append(0)  # Продажа. Учитываем комиссию 2%
        else:
            y.append(1)  # Держим

        window_raw = data_raw.iloc[i:i + window_size].copy()
        original_X.append(window_raw[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'tic']])

    return X, y, original_X

#### Разбиваем на трейновую, тестовую и валидационную выборки. Нормализуем данные

In [12]:
from copy import deepcopy

def split_and_normalize(df, window_size=30, train_ratio=0.7, val_ratio=0.15):
    # Разделение на train/val/test
    num_rows = len(df)
    
    # Индексы разбиения
    train_end = int(num_rows * train_ratio)
    val_end = train_end + int(num_rows * val_ratio)

    # Разделение на части
    train_df = df.iloc[:train_end]
    val_df = df.iloc[train_end:val_end]
    test_df = df.iloc[val_end:]

    # Сохраняем датафреймы для дальнейшего формирования окон
    train_df_raw = train_df.copy()
    val_df_raw = val_df.copy()
    test_df_raw = test_df.copy()

    # Нормализация
    scaler = StandardScaler()
    train_df = pd.DataFrame(scaler.fit_transform(train_df[features]),
                            columns=features,
                            index=train_df.index)

    val_df = pd.DataFrame(scaler.transform(val_df[features]),
                          columns=features,
                          index=val_df.index)

    test_df = pd.DataFrame(scaler.transform(test_df[features]),
                           columns=features,
                           index=test_df.index)

    return train_df, val_df, test_df, train_df_raw, val_df_raw, test_df_raw

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

window_size = 30
ticker_to_idx = {ticker: idx for idx, ticker in enumerate(data_dict.keys())}

X_train_all, y_train_all, ticker_train_all, original_train_all = [], [], [], []
X_val_all, y_val_all, ticker_val_all, original_val_all = [], [], [], []
X_test_all, y_test_all, ticker_test_all, original_test_all = [], [], [], []

for ticker, df_tic in data_dict.items():
    train_df, val_df, test_df, train_df_raw, val_df_raw, test_df_raw = split_and_normalize(df_tic, window_size=window_size)

    def process_and_append(data_norm, data_raw, X_list, y_list, ticker_list, original_list):
        X, y, original = create_windows(data_norm, data_raw, window_size=window_size)
        X_list.extend(X)
        y_list.extend(y)
        ticker_list.extend([ticker_to_idx[ticker]] * len(X))
        original_list.extend(original)

    process_and_append(train_df, train_df_raw, X_train_all, y_train_all, ticker_train_all, original_train_all)
    process_and_append(val_df, val_df_raw, X_val_all, y_val_all, ticker_val_all, original_val_all)
    process_and_append(test_df, test_df_raw, X_test_all, y_test_all, ticker_test_all, original_test_all)


# Конвертируем в тензоры
X_train = torch.tensor(X_train_all, dtype=torch.float32)
y_train = torch.tensor(y_train_all, dtype=torch.long)
ticker_train = torch.tensor(ticker_train_all, dtype=torch.long)

X_val = torch.tensor(X_val_all, dtype=torch.float32)
y_val = torch.tensor(y_val_all, dtype=torch.long)
ticker_val = torch.tensor(ticker_val_all, dtype=torch.long)

X_test = torch.tensor(X_test_all, dtype=torch.float32)
y_test = torch.tensor(y_test_all, dtype=torch.long)
ticker_test = torch.tensor(ticker_test_all, dtype=torch.long)

  X_train = torch.tensor(X_train_all, dtype=torch.float32)


In [14]:
from collections import defaultdict

window_to_index = []  # список индексов конца окна в исходном датафрейме
test_ticker_labels = []  # тикеры для каждого окна
window_start_end_indices = []  # начало и конец окна для каждого примера

for ticker, df in data_dict.items():
    train_df, val_df, test_df, train_df_raw, val_df_raw, test_df_raw = split_and_normalize(df, window_size=30)

    # Формируем окна и запоминаем индексы
    for i in range(len(test_df) - window_size - 1):
        window_start = test_df.index[i]
        window_end = test_df.index[i + window_size - 1]
        target_index = test_df.index[i + window_size]

        window_start_end_indices.append((window_start, window_end, target_index))
        test_ticker_labels.append(ticker)
        window_to_index.append(target_index)

In [15]:
class GroupedBatchSampler(Sampler):
    def __init__(self, group_ids, batch_size):
        self.group_ids = group_ids
        self.batch_size = batch_size
        self.groups = {}
        for idx, g in enumerate(group_ids):
            if g not in self.groups:
                self.groups[g] = []
            self.groups[g].append(idx)
        self.group_indices = list(self.groups.values())

    def __iter__(self):
        for group in self.group_indices:
            for i in range(0, len(group), self.batch_size):
                yield group[i:i + self.batch_size]

    def __len__(self):
        count = 0
        for group in self.groups.values():
            count += (len(group) + self.batch_size - 1) // self.batch_size
        return count

In [16]:
from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(X_train, y_train, ticker_train)
val_dataset = TensorDataset(X_val, y_val, ticker_val)
test_dataset = TensorDataset(X_test, y_test, ticker_test)

train_sampler = GroupedBatchSampler(ticker_train.tolist(), batch_size=32)

train_loader = DataLoader(train_dataset, batch_sampler=train_sampler)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

#### Модель LSTM

In [17]:
class StockCNN_LSTM(nn.Module):
    def __init__(self, num_tickers, ticker_embedding_dim=8,
                 hidden_size=64, num_layers=1):
        super(StockCNN_LSTM, self).__init__()

        self.ticker_embedding = nn.Embedding(num_tickers, ticker_embedding_dim)

        # CNN
        self.conv1 = nn.Conv1d(len(features), 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)

        # LSTM
        self.lstm = nn.LSTM(input_size=32 + ticker_embedding_dim,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True)

        # FC
        self.fc1 = nn.Linear(hidden_size, 3)

    def forward(self, x, ticker_indices):
        batch_size, seq_len, _ = x.size()
        ticker_emb = self.ticker_embedding(ticker_indices).unsqueeze(1).expand(-1, seq_len, -1)

        x = x.permute(0, 2, 1)  # [B, features, seq_len]
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = x.permute(0, 2, 1)  # [B, seq_len, features]

        x = torch.cat([x, ticker_emb], dim=2)  # Добавляем тикер
        x, _ = self.lstm(x)
        x = x[:, -1, :]  # Берём последний шаг
        return self.fc1(x)

#### Модель Transformer

In [18]:
class StockTransformer(nn.Module):
    def __init__(self, num_tickers, ticker_embedding_dim=8, d_model=64, nhead=4, num_layers=2, dim_feedforward=128):
        super(StockTransformer, self).__init__()
        
        # Проверка совместимости
        assert (d_model + ticker_embedding_dim) % nhead == 0, \
            "d_model + ticker_emb_dim must be divisible by nhead"

        self.ticker_embedding = nn.Embedding(num_tickers, ticker_embedding_dim)

        input_dim = len(features)
        self.feature_proj = nn.Linear(input_dim, d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model + ticker_embedding_dim,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.classifier = nn.Linear(d_model + ticker_embedding_dim, 3)

    def forward(self, x, ticker_indices):
        batch_size, seq_len, _ = x.size()
        ticker_emb = self.ticker_embedding(ticker_indices).unsqueeze(1).expand(-1, seq_len, -1)
        features = self.feature_proj(x)
        x = torch.cat([features, ticker_emb], dim=2)
        x = self.transformer_encoder(x)
        x = x.mean(dim=1)
        return self.classifier(x)

#### Добавляем раннюю остановку, чтобы модель не переучивалась

In [19]:
class EarlyStopping:
    def __init__(self, patience=5, delta=0):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.counter = 0
        self.early_stop = False
        self.val_loss_min = np.Inf

    def __call__(self, val_loss, model, path='best_model.pth'):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        torch.save(model.state_dict(), path)
        self.val_loss_min = val_loss

In [20]:
def evaluate_metrics(loader, model, device='cpu'):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for X_batch, y_batch, ticker_batch in loader:
            outputs = model(X_batch.to(device), ticker_batch.to(device))
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y_batch.numpy())
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    return acc, f1

#### Подбираем гиперпараметры для LSTM

In [21]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

def objective_lstm(trial):
    # Гиперпараметры LSTM
    lr = trial.suggest_float('lr', 1e-5, 1e-3)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    ticker_emb_dim = trial.suggest_int('ticker_emb_dim', 4, 16)
    hidden_size = trial.suggest_int('hidden_size', 32, 128)
    num_layers = trial.suggest_int('num_layers', 1, 3)

    # Модель
    model = StockCNN_LSTM(
        num_tickers=len(data_dict),
        ticker_embedding_dim=ticker_emb_dim,
        hidden_size=hidden_size,
        num_layers=num_layers
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    early_stopping = EarlyStopping(patience=3)

    # DataLoader с новым батч-сайзом (опционально пересоздаём)
    train_sampler = GroupedBatchSampler(ticker_train.tolist(), batch_size=batch_size)
    train_loader = DataLoader(train_dataset, batch_sampler=train_sampler)

    for epoch in range(20):  # короткая тренировка для Optuna
        model.train()
        total_loss = 0
        for X_batch, y_batch, ticker_batch in train_loader:
            X_batch, y_batch, ticker_batch = X_batch.to(device), y_batch.to(device), ticker_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch, ticker_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        val_acc, val_f1 = evaluate_metrics(val_loader, model, device)
        early_stopping(val_loss=1 - val_f1, model=model)
        if early_stopping.early_stop:
            break

    return val_f1 

study_lstm = optuna.create_study(direction='maximize')
study_lstm.optimize(objective_lstm, n_trials=30)


[I 2025-05-26 12:35:08,136] A new study created in memory with name: no-name-dd41b7f3-d736-47dc-9b48-15cdd20c065e
[I 2025-05-26 12:37:27,833] Trial 0 finished with value: 0.7137994188689607 and parameters: {'lr': 0.00010047471056067334, 'batch_size': 16, 'ticker_emb_dim': 7, 'hidden_size': 51, 'num_layers': 1}. Best is trial 0 with value: 0.7137994188689607.
[I 2025-05-26 12:41:09,557] Trial 1 finished with value: 0.7144283833041922 and parameters: {'lr': 0.0009217684154208711, 'batch_size': 64, 'ticker_emb_dim': 11, 'hidden_size': 85, 'num_layers': 3}. Best is trial 1 with value: 0.7144283833041922.
[I 2025-05-26 12:46:29,789] Trial 2 finished with value: 0.7099725841859491 and parameters: {'lr': 1.5359136866040867e-05, 'batch_size': 16, 'ticker_emb_dim': 5, 'hidden_size': 103, 'num_layers': 1}. Best is trial 1 with value: 0.7144283833041922.
[I 2025-05-26 12:51:31,422] Trial 3 finished with value: 0.7197338330299992 and parameters: {'lr': 0.0008393225688262598, 'batch_size': 64, 'tic

#### Подбираем гиперпараметры для Transformer

In [22]:
def objective_transformer(trial):
    lr = trial.suggest_float('lr', 1e-5, 1e-3)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    d_model = trial.suggest_int('d_model', 32, 128)
    nhead = trial.suggest_categorical('nhead', [2, 4, 8])
    num_layers = trial.suggest_int('num_layers', 1, 4)
    dim_feedforward = trial.suggest_int('dim_feedforward', 64, 256)
    ticker_emb_dim = trial.suggest_int('ticker_emb_dim', 4, 16)

    if (d_model + ticker_emb_dim) % nhead != 0:
        raise optuna.TrialPruned()  # Отсеиваем плохие комбинации
        
    model = StockTransformer(
        num_tickers=len(data_dict),
        d_model=d_model,
        nhead=nhead,
        num_layers=num_layers,
        dim_feedforward=dim_feedforward,
        ticker_embedding_dim=ticker_emb_dim
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    early_stopping = EarlyStopping(patience=3)

    for epoch in range(20):
        model.train()
        for X_batch, y_batch, ticker_batch in train_loader:
            X_batch, y_batch, ticker_batch = X_batch.to(device), y_batch.to(device), ticker_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch, ticker_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

        val_acc, val_f1 = evaluate_metrics(val_loader, model, device)
        early_stopping(val_loss=1 - val_f1, model=model)
        if early_stopping.early_stop:
            break

    return val_f1


study_transformer = optuna.create_study(direction='maximize')
study_transformer.optimize(objective_transformer, n_trials=30)

[I 2025-05-26 14:38:01,205] A new study created in memory with name: no-name-f884f3af-f229-436f-a18e-2c8556847a0e
[I 2025-05-26 14:38:01,235] Trial 0 pruned. 
[I 2025-05-26 14:38:01,240] Trial 1 pruned. 
[I 2025-05-26 14:58:45,860] Trial 2 finished with value: 0.7101200931042854 and parameters: {'lr': 0.0008611635679132808, 'batch_size': 32, 'd_model': 126, 'nhead': 4, 'num_layers': 2, 'dim_feedforward': 246, 'ticker_emb_dim': 10}. Best is trial 2 with value: 0.7101200931042854.
[I 2025-05-26 14:58:45,865] Trial 3 pruned. 
[I 2025-05-26 14:58:45,871] Trial 4 pruned. 
[I 2025-05-26 15:16:16,641] Trial 5 finished with value: 0.7148920436549073 and parameters: {'lr': 0.0001664243762189509, 'batch_size': 32, 'd_model': 124, 'nhead': 4, 'num_layers': 4, 'dim_feedforward': 206, 'ticker_emb_dim': 8}. Best is trial 5 with value: 0.7148920436549073.
[I 2025-05-26 15:16:16,644] Trial 6 pruned. 
[I 2025-05-26 15:16:16,647] Trial 7 pruned. 
[I 2025-05-26 16:39:37,379] Trial 8 finished with value: 

#### Финально обучаем модели на полученных гиперпараметрах

In [23]:
def train_and_evaluate(model_type, best_params, X_train, y_train, ticker_train,
                        X_val, y_val, ticker_val, X_test, y_test, ticker_test,
                        num_tickers, original_test_all, ticker_list, device='cpu'):
    batch_size = best_params['batch_size']
    train_sampler = GroupedBatchSampler(ticker_train.tolist(), batch_size=batch_size)
    train_loader = DataLoader(TensorDataset(X_train, y_train, ticker_train), batch_sampler=train_sampler)
    val_loader = DataLoader(TensorDataset(X_val, y_val, ticker_val), batch_size=32)
    test_loader = DataLoader(TensorDataset(X_test, y_test, ticker_test), batch_size=32)

    if model_type == 'lstm':
        model = StockCNN_LSTM(
            num_tickers=num_tickers,
            ticker_embedding_dim=best_params['ticker_emb_dim'],
            hidden_size=best_params['hidden_size'],
            num_layers=best_params['num_layers']
        )
    elif model_type == 'transformer':
        model = StockTransformer(
            num_tickers=num_tickers,
            ticker_embedding_dim=best_params['ticker_emb_dim'],
            d_model=best_params['d_model'],
            nhead=best_params['nhead'],
            num_layers=best_params['num_layers'],
            dim_feedforward=best_params.get('dim_feedforward', 128)
        )

    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=best_params['lr'])
    criterion = nn.CrossEntropyLoss()
    early_stopping = EarlyStopping(patience=5)

    print(f"\nОбучение модели: {model_type.upper()}")
    for epoch in range(100):  
        model.train()
        total_loss = 0
        for X_batch, y_batch, ticker_batch in train_loader:
            X_batch, y_batch, ticker_batch = X_batch.to(device), y_batch.to(device), ticker_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch, ticker_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()


        val_acc, val_f1 = evaluate_metrics(val_loader, model, device)
        print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Val Acc: {val_acc:.4f} | Val F1: {val_f1:.4f}")

        early_stopping(val_loss=1 - val_f1, model=model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    model.load_state_dict(torch.load('best_model.pth'))

    test_acc, test_f1 = evaluate_metrics(test_loader, model, device)
    print("\n--- Тестовая выборка ---")
    print(f"Accuracy: {test_acc:.4f}, F1 Score: {test_f1:.4f}")

    y_true, y_pred = [], []
    model.eval()
    all_preds = []
    with torch.no_grad():
        for X_batch, y_batch, ticker_batch in test_loader:
            X_batch, ticker_batch = X_batch.to(device), ticker_batch.to(device)
            outputs = model(X_batch, ticker_batch)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            all_preds.extend(preds)
            y_true.extend(y_batch.numpy())

    # Переводим в сигналы
    predicted_labels = [1 if p == 2 else -1 if p == 0 else 0 for p in all_preds]

    # Добавляем предсказания к оригинальным данным
    final_rows = []
    for idx, window_df in enumerate(original_test_all):
        last_row = window_df.iloc[-1].to_dict()
        last_row['signal'] = predicted_labels[idx]
        final_rows.append(last_row)

    # Создаем датафрейм
    result_df = pd.DataFrame(final_rows)
    result_df['tic'] = [ticker_list[t] for t in ticker_test.numpy()]  # добавляем тикеры

    # Оставляем нужные колонки
    cols_to_keep = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'tic', 'signal']
    result_df = result_df[[c for c in cols_to_keep if c in result_df.columns]]

    return {
        'model_type': model_type,
        'test_acc': test_acc,
        'test_f1': test_f1,
        'classification_report': classification_report(y_true, all_preds, output_dict=True),
        'result_df': result_df
    }

In [25]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

# LSTM
results_lstm = train_and_evaluate(
    model_type='lstm',
    best_params=study_lstm.best_params,
    X_train=X_train, y_train=y_train, ticker_train=ticker_train,
    X_val=X_val, y_val=y_val, ticker_val=ticker_val,
    X_test=X_test, y_test=y_test, ticker_test=ticker_test,
    num_tickers=len(data_dict),
    original_test_all=original_test_all,
    ticker_list=tickers,
    device=device
)

# Transformer
results_transformer = train_and_evaluate(
    model_type='transformer',
    best_params=study_transformer.best_params,
    X_train=X_train, y_train=y_train, ticker_train=ticker_train,
    X_val=X_val, y_val=y_val, ticker_val=ticker_val,
    X_test=X_test, y_test=y_test, ticker_test=ticker_test,
    num_tickers=len(data_dict),
    original_test_all=original_test_all,
    ticker_list=tickers,
    device=device
)

# Получаем датафреймы с сигналами
result_lstm = results_lstm['result_df']
result_transformer = results_transformer['result_df']


Обучение модели: LSTM
Epoch 1 | Loss: 762.1755 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 2 | Loss: 736.5210 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 3 | Loss: 727.4778 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 4 | Loss: 719.3467 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 5 | Loss: 710.9347 | Val Acc: 0.7993 | Val F1: 0.7102
Epoch 6 | Loss: 703.8025 | Val Acc: 0.7990 | Val F1: 0.7104
Epoch 7 | Loss: 697.7245 | Val Acc: 0.7993 | Val F1: 0.7123
Epoch 8 | Loss: 692.2373 | Val Acc: 0.7979 | Val F1: 0.7132
Epoch 9 | Loss: 687.6127 | Val Acc: 0.7964 | Val F1: 0.7127
Epoch 10 | Loss: 682.0930 | Val Acc: 0.7945 | Val F1: 0.7132
Epoch 11 | Loss: 676.9574 | Val Acc: 0.7933 | Val F1: 0.7132
Epoch 12 | Loss: 671.9436 | Val Acc: 0.7941 | Val F1: 0.7141
Epoch 13 | Loss: 667.1936 | Val Acc: 0.7918 | Val F1: 0.7144
Epoch 14 | Loss: 661.8094 | Val Acc: 0.7900 | Val F1: 0.7157
Epoch 15 | Loss: 658.1493 | Val Acc: 0.7897 | Val F1: 0.7144
Epoch 16 | Loss: 652.3347 | Val Acc: 0.7900 | Val F1: 0.7139
Epoch 17 |

  model.load_state_dict(torch.load('best_model.pth'))



--- Тестовая выборка ---
Accuracy: 0.6636, F1 Score: 0.5685

Обучение модели: TRANSFORMER
Epoch 1 | Loss: 759.6181 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 2 | Loss: 731.9070 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 3 | Loss: 722.1289 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 4 | Loss: 714.6501 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 5 | Loss: 707.4433 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 6 | Loss: 700.2052 | Val Acc: 0.7991 | Val F1: 0.7107
Epoch 7 | Loss: 692.6900 | Val Acc: 0.7993 | Val F1: 0.7101
Epoch 8 | Loss: 686.0385 | Val Acc: 0.7990 | Val F1: 0.7111
Epoch 9 | Loss: 679.3336 | Val Acc: 0.7979 | Val F1: 0.7116
Epoch 10 | Loss: 672.6658 | Val Acc: 0.7984 | Val F1: 0.7105
Epoch 11 | Loss: 667.5138 | Val Acc: 0.7954 | Val F1: 0.7112
Epoch 12 | Loss: 660.5701 | Val Acc: 0.7985 | Val F1: 0.7117
Epoch 13 | Loss: 652.2519 | Val Acc: 0.7921 | Val F1: 0.7134
Epoch 14 | Loss: 646.8496 | Val Acc: 0.7909 | Val F1: 0.7140
Epoch 15 | Loss: 640.0774 | Val Acc: 0.7939 | Val F1: 0.7159
Epo

  model.load_state_dict(torch.load('best_model.pth'))



--- Тестовая выборка ---
Accuracy: 0.6453, F1 Score: 0.5632


In [None]:
print("Лучшие параметры для LSTM:")
print(study_lstm.best_params)
print("Лучший F1 для LSTM:", study_lstm.best_value)

print("\nЛучшие параметры для Transformer:")
print(study_transformer.best_params)
print("Лучший F1 для Transformer:", study_transformer.best_value)

## Бэктест

Посмотрим на тестовой выборке, как модели поведут себя и будут ли они прибыльны. Используем библиотеку backtesting

In [26]:
# Добавим столбец где будет указан id трейда
def prepare_data(test_df):
    for ticker in tickers:
        test_df.loc[test_df['tic'] == ticker,['shift']] = test_df.loc[test_df['tic'] == ticker,'signal'].shift()
        test_df.loc[test_df['tic'] == ticker,['shift']].fillna(0, inplace=True)
        test_df.loc[test_df['tic'] == ticker,['action_x']] =test_df.loc[test_df['tic'] == ticker,'signal'].ne(test_df.loc[test_df['tic'] == ticker,'shift'])
        test_df.loc[test_df['tic'] == ticker,['action_id']] = test_df.loc[test_df['tic'] == ticker,'signal'].ne(test_df.loc[test_df['tic'] == ticker,'signal'].shift()).cumsum()
    return test_df
    
result_lstm = prepare_data(result_lstm)
result_transformer = prepare_data(result_transformer)

In [32]:
#Создаем стратегию для покупки / продажи согласно моделям
class ML_strategy(Strategy):
    def init(self):
        # переопределяем Индикатор по колонке Signal
        self.signal = self.I(lambda: self.data.Signal)
        self.previous_signal = 0

    def next(self):
        current_signal = self.signal[-1]
        if current_signal != self.previous_signal:
            if current_signal == 1:
                if self.position.is_short:
                    self.position.close()

                if not self.position.is_long:
                    self.buy()
            elif current_signal == -1:
                if self.position.is_long:
                    self.position.close()

                if not self.position.is_short:
                    self.sell()
            elif current_signal == 0:
                if self.position:
                    self.position.close()


        self.previous_signal = current_signal
        
# Делаем для каждой модели и акции свой объект класса Backtest
def bt_str(test_df):    
    db_all = pd.DataFrame()
    for ticker in tickers:  
        # Подготовка данных для бэктестинга
        bt_df = test_df[test_df['tic'] == ticker].copy()
        bt_df.columns = bt_df.columns.str.capitalize()
        bt_df.rename(columns={'Date': 'Datetime'}, inplace=True)
        bt_df["Datetime"] = pd.to_datetime(bt_df["Datetime"])
        bt_df.set_index('Datetime', inplace=True)
        
        # Создаем объект класса Backtest
        bt = Backtest(bt_df, ML_strategy, cash=1_000_000, commission=.002, exclusive_orders=True) # сделки идут последовательно
        
        stats = bt.run()
        
        db_all[ticker] =  stats[:27]
    return db_all
    
#Запускаем    
df_lstm = bt_str(result_lstm)
df_transformer = bt_str(result_transformer)

In [33]:
#Отрисуем результаты по каждой акции
result = pd.concat(
    [df_lstm, df_transformer],
    axis=1,
    keys=['LSTM', 'Transformer']
).swaplevel(axis=1).sort_index(axis=1)
result

Unnamed: 0_level_0,AFLT,AFLT,AKRN,AKRN,ALRS,ALRS,CHMF,CHMF,FIVE,FIVE,...,SNGS,SNGS,TATN,TATN,TRNFP,TRNFP,VTBR,VTBR,YNDX,YNDX
Unnamed: 0_level_1,LSTM,Transformer,LSTM,Transformer,LSTM,Transformer,LSTM,Transformer,LSTM,Transformer,...,LSTM,Transformer,LSTM,Transformer,LSTM,Transformer,LSTM,Transformer,LSTM,Transformer
Start,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-12 00:00:00,2024-07-12 00:00:00,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-26 00:00:00,2024-07-26 00:00:00,...,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-30 00:00:00,2024-07-31 00:00:00,2024-07-31 00:00:00,2023-09-25 00:00:00,2023-09-25 00:00:00
End,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,...,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2025-05-17 00:00:00,2024-06-11 00:00:00,2024-06-11 00:00:00
Duration,291 days 00:00:00,291 days 00:00:00,309 days 00:00:00,309 days 00:00:00,291 days 00:00:00,291 days 00:00:00,291 days 00:00:00,291 days 00:00:00,295 days 00:00:00,295 days 00:00:00,...,291 days 00:00:00,291 days 00:00:00,291 days 00:00:00,291 days 00:00:00,291 days 00:00:00,291 days 00:00:00,290 days 00:00:00,290 days 00:00:00,260 days 00:00:00,260 days 00:00:00
Exposure Time [%],1.809955,0.0,0.0,0.0,12.669683,4.977376,14.027149,13.574661,0.0,8.675799,...,4.524887,2.714932,7.692308,10.859729,22.624434,28.506787,0.0,3.636364,1.098901,13.736264
Equity Final [$],1014283.99054,1000000.0,1000000.0,1000000.0,868651.0746,944186.8558,954971.7544,956670.5028,1000000.0,1173379.4722,...,1019460.99344,1009168.43626,872826.4626,1035485.2332,745735.74944,1332982.13158,1000000.0,957864.90556,986429.974,784939.216
Equity Peak [$],1016026.53238,1000000.0,1000000.0,1000000.0,1009593.8182,1067947.6272,1048948.5204,1098160.2592,1000000.0,1173379.4722,...,1052540.10514,1011940.27228,1000000.0,1160325.8292,1010238.04964,1332982.13158,1000000.0,1000000.0,1000000.0,1000000.0
Commissions [$],7924.29946,,,,25635.6254,15770.2442,51605.8456,28237.6972,,8642.3278,...,16089.66656,8021.11374,26795.0374,21593.3668,69835.44056,48437.78842,,4067.79444,4004.226,7817.584
Return [%],1.428399,0.0,0.0,0.0,-13.134893,-5.581314,-4.502825,-4.33295,0.0,17.337947,...,1.946099,0.916844,-12.717354,3.548523,-25.426425,33.298213,0.0,-4.213509,-1.357003,-21.506078
Buy & Hold Return [%],-19.808833,-19.808833,9.020117,9.020117,-34.020343,-34.020343,-26.882477,-26.882477,-28.811149,-28.811149,...,-17.201587,-17.201587,5.757098,5.757098,-38.485437,-38.485437,0.511143,0.511143,71.560017,71.560017
Return (Ann.) [%],1.630388,0.0,0.0,0.0,-14.833832,-6.338892,-5.118014,-4.925534,0.0,20.199267,...,2.222094,1.046121,-14.366879,4.05625,-28.43309,38.782154,0.0,-4.811413,-1.87401,-28.486449


In [48]:
def analyze_backtest_results(df):
    """
    Возвращает сводную статистику по всем акциям.
    """
    metrics = {
        'Total Trades': df.loc['# Trades'].sum(),
        'Win Rate [%]': df.loc['Win Rate [%]'].mean(),  # среднее по всем тикерам
        'Avg. Trade Duration': str(pd.to_timedelta(df.loc['Avg. Trade Duration']).mean()).split('.')[0],
        'Sharpe Ratio': df.loc['Sharpe Ratio'].mean() if 'Sharpe Ratio' in df.index else float('nan'),
        'Sortino Ratio': df.loc['Sortino Ratio'].mean() if 'Sortino Ratio' in df.index else float('nan'),
    }

    # Вычисляем общий Return на основе начального и финального капитала
    initial_capital = 1_000_000*len(df_lstm.columns)

    final_equity = df.loc['Equity Final [$]'].sum()
    total_return = (final_equity - initial_capital) / initial_capital * 100
    metrics['Total Return (%)'] = total_return

    return pd.Series(metrics)

In [49]:
summary_lstm = analyze_backtest_results(df_lstm)
summary_transformer = analyze_backtest_results(df_transformer)

# Выводим результаты сравнения
comparison = pd.DataFrame({
    'LSTM': summary_lstm,
    'Transformer': summary_transformer
})

print("📊 Сравнение моделей:")
print(comparison.T)

📊 Сравнение моделей:
            Total Trades Win Rate [%] Avg. Trade Duration Sharpe Ratio  \
LSTM                 244    46.473976     2 days 10:33:36     -0.88663   
Transformer          149    54.696248     6 days 12:28:48    -0.297833   

            Sortino Ratio Total Return (%)  
LSTM            -0.237983        -5.349052  
Transformer       0.00879        -1.400817  


Вывод: качество моделей считаю плохим, с учетом тяжести моделей. Если суммарно посмотреть результаты бэктестинга, то чуть получше себя проявил Transfotmer. В следующий раз планирую учить модель на более весомых признаках и попробую применить ансамбль.