# Домашняя работа № 6

1. *Создать ансамбль из нескольких разноплановых моделей (рекомендуется использовать сложные модели временных рядов, LLM для анализа тональности новостей и RL для обучения взаимосвязанных агентов). Можно использовать как самостоятельно обученные архитектуры, так и использовать предобученные сети или фреймворки.*
2. *Провести тестирование разработанной итоговой (построенной на ансамбле) стратегии на валидационном датасете.*
3. *Зафиксировать метрики модели для дальнейшего сравнения экспериментов.*
4. *Сформировать дашборд, показывающий эффективность различных торговых стратегий.*

In [1]:
#Подключаем библиотеки

import pandas as pd
import numpy as np
import yfinance as yf
import plotly.graph_objects as go
from scipy import stats
import plotly.express as px
import talib
import matplotlib.pyplot as plt
import pywt
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import optuna
from torch import Tensor
from torch.utils.data import DataLoader, TensorDataset,ConcatDataset
from optuna.trial import Trial
from optuna.samplers import TPESampler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, ConfusionMatrixDisplay,confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from datetime import date, datetime, timedelta
from plotly.subplots import make_subplots
from backtesting import Backtest, Strategy
from catboost import CatBoostClassifier


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device

device(type='cuda')

In [3]:
# Загружаем данные
data = pd.read_csv('Investing_data_rus.csv')
data['Date'] = pd.to_datetime(data['Date'])

In [4]:
data

Unnamed: 0,tic,Open,Close,High,Low,Volume,Date
0,BANEP,927.00,935.00,944.00,915.00,16034,2015-01-05
1,BANEP,928.00,926.00,938.00,921.00,9201,2015-01-06
2,BANEP,927.00,956.00,965.00,925.00,74265,2015-01-08
3,BANEP,956.00,938.00,967.00,925.00,47926,2015-01-09
4,BANEP,934.00,935.00,942.00,922.00,21278,2015-01-12
...,...,...,...,...,...,...,...
203898,IMOEX,2735.78,2699.84,2738.27,2699.84,0,2025-05-26
203899,IMOEX,2718.11,2736.75,2740.05,2705.78,0,2025-05-27
203900,IMOEX,2754.00,2785.20,2799.50,2747.96,0,2025-05-28
203901,IMOEX,2811.66,2809.94,2839.97,2800.93,0,2025-05-29


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203903 entries, 0 to 203902
Data columns (total 7 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   tic     203903 non-null  object        
 1   Open    203903 non-null  float64       
 2   Close   203903 non-null  float64       
 3   High    203903 non-null  float64       
 4   Low     203903 non-null  float64       
 5   Volume  203903 non-null  int64         
 6   Date    203903 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 10.9+ MB


In [6]:
data = data[data.groupby('tic')['tic'].transform('count') >= 500]

#### Добавляем признаки

In [7]:
tickers = data['tic'].unique()
df = pd.DataFrame()
for i in tickers:
    data_temp = data.loc[data['tic'] == i]
    data_temp.loc[:,['sma_5']] = talib.SMA(data_temp["Close"], timeperiod=5)
    data_temp.loc[:,['sma_10']] = talib.SMA(data_temp["Close"], timeperiod=10)
    data_temp.loc[:,['sma_15']] = talib.SMA(data_temp["Close"], timeperiod=15)
    data_temp.loc[:,['sma_20']] = talib.SMA(data_temp["Close"], timeperiod=20)
    data_temp.loc[:,['sma_50']] = talib.SMA(data_temp["Close"], timeperiod=50)
    data_temp.loc[:,['sma_200']] = talib.SMA(data_temp["Close"], timeperiod=200)
    data_temp.loc[:,['sma_ratio_5_15']] = data_temp['sma_15'] / data_temp['sma_5']
    data_temp.loc[:,['sma_ratio_10_50']] = data_temp['sma_50'] / data_temp['sma_15']
    data_temp.loc[:,['sma_5_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=5)
    data_temp.loc[:,['sma_10_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=10)
    data_temp.loc[:,['sma_15_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=15)
    data_temp.loc[:,['sma_50_Volume']] = talib.SMA(data_temp["Volume"], timeperiod=50)
    data_temp.loc[:,["sma_Volume_ratio_5_15"]] = data_temp['sma_5_Volume'] / data_temp['sma_15_Volume']
    data_temp.loc[:,["sma_Volume_ratio_10_50"]] = data_temp['sma_10_Volume'] / data_temp['sma_50_Volume']
    data_temp.loc[:,["ema_5"]] = talib.EMA(data_temp["Close"], timeperiod=5)
    data_temp.loc[:,["ema_10"]] = talib.EMA(data_temp["Close"], timeperiod=10)
    data_temp.loc[:,["ema_20"]] = talib.EMA(data_temp["Close"], timeperiod=20)
    data_temp.loc[:,["ema_50"]] = talib.EMA(data_temp["Close"], timeperiod=50)
    data_temp.loc[:,["ema_200"]] = talib.EMA(data_temp["Close"], timeperiod=200)
    
    # Calculate the Bollinger Bands
    data_temp.loc[:,["upper_band"]], data_temp.loc[:,["middle_band"]], data_temp.loc[:,["lower_band"]] = talib.BBANDS(data_temp["Close"], timeperiod=20)
    
    # Calculate the relative strength index (RSI)
    data_temp.loc[:,["RSI"]] = talib.RSI(data_temp["Close"], timeperiod=14)
    
    # Calculate the MACD
    data_temp.loc[:,["macd"]], data_temp.loc[:,["macd_signal"]], data_temp.loc[:,["macd_hist"]] = talib.MACD(data_temp["Close"], fastperiod=12, slowperiod=26, signalperiod=9)
    
    data_temp.loc[:,['ADX_5']] = talib.ADX(data_temp['High'], data_temp['Low'], data_temp['Close'], timeperiod=5)
    data_temp.loc[:,['ADX_15']] = talib.ADX(data_temp['High'], data_temp['Low'], data_temp['Close'], timeperiod=15)
    
    data_temp.loc[:,['RSI_5']] = talib.RSI(data_temp['Close'], timeperiod=5)
    data_temp.loc[:,['RSI_15']] = talib.RSI(data_temp['Close'], timeperiod=15)
    data_temp.loc[:,['RSI_ratio']] = data_temp['RSI_5']/data_temp['RSI_15']
    
    data_temp.loc[:,['ROC']] = talib.ROC(data_temp['Close'], timeperiod=15)

    data_temp.loc[:,['HAMMER']] = talib.CDLHAMMER(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['SHOOTING_STAR']] = talib.CDLSHOOTINGSTAR(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['ENGULFING']] = talib.CDLENGULFING(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['MORNING_STAR']] = talib.CDLMORNINGSTAR(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['EVENING_STAR']] = talib.CDLEVENINGSTAR(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['MARUBOZU']] = talib.CDLMARUBOZU(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['DOJI']] = talib.CDLDOJI(data_temp['Open'], data_temp['High'], data_temp['Low'], data_temp['Close'])
    data_temp.loc[:,['lag_1day']] = data_temp['Close'].shift(1)
    data_temp.loc[:,['day_of_week']] = data_temp['Date'].dt.dayofweek   

    
    data_temp.loc[:,['diff']] = -data_temp['Close'].diff(periods=-1)/data_temp['Close']
    
    #Определим, что значение таргета как -1, 0, 1 (продажа/держать/покупка) с учетом комиссии в 1%
    data_temp.loc[:,['target']] = 1
    data_temp.loc[data_temp['diff'] > 0.01, 'target'] = 2
    data_temp.loc[data_temp['diff'] < -0.01, 'target'] = 0
    
    data_temp.dropna(inplace=True)
    df = pd.concat([df, data_temp], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_temp.dropna(inplace=True)
A value is trying to be set on a copy of a s

#### Добавляем компоненты вейвлет разложения как признаки

In [8]:
# Функция по разложению на компоненты, добавим их дальше как отдельные признаки
def get_wavelet_features(history, wavelet='db4', level=5):
    coeffs = pywt.wavedec(history, wavelet, level=level)
    features = {}
    for i, name in enumerate(['cA5', 'cD5', 'cD4', 'cD3', 'cD2', 'cD1']):
        if i < len(coeffs):
            # Берём среднее последних 5 коэффициентов для стабильности
            features[name] = np.mean(coeffs[i][-5:])
    return features

# Функция вейвлет-прогнозирования на 1 день вперёд, также добавим предсказание как признак
def wavelet_forecast(history, wavelet='db4', level=5):
    coeffs = pywt.wavedec(history, wavelet, level=level)
    threshold = 0.2
    coeffs = [pywt.threshold(c, threshold, mode='soft') for c in coeffs]
    restored = pywt.waverec(coeffs, wavelet)
    
    # Возвращаем последнее значение как прогноз
    return restored[-1] if len(restored) > 0 else np.nan

# Используем окно в 32 значения
window_size = 32
wavelet_df = pd.DataFrame()
for tic in tickers:
    wavelet_features_list = []
    
    for i in range(window_size, len(data[data['tic']== tic])):
        window = data[data['tic']== tic]['Close'].values[i - window_size:i]
        features = get_wavelet_features(window)
        features['wavelet_forecast'] = wavelet_forecast(window)
        features['Date'] = data[data['tic']== tic]['Date'].iloc[i]
        features['tic'] = tic
        wavelet_features_list.append(features)
    
    wavelet_df = pd.concat([wavelet_df, pd.DataFrame(wavelet_features_list).fillna(0)], ignore_index=True)
# Объединяем основной DataFrame с вейвлетами
df = pd.merge(df, wavelet_df, on=['Date', 'tic'], how='inner')




In [9]:
#Определим признаки, с которыми будет работать модель
features = ['Close', 'sma_5', 'sma_10', 'sma_15', 'sma_50', 'sma_200', 'sma_ratio_5_15', 'sma_ratio_10_50', 
            'sma_5_Volume', 'sma_10_Volume', 'sma_15_Volume', 'sma_50_Volume', 'sma_Volume_ratio_5_15', 'sma_Volume_ratio_10_50', 'upper_band',
            'middle_band', 'lower_band', 'RSI', 'macd', 'macd_signal', 'macd_hist','day_of_week','lag_1day','cA5', 'cD5', 'cD4', 'cD3', 'cD2', 'cD1',#'wavelet_forecast',
            'ADX_5', 'ADX_15', 'RSI_5', 'RSI_15', 'RSI_ratio', 'ROC','HAMMER','SHOOTING_STAR','ENGULFING','MORNING_STAR','EVENING_STAR','MARUBOZU','DOJI']
not_features = [col for col in df.columns if col not in features]

In [10]:
df.dropna(inplace=True)
df = df.sort_values(by = ['Date','tic'],ascending = True, ignore_index = True)

#### Подготовим данные, делим и нормализируем

In [11]:
def split_by_date(df, train_ratio=0.7, val_ratio=0.15):
    tics = df['tic'].unique()
    train_dates_all = set()
    val_dates_all = set()
    test_dates_all = set()

    for tic in tics:
        df_tic = df[df['tic'] == tic]
        dates = df_tic['Date'].dropna().unique()  # Убираем возможные NaN

        dates = np.sort(dates)

        train_end = int(len(dates) * train_ratio)
        val_end = train_end + int(len(dates) * val_ratio)

        train_dates = dates[:train_end]
        val_dates = dates[train_end:val_end]
        test_dates = dates[val_end:]

        train_dates_all.update(train_dates)
        val_dates_all.update(val_dates)
        test_dates_all.update(test_dates)

    train_dates_all = sorted(train_dates_all)
    val_dates_all = sorted(val_dates_all)
    test_dates_all = sorted(test_dates_all)

    return train_dates_all, val_dates_all, test_dates_all

def prepare_data(
    df,
    features,
    target=None,
    model_type='lstm',  # может быть 'lstm' или 'tsmixer'
    window=30,
    train_ratio=0.7,
    val_ratio=0.15,
    batch_size=32
):
    raw_data = df.copy()
    
    train_dates, val_dates, test_dates = split_by_date(df, train_ratio, val_ratio)
    scalers = {}
    
    def process_data(data, is_train=False):
        results = []
        raw_results = []
        
        for tic in data['tic'].unique():
            tic_data = data[data['tic'] == tic].sort_values('Date')
            
            if is_train:
                scaler = MinMaxScaler()
                scaled_features = scaler.fit_transform(tic_data[features])
                scalers[tic] = scaler
            else:
                
                scaled_features = scalers[tic].transform(tic_data[features])
            
            target_idx = features.index(target) if target in features else None
            
            X, y = [], []
            for i in range(len(scaled_features) - window):
                X.append(scaled_features[i:i+window])
                
                if model_type == 'lstm':
                    y.append(tic_data[target].iloc[i+window])
                elif model_type == 'tsmixer':
                    y.append(scaled_features[i+window, target_idx])  #'Close'

            if len(X) > 0:
                results.append((np.array(X), np.array(y)))
                raw_results.append(tic_data.iloc[window:].copy())
        
        if results:
            X = np.concatenate([r[0] for r in results])
            y = np.concatenate([r[1] for r in results])
            raw_df = pd.concat(raw_results)
            return X, y, raw_df
        else:
            return None, None, None
    
    X_train, y_train, train_raw = process_data(df[df['Date'].isin(train_dates)], is_train=True)
    X_val, y_val, val_raw = process_data(df[df['Date'].isin(val_dates)])
    X_test, y_test, test_raw = process_data(df[df['Date'].isin(test_dates)])

    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)

    def create_dataloader(X, y, shuffle=False, current_model_type='lstm'):
        if X is None or y is None:
            return None
        
        X_tensor = torch.FloatTensor(X)
        
        if current_model_type == 'lstm':
            y_tensor = torch.LongTensor(y.astype(int))
        else:
            y_tensor = torch.FloatTensor(y).unsqueeze(-1)
        
        dataset = TensorDataset(X_tensor, y_tensor)
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    
    train_loader = create_dataloader(X_train, y_train, shuffle=True, current_model_type=model_type)
    val_loader = create_dataloader(X_val, y_val, current_model_type=model_type)
    test_loader = create_dataloader(X_test, y_test, current_model_type=model_type)
    
    return {
        'train_loader': train_loader,
        'val_loader': val_loader,
        'test_loader': test_loader,
        'train_raw': train_raw,
        'val_raw': val_raw,
        'test_raw': test_raw,
        'scalers': scalers
    }

#### Создаем LSTM модель

In [12]:
# Для LSTM (классификация) готовим данные
lstm_data = prepare_data(
    df=df,
    features=features,
    target='target', 
    model_type='lstm',
    window=30,
    batch_size=64
)

X_train shape: (159421, 30, 42)
y_train shape: (159421,)


In [13]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout=0.2, lr=0.001, batch_size=32):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.lr = lr
        self.batch_size = batch_size

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]  # Берем последний шаг
        out = self.dropout(lstm_out)
        out = self.fc(out)
        return out

In [14]:
#Ранняя остановка уменьшает переобучение
class EarlyStopping:
    def __init__(self, patience=5, delta=0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_score is None:
            self.best_score = val_loss
        elif val_loss > self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = val_loss
            self.counter = 0

In [15]:
#Ищем оптимальные гиперпараметры через Optuna
def objective(trial: Trial, data_dict):
    params = {
        'hidden_dim': trial.suggest_categorical('hidden_dim', [64, 128, 256]),
        'num_layers': trial.suggest_int('num_layers', 1, 3),
        'lr': trial.suggest_float('lr', 1e-4, 1e-2, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128]),
        'dropout': trial.suggest_float('dropout', 0.1, 0.5),
    }

    # Модель и оптимизатор
    model = LSTMModel(
        input_dim=data_dict['train_loader'].dataset[0][0].shape[-1],  # Берем размерность из данных
        hidden_dim=params['hidden_dim'],
        num_layers=params['num_layers'],
        lr = params['lr'],
        batch_size = params['batch_size'],
        output_dim=3,
        dropout=params['dropout']
    ).to(device) 
    
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])
    criterion = nn.CrossEntropyLoss()

    # Ранняя остановка
    early_stopping = EarlyStopping(patience=5)

    # Обучение
    for epoch in range(100):
        model.train()
        for X_batch, y_batch in data_dict['train_loader']:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch.long())  # Явное преобразование в long
            loss.backward()
            optimizer.step()

        # Валидация (F1-score)
        model.eval()
        val_preds, val_true = [], []
        with torch.no_grad():
            for X_val_batch, y_val_batch in data_dict['val_loader']:
                X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
                outputs = model(X_val_batch)
                val_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())
                val_true.extend(y_val_batch.cpu().numpy())
        
        f1 = f1_score(val_true, val_preds, average='weighted')

        # Ранняя остановка
        early_stopping(-f1)
        if early_stopping.early_stop:
            break

    return f1


# Оптимизация
study = optuna.create_study(direction='maximize')
study.optimize(lambda trial: objective(trial, lstm_data), n_trials=50)
best_params_lstm = study.best_params

[I 2025-06-09 22:18:21,208] A new study created in memory with name: no-name-faf6f8ce-965c-4a15-bd75-0806268347cb
[I 2025-06-09 22:28:43,126] Trial 0 finished with value: 0.6486555876891101 and parameters: {'hidden_dim': 128, 'num_layers': 2, 'lr': 0.0019929773571040724, 'batch_size': 128, 'dropout': 0.12397509889104837}. Best is trial 0 with value: 0.6486555876891101.
[I 2025-06-09 22:30:25,835] Trial 1 finished with value: 0.43637351749210684 and parameters: {'hidden_dim': 64, 'num_layers': 1, 'lr': 0.00506999005597648, 'batch_size': 32, 'dropout': 0.10852853708830229}. Best is trial 0 with value: 0.6486555876891101.
[I 2025-06-09 22:33:07,757] Trial 2 finished with value: 0.469797489030954 and parameters: {'hidden_dim': 64, 'num_layers': 1, 'lr': 0.0008967228486109183, 'batch_size': 32, 'dropout': 0.21487014978488828}. Best is trial 0 with value: 0.6486555876891101.
[I 2025-06-09 22:51:39,019] Trial 3 finished with value: 0.8068692660560046 and parameters: {'hidden_dim': 256, 'num_l

In [16]:
# Обучение лучшей модели на объединённой выборке train + val

# Определяем количество эпох для дообучения
num_epochs_full = 50

# Пересоздаём модель с лучшими параметрами
best_model_lstm = LSTMModel(
    input_dim=lstm_data['train_loader'].dataset[0][0].shape[-1],
    output_dim=3,
    **best_params_lstm
).to(device)

# Оптимизатор и лосс
optimizer = torch.optim.Adam(best_model_lstm.parameters(), lr=best_params_lstm['lr'])
criterion = nn.CrossEntropyLoss()

# Объединяем тренировочные и валидационные данные
full_train_dataset = ConcatDataset([
    lstm_data['train_loader'].dataset,
    lstm_data['val_loader'].dataset
])

full_train_loader = DataLoader(
    full_train_dataset,
    batch_size=best_params_lstm['batch_size'],
    shuffle=True
)

# Обучение на полной выборке
best_model_lstm.train()
for epoch in range(num_epochs_full):
    total_loss = 0
    for X_batch, y_batch in full_train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = best_model_lstm(X_batch)
        loss = criterion(outputs, y_batch.long())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 10, Loss: 1829.2655
Epoch 20, Loss: 1658.8600
Epoch 30, Loss: 1386.2102
Epoch 40, Loss: 1145.3194
Epoch 50, Loss: 959.9384


In [17]:
from sklearn.metrics import classification_report, f1_score

best_model_lstm.eval()
test_preds, test_true = [], []

with torch.no_grad():
    for X_test_batch, y_test_batch in lstm_data['test_loader']:
        X_test_batch, y_test_batch = X_test_batch.to(device), y_test_batch.to(device)
        outputs = best_model_lstm(X_test_batch)
        preds = torch.argmax(outputs, dim=1)
        test_preds.extend(preds.cpu().numpy())
        test_true.extend(y_test_batch.cpu().numpy())

# Оценка
test_f1 = f1_score(test_true, test_preds, average='weighted')
print(f"Test F1-score (weighted): {test_f1:.4f}")
print("\nClassification Report:\n", classification_report(test_true, test_preds))

Test F1-score (weighted): 0.8544

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.79      0.83     19740
           1       0.84      0.92      0.88     38243
           2       0.86      0.79      0.82     18204

    accuracy                           0.86     76187
   macro avg       0.86      0.83      0.85     76187
weighted avg       0.86      0.86      0.85     76187



#### Создаем модель TSMixer

In [18]:
# Для TSMixer (регрессия) готовим данные
tsmixer_data = prepare_data(
    df=df,
    features=features,
    target='Close',
    model_type='tsmixer',
    window=30,
    batch_size=64
)

X_train shape: (159421, 30, 42)
y_train shape: (159421,)


In [19]:
class TSMixer(nn.Module):
    def __init__(self, input_dim, seq_len, pred_len=1, hidden_dim=64, num_layers=2, dropout=0.1, lr=1e-4):
        super(TSMixer, self).__init__()
        self.input_dim = input_dim
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.lr = lr
        
        # Temporal Mixing
        self.temporal_mixing = nn.Sequential(
            nn.Linear(seq_len, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, seq_len)
        )
        
        # Feature Mixing
        self.feature_mixing = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, input_dim)
        )
        
        # Нормализации
        self.norm1 = nn.LayerNorm(input_dim)    # Нормализует по фичам
        self.norm2 = nn.LayerNorm(seq_len)      # Нормализует по времени
        
        # Финальный слой: предсказание pred_len * 1 (вместо input_dim)
        self.output_layer = nn.Linear(seq_len * input_dim, pred_len)
    
    def forward(self, x):
        # x: [batch_size, seq_len, input_dim]
        residual = x
        
        # 1. Temporal Mixing
        x = x.transpose(1, 2)  # [batch_size, input_dim, seq_len]
        x = self.temporal_mixing(x) + residual.transpose(1, 2)
        x = self.norm1(x.transpose(1, 2))  # [batch_size, seq_len, input_dim]
        
        # 2. Feature Mixing
        residual = x
        x = self.feature_mixing(x) + residual
        x = x.transpose(1, 2)  # [batch_size, input_dim, seq_len]
        x = self.norm2(x)      # Нормализует по seq_len
        x = x.transpose(1, 2)  # Возвращаем [batch_size, seq_len, input_dim]
        
        # 3. Предсказание
        x = x.reshape(x.size(0), -1)  # [batch_size, seq_len * input_dim]
        x = self.output_layer(x)      # [batch_size, pred_len]
        x = x.unsqueeze(-1)           # [batch_size, pred_len, 1]
        
        return x

In [20]:
def train_tsmixer(model, data_dict, epochs=100, lr=1e-3, patience=5):
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_loss = float('inf')
    patience_counter = 0
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in data_dict['train_loader']:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        # Валидация
        val_loss = evaluate_model(model, data_dict['val_loader'], criterion)
        
        # Ранняя остановка
        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), "best_tsmixer.pth")
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

def evaluate_model(model, loader, criterion):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            total_loss += loss.item()
    return total_loss / len(loader)

In [21]:
# Подключаем Optuna для подбора гипперпараметров
def objective(trial, data_dict):
    params = {
        'hidden_dim': trial.suggest_categorical('hidden_dim', [64, 128, 256]),
        'num_layers': trial.suggest_int('num_layers', 1, 3),
        'lr': trial.suggest_float('lr', 1e-4, 1e-2, log=True),
        'dropout': trial.suggest_float('dropout', 0.1, 0.5),
    }
    
    model = TSMixer(
        input_dim=data_dict['train_loader'].dataset[0][0].shape[-1],
        seq_len=data_dict['train_loader'].dataset[0][0].shape[0],
        pred_len=1,
        hidden_dim=params['hidden_dim'],
        num_layers=params['num_layers'],
        dropout=params['dropout']
    ).to(device)
    train_tsmixer(model, data_dict, lr=params['lr'], epochs=50)
    val_loss = evaluate_model(model, data_dict['val_loader'], nn.MSELoss())
    return val_loss

# Использование
study = optuna.create_study(direction='minimize')
study.optimize(lambda trial: objective(trial, tsmixer_data), n_trials=50)
best_params_tsmixer = study.best_params

[I 2025-06-10 07:11:48,327] A new study created in memory with name: no-name-0638f4c6-61fa-471e-9d0e-5529051fe291
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
[I 2025-06-10 07:14:07,991] Trial 0 finished with value: 0.06409028530611557 and parameters: {'hidden_dim': 128, 'num_layers': 2, 'lr': 0.004185037784576577, 'dropout': 0.49918343858773584}. Best is trial 0 with value: 0.06409028530611557.
[I 2025-06-10 07:15:23,212] Trial 1 finished with value: 0.06138724610399191 and parameters: {'hidden_dim': 128, 'num_layers': 3, 'lr': 0.006071794041709758, 'dropout': 0.4620278795888516}. Best is trial 1 with value: 0.06138724610399191.
[I 2025-06-10 07:17:34,668] Trial 2 finished with value: 0.06482338368125402 and parameters: {'hidden_dim': 256, 'num_layers': 1, 'lr': 0.001482797930418166, 'dropout': 0.1566453108104825}. Best is trial 1 with value: 0.0613

In [22]:
# Обучение на всех данных (train + val)
full_train_loader_tsmixer = DataLoader(
    ConcatDataset([
        tsmixer_data['train_loader'].dataset,
        tsmixer_data['val_loader'].dataset
    ]),
 shuffle=True)

# Инициализация модели с лучшими параметрами
best_model_tsmixer = TSMixer(
    input_dim=tsmixer_data['train_loader'].dataset[0][0].shape[-1],
    seq_len=tsmixer_data['train_loader'].dataset[0][0].shape[0],
    pred_len=1, 
    **best_params_tsmixer
)

# Оптимизатор
optimizer = torch.optim.Adam(best_model_tsmixer.parameters(), lr=best_params_tsmixer['lr'])

# Лосс
criterion = nn.MSELoss()

# Обучение
for epoch in range(50):
    best_model_tsmixer.train()
    for x_batch, y_batch in full_train_loader_tsmixer:
        optimizer.zero_grad()
        output = best_model_tsmixer(x_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 0.00010024195216828957
Epoch 2, Loss: 0.0002548978663980961
Epoch 3, Loss: 0.0005274220020510256
Epoch 4, Loss: 1.1253001503064297e-05
Epoch 5, Loss: 1.8326827557757497e-05
Epoch 6, Loss: 1.1322915938194456e-08
Epoch 7, Loss: 0.00019688883912749588
Epoch 8, Loss: 1.3340851182874758e-05
Epoch 9, Loss: 8.056612728069013e-07
Epoch 10, Loss: 0.0002589461801107973
Epoch 11, Loss: 0.0011310579720884562
Epoch 12, Loss: 8.509532926836982e-05
Epoch 13, Loss: 8.540461567463353e-05
Epoch 14, Loss: 5.380801303545013e-05
Epoch 15, Loss: 0.00013561410014517605
Epoch 16, Loss: 9.355838301416952e-06
Epoch 17, Loss: 0.00015681273362133652
Epoch 18, Loss: 2.104708255501464e-05
Epoch 19, Loss: 0.0001228972978424281
Epoch 20, Loss: 0.000333879521349445
Epoch 21, Loss: 1.7066412283384125e-06
Epoch 22, Loss: 0.00027564947959035635
Epoch 23, Loss: 0.0014386428520083427
Epoch 24, Loss: 0.005081790499389172
Epoch 25, Loss: 0.000508118886500597
Epoch 26, Loss: 1.615631117601879e-05
Epoch 27, Loss

#### Создаем модель Catboost

In [23]:
# Функция подготовки данных для CatBoost
def prepare_data_for_catboost(df, features, target='target', train_ratio=0.7, val_ratio=0.15):
    # Разделяем по дате, используем уже созданную ранее фукнцию
    train_dates, val_dates, test_dates = split_by_date(df, train_ratio, val_ratio)

    train_df = df[df['Date'].isin(train_dates)]
    val_df = df[df['Date'].isin(val_dates)]
    test_df = df[df['Date'].isin(test_dates)]

    # Инициализируем скалеры по тикерам
    scalers = {}

    def scale_group(group):
        tic = group.name
        scaler = MinMaxScaler()
        group[features] = scaler.fit_transform(group[features])
        scalers[tic] = scaler
        return group.reset_index(drop=True)

    # Обучаем нормализацию на train
    train_df = train_df.groupby('tic').apply(scale_group)

    # Применяем к val и test
    def apply_scaling(data):
        results = []
        for tic in data['tic'].unique():
            tic_data = data[data['tic'] == tic].copy()
            tic_data[features] = scalers[tic].transform(tic_data[features])
            results.append(tic_data)
        return pd.concat(results)

    val_df = apply_scaling(val_df)
    test_df = apply_scaling(test_df)

    # Формируем выборки
    X_train = train_df[features]
    y_train = train_df[target]

    X_val = val_df[features]
    y_val = val_df[target]

    X_test = test_df[features]
    y_test = test_df[target]

    return {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'scalers': scalers
    }

In [24]:
prepared = prepare_data_for_catboost(df, features=features, target='target')

# Извлекаем данные для обучения и валидации
X_train = prepared['X_train']
y_train = prepared['y_train']
X_val = prepared['X_val']
y_val = prepared['y_val']
X_test = prepared['X_test']
y_test = prepared['y_test']

  train_df = train_df.groupby('tic').apply(scale_group)


In [25]:
#Подбираем гиппепараметры для Catboost
def objective(trial, X_train, y_train, X_val, y_val):
    params = {
        "iterations": trial.suggest_int("iterations", 100, 1000),
        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.3),
        "depth": trial.suggest_int("depth", 4, 10),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-8, 100),
        "random_strength": trial.suggest_float("random_strength", 0, 10),
        "bagging_temperature": trial.suggest_float("bagging_temperature", 0, 10),
        "grow_policy": trial.suggest_categorical("grow_policy", ["SymmetricTree", "Depthwise"]),
        "eval_metric": "MultiClass",
        "verbose": False,
        "task_type": "GPU"
    }

    model = CatBoostClassifier(**params)
    model.fit(X_train, y_train, eval_set=(X_val, y_val))
    
    preds = model.predict(X_val)
    score = f1_score(y_val, preds, average="macro")
    return score

# Запуск оптимизации
study = optuna.create_study(sampler=TPESampler(), direction="maximize")
study.optimize(lambda trial: objective(trial, X_train, y_train, X_val, y_val), n_trials=50)

print("Лучшие параметры:", study.best_params)

[I 2025-06-10 15:46:57,357] A new study created in memory with name: no-name-bef8138d-aab4-4a80-8ea2-daf962336cb0
[I 2025-06-10 15:47:02,537] Trial 0 finished with value: 0.45185230422964956 and parameters: {'iterations': 242, 'learning_rate': 0.23993627076632137, 'depth': 9, 'l2_leaf_reg': 34.84645623787289, 'random_strength': 4.374751626119188, 'bagging_temperature': 4.621591146844826, 'grow_policy': 'SymmetricTree'}. Best is trial 0 with value: 0.45185230422964956.
[I 2025-06-10 15:47:07,772] Trial 1 finished with value: 0.3792546684082198 and parameters: {'iterations': 499, 'learning_rate': 0.03684269269118207, 'depth': 7, 'l2_leaf_reg': 65.01555205110455, 'random_strength': 3.0676749874749043, 'bagging_temperature': 2.1914528203191583, 'grow_policy': 'Depthwise'}. Best is trial 0 with value: 0.45185230422964956.
[I 2025-06-10 15:47:11,584] Trial 2 finished with value: 0.3619667685056592 and parameters: {'iterations': 593, 'learning_rate': 0.1722940768782681, 'depth': 5, 'l2_leaf_r

Лучшие параметры: {'iterations': 473, 'learning_rate': 0.2016143517749402, 'depth': 10, 'l2_leaf_reg': 0.10183499858941047, 'random_strength': 6.793891167330451, 'bagging_temperature': 9.969163512905503, 'grow_policy': 'Depthwise'}


In [26]:
best_params = study.best_params

# Убираем verbose и устанавливаем больше итераций при обучении
final_params = best_params.copy()
final_params["verbose"] = 100
final_params["task_type"] = "GPU"

# Объединяем train и val
X_combined = pd.concat([X_train, X_val])
y_combined = pd.concat([y_train, y_val])


model_catboost = CatBoostClassifier(**final_params)

model_catboost.fit(X_combined, y_combined)

# Оценка
train_preds = model_catboost.predict(X_combined)
test_preds = model_catboost.predict(X_test)

print("F1 на трейне + вале:", f1_score(y_combined, train_preds, average="macro"))
print("F1 на тесте:", f1_score(y_test, test_preds, average="macro"))

0:	learn: 1.0545692	total: 32.3ms	remaining: 15.2s
100:	learn: 0.6995463	total: 2.98s	remaining: 11s
200:	learn: 0.5564472	total: 5.8s	remaining: 7.85s
300:	learn: 0.4542159	total: 8.59s	remaining: 4.91s
400:	learn: 0.3777521	total: 11.3s	remaining: 2.03s
472:	learn: 0.3322736	total: 13.3s	remaining: 0us
F1 на трейне + вале: 0.9369237842663075
F1 на тесте: 0.9234592671680604


In [31]:
# Сохраняем состояние LSTM модели
torch.save(best_model_lstm.state_dict(), "lstm_model.pth")

# Сохраняем состояние TSMixer модели
torch.save(best_model_tsmixer.state_dict(), "tsmixer_model.pth")

#### Ансамбль. Стекинг

##### Подготовим данные

In [39]:
def get_predictions_lstm(model, data_loader, device='cpu'):
    model.eval()
    all_preds = []
    all_probs = []
    
    with torch.no_grad():
        for x_batch, _ in data_loader:
            x_batch = x_batch.to(device)
            output = model(x_batch)
            
            probs = torch.softmax(output, dim=1)
            preds = torch.argmax(probs, dim=1)

            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    return np.array(all_preds), np.array(all_probs)

In [37]:
def get_predictions_tsmixer(model, data_loader, device='cpu'):
    model = model.to(device)
    model.eval()
    all_preds = []
    
    with torch.no_grad():
        for x_batch, _ in data_loader:
            x_batch = x_batch.to(device)
            output = model(x_batch)
            preds = output.squeeze().cpu().numpy()
            all_preds.extend(preds)

    return np.array(all_preds)

In [29]:
def get_predictions_catboost(model, X):
    preds = model.predict(X)
    probs = model.predict_proba(X)
    return preds, probs

In [40]:
# LSTM
lstm_train_preds, lstm_train_probs = get_predictions_lstm(best_model_lstm, lstm_data['train_loader'], device)
lstm_val_preds, lstm_val_probs = get_predictions_lstm(best_model_lstm, lstm_data['val_loader'], device)
lstm_test_preds, lstm_test_probs = get_predictions_lstm(best_model_lstm, lstm_data['test_loader'], device)

# TSMixer (регрессия)
tsmixer_train_preds = get_predictions_tsmixer(best_model_tsmixer, tsmixer_data['train_loader'], device)
tsmixer_val_preds = get_predictions_tsmixer(best_model_tsmixer, tsmixer_data['val_loader'], device)
tsmixer_test_preds = get_predictions_tsmixer(best_model_tsmixer, tsmixer_data['test_loader'], device)

# CatBoost
cb_train_preds, cb_train_probs = get_predictions_catboost(model_catboost, X_train)
cb_val_preds, cb_val_probs = get_predictions_catboost(model_catboost, X_val)
cb_test_preds, cb_test_probs = get_predictions_catboost(model_catboost, X_test)

In [41]:
# LSTM создаем в валидационном и тестовом датасете предсказания и вероятности
val_pred_lstm = lstm_data['val_raw'].drop(columns=not_features)
val_pred_lstm['lstm_prob_0'] = lstm_val_probs[:, 0]
val_pred_lstm['lstm_prob_1'] = lstm_val_probs[:, 1]
val_pred_lstm['lstm_prob_2'] = lstm_val_probs[:, 2]
val_pred_lstm['lstm_pred'] = lstm_val_preds
test_pred_lstm = lstm_data['test_raw'].drop(columns=not_features)
test_pred_lstm['lstm_prob_0'] = lstm_test_probs[:, 0]
test_pred_lstm['lstm_prob_1'] = lstm_test_probs[:, 1]
test_pred_lstm['lstm_prob_2'] = lstm_test_probs[:, 2]
test_pred_lstm['lstm_pred'] = lstm_test_preds

In [42]:
# денормализуем предсказания у tsmixer модели
def denormalize(preds, raw_df, scalers, features, target='Close'):
    target_idx = features.index(target)
    preds_denorm = []
    dummy = np.zeros((len(preds), len(features)))

    # Заполняем только столбец 'Close'
    dummy[:, target_idx] = preds

    # применяем inverse_transform
    for i, row in enumerate(raw_df.iterrows()):
        tic = row[1]['tic']
        scaler = scalers[tic]
        dummy[i, :] = scaler.inverse_transform(dummy[i:i+1, :])

    preds_denorm = dummy[:, target_idx]
    return preds_denorm

In [43]:
preds_denorm_val = denormalize(tsmixer_val_preds, tsmixer_data['val_raw'], tsmixer_data['scalers'], features, target='Close')
preds_denorm_test = denormalize(tsmixer_test_preds, tsmixer_data['test_raw'], tsmixer_data['scalers'], features, target='Close')

In [44]:
# TSMixer, создаем в валидационном и тестовом датасете предсказания
val_pred_tsmixer = tsmixer_data['val_raw'].drop(columns=features)
val_pred_tsmixer['tsmixer_pred'] = preds_denorm_val
test_pred_tsmixer = tsmixer_data['test_raw'].drop(columns=features)
test_pred_tsmixer['tsmixer_pred'] = preds_denorm_test

In [45]:
# Catboost создаем в валидационном и тестовом датасете предсказания и вероятности
X_val['catboost_pred'] = cb_val_preds
X_val['catboost_prob_0'] = cb_val_probs[:, 0]
X_val['catboost_prob_1'] = cb_val_probs[:, 1]
X_val['catboost_prob_2'] = cb_val_probs[:, 2]
X_val.drop(columns=features, inplace = True)
X_test['catboost_pred'] = cb_test_preds
X_test['catboost_prob_0'] = cb_test_probs[:, 0]
X_test['catboost_prob_1'] = cb_test_probs[:, 1]
X_test['catboost_prob_2'] = cb_test_probs[:, 2]
X_test.drop(columns=features, inplace = True)

In [46]:
col = ['Date', 'Close', 'Open', 'High', 'Low', 'Volume', 'target','tic', 'lstm_prob_0', 'lstm_prob_1',
       'lstm_prob_2', 'lstm_pred', 'catboost_pred', 'catboost_prob_0', 'catboost_prob_1', 
       'catboost_prob_2','tsmixer_pred']

In [47]:
#Объединяем датафреймы трех поученных моделей
result_df_val = val_pred_lstm.join(X_val, how='outer',lsuffix='_left', rsuffix='_right').join(val_pred_tsmixer, how='inner', lsuffix='_left', rsuffix='_right')[col]
result_df_test = test_pred_lstm.join(X_test, how='outer',lsuffix='_left', rsuffix='_right').join(test_pred_tsmixer, how='inner', lsuffix='_left', rsuffix='_right')[col]

In [48]:
# TSMixer, создаем в валидационном и тестовом датасете процент отклонения от текущей цены закрытия
result_df_val['tsmixer_delta'] = result_df_val['tsmixer_pred']/result_df_val['Close']-1
result_df_test['tsmixer_delta'] = result_df_test['tsmixer_pred']/result_df_test['Close']-1

#### Проведем бектестинг на моделях LSTM, TSMixer и Catboost

In [49]:
test_df_lstm = result_df_test.copy()
test_df_catboost = result_df_test.copy()
test_df_tsmixer = result_df_test.copy()
test_df_lstm.loc[:,["signal"]] = test_df_lstm['lstm_pred']
test_df_catboost.loc[:,["signal"]] = test_df_catboost['catboost_pred']
test_df_tsmixer.loc[:,["signal"]] = test_df_tsmixer['tsmixer_delta'].apply(lambda x: 0 if x < -0.01 else 2 if x > 0.01 else 1)

In [50]:
# Добавим столбец где будет указан id трейда
def prepare_data(test_df):
    for ticker in tickers:
        test_df.loc[test_df['tic'] == ticker,['shift']] = test_df.loc[test_df['tic'] == ticker,'signal'].shift()
        test_df.loc[test_df['tic'] == ticker,['shift']].fillna(0, inplace=True)
        test_df.loc[test_df['tic'] == ticker,['action_x']] =test_df.loc[test_df['tic'] == ticker,'signal'].ne(test_df.loc[test_df['tic'] == ticker,'shift'])
        test_df.loc[test_df['tic'] == ticker,['action_id']] = test_df.loc[test_df['tic'] == ticker,'signal'].ne(test_df.loc[test_df['tic'] == ticker,'signal'].shift()).cumsum()
    return test_df

In [51]:
#Создаем стратегию для покупки / продажи согласно моделям
class ML_strategy(Strategy):
    def init(self):
        # переопределяем Индикатор по колонке Signal
        self.signal = self.I(lambda: self.data.Signal)
        self.previous_signal = 0

    def next(self):
        current_signal = self.signal[0]
        if current_signal != self.previous_signal:
            if current_signal == 2:
                if self.position.is_short:
                    self.position.close()

                if not self.position.is_long:
                    self.buy()
            elif current_signal == 0:
                if self.position.is_long:
                    self.position.close()

                if not self.position.is_short:
                    self.sell()
            elif current_signal == 1:
                if self.position:
                    self.position.close()


        self.previous_signal = current_signal
        
# Делаем для каждой модели и акции свой объект класса Backtest
def bt_str(test_df):    
    db_all = pd.DataFrame()
    for ticker in test_df['tic'].unique():  
        # Подготовка данных для бэктестинга
        bt_df = test_df[test_df['tic'] == ticker].copy()
        bt_df.columns = bt_df.columns.str.capitalize()
        bt_df.rename(columns={'Date': 'Datetime'}, inplace=True)
        bt_df["Datetime"] = pd.to_datetime(bt_df["Datetime"])
        bt_df.set_index('Datetime', inplace=True)
        
        # Создаем объект класса Backtest
        bt = Backtest(bt_df, ML_strategy, cash=1_000_000, commission=.002, exclusive_orders=True) # сделки идут последовательно
        
        stats = bt.run()
        
        db_all[ticker] =  stats[:27]
    return db_all

In [52]:
#Готовим данные
test_df_lstm = prepare_data(test_df_lstm)
test_df_catboost = prepare_data(test_df_catboost)
test_df_tsmixer = prepare_data(test_df_tsmixer)
#Запускаем бектестинг  
result_lstm = bt_str(test_df_lstm)
result_catboost = bt_str(test_df_catboost)
result_tsmixer = bt_str(test_df_tsmixer)

  x = value / self._data.Close
  market_log_returns = np.log(c[1:] / c[:-1])
  market_log_returns = np.log(c[1:] / c[:-1])
  x = value / self._data.Close
  market_log_returns = np.log(c[1:] / c[:-1])
  market_log_returns = np.log(c[1:] / c[:-1])
  x = value / self._data.Close
  market_log_returns = np.log(c[1:] / c[:-1])
  market_log_returns = np.log(c[1:] / c[:-1])
  x = value / self._data.Close
  market_log_returns = np.log(c[1:] / c[:-1])
  market_log_returns = np.log(c[1:] / c[:-1])
  x = value / self._data.Close
  market_log_returns = np.log(c[1:] / c[:-1])
  market_log_returns = np.log(c[1:] / c[:-1])
  x = value / self._data.Close
  market_log_returns = np.log(c[1:] / c[:-1])
  market_log_returns = np.log(c[1:] / c[:-1])
                                                      

In [53]:
def analyze_backtest_results(df):
    """
    Возвращает сводную статистику по всем акциям.
    """
    metrics = {
        'Total Trades': df.loc['# Trades'].sum(),
        'Win Rate [%]': df.loc['Win Rate [%]'].mean(),  # среднее по всем тикерам
        'Avg. Trade Duration': str(pd.to_timedelta(df.loc['Avg. Trade Duration']).mean()).split('.')[0],
        'Sharpe Ratio': df.loc['Sharpe Ratio'].mean() if 'Sharpe Ratio' in df.index else float('nan'),
        'Sortino Ratio': df.loc['Sortino Ratio'].mean() if 'Sortino Ratio' in df.index else float('nan'),
    }

    # Вычисляем общий Return на основе начального и финального капитала
    initial_capital = 1_000_000*len(df.columns)

    final_equity = df.loc['Equity Final [$]'].sum()
    total_return = (final_equity - initial_capital) / initial_capital * 100
    metrics['Total Return (%)'] = total_return

    return pd.Series(metrics)

In [54]:
# Сравним результаты моделей
summary_lstm = analyze_backtest_results(result_lstm)
summary_transformer = analyze_backtest_results(result_catboost)
summary_tsmixer = analyze_backtest_results(result_tsmixer)

# Выводим результаты сравнения
comparison = pd.DataFrame({
    'LSTM': summary_lstm,
    'Catboost': summary_transformer,
    'TSMixer': summary_tsmixer
})

print("📊 Сравнение моделей:")
print(comparison.T)

📊 Сравнение моделей:
         Total Trades Win Rate [%] Avg. Trade Duration Sharpe Ratio  \
LSTM                0          NaN                 NaT    -0.038574   
Catboost            0          NaN                 NaT    -0.104881   
TSMixer             0          NaN                 NaT    -0.336737   

         Sortino Ratio Total Return (%)  
LSTM          0.038854         1.349754  
Catboost      -0.08584        -0.473082  
TSMixer      -0.246321        -1.395946  


#### Обучаем мета модель на предсказанных параметрах (взяла стекинг на XGBoost)

In [55]:
meta_features = [ 'lstm_prob_0', 'lstm_prob_1',
       'lstm_prob_2','catboost_prob_0', 'catboost_prob_1', 
       'catboost_prob_2','tsmixer_delta']

In [56]:
X_train_meta = result_df_val[meta_features].values
y_val = result_df_val['target'].values
X_test_meta = result_df_test[meta_features].values
y_test = result_df_test['target'].values

In [60]:
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from optuna.samplers import TPESampler


def objective(trial, X, y, n_splits=5):
    params = {
        "objective": "multi:softprob",
        "num_class": len(np.unique(y)),
        "eval_metric": "mlogloss",
        "booster": trial.suggest_categorical("booster", ["gbtree", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 10.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 10.0, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "eta": trial.suggest_float("eta", 0.01, 0.3),
        "subsample": trial.suggest_float("subsample", 0.4, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
        "verbosity": 0,
    }

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    scores = []

    for train_idx, valid_idx in skf.split(X, y):
        X_train, X_valid = X[train_idx], X[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]

        dtrain = xgb.DMatrix(X_train, label=y_train, missing=np.inf)
        dvalid = xgb.DMatrix(X_valid, label=y_valid, missing=np.inf)

        model = xgb.train(params, dtrain, num_boost_round=100, evals=[(dvalid, "valid")], early_stopping_rounds=10, verbose_eval=False)
        preds = model.predict(dvalid)
        score = log_loss(y_valid, preds)
        scores.append(score)

    return np.mean(scores)


# --- Запуск Optuna ---
study = optuna.create_study(sampler=TPESampler(), direction="minimize")
study.optimize(lambda trial: objective(trial, X_train_meta, y_val), n_trials=50)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2025-06-10 16:52:29,090] A new study created in memory with name: no-name-0c512fc9-0c44-4fc9-81d3-885c8765d5e5
[I 2025-06-10 16:52:32,119] Trial 0 finished with value: 0.03143094764215023 and parameters: {'booster': 'gbtree', 'lambda': 0.11155448138495057, 'alpha': 1.110281756473099e-07, 'max_depth': 11, 'eta': 0.16151033328962303, 'subsample': 0.9308445629117826, 'colsample_bytree': 0.43537965258832245, 'min_child_weight': 15}. Best is trial 0 with value: 0.03143094764215023.
[I 2025-06-10 16:52:36,415] Trial 1 finished with value: 0.03174448566326978 and parameters: {'booster': 'gbtree', 'lambda': 0.0001915023072964698, 'alpha': 0.00030445052986450996, 'max_depth': 11, 'eta': 0.07050823546137755, 'subsample': 0.5163489136784537, 'colsample_bytree': 0.7001517087815887, 'min_child_weight': 11}. Best is trial 0 with value: 0.03143094764215023.
[I 2025-06-10 16:52:51,759] Trial 2 finished with value: 0.03365076305838401 and parameters: {'booster': 'dart', 'lambda': 9.016909758504511e-

Best trial:
  Value: 0.030383428262186422
  Params: 
    booster: gbtree
    lambda: 0.000364821394731696
    alpha: 3.781118902961473e-05
    max_depth: 3
    eta: 0.15766827802366298
    subsample: 0.8708989376150514
    colsample_bytree: 0.8477683817110591
    min_child_weight: 9


In [62]:
# --- Обучение с лучшими параметрами ---
best_params = study.best_params
best_params.update({
    "objective": "multi:softprob",
    "num_class": len(np.unique(y_val)),
    "eval_metric": "mlogloss",
    "verbosity": 0
})

dtrain_full = xgb.DMatrix(X_train_meta, label=y_val, missing=np.inf)
model = xgb.train(best_params, dtrain_full, num_boost_round=100)

# --- Предсказание на тесте ---
dtest = xgb.DMatrix(X_test_meta, missing=np.inf)
preds = model.predict(dtest)

In [63]:
# Предсказываем классы
pred_classes = np.argmax(preds, axis=1)

# Точность
acc = accuracy_score(y_test, pred_classes)

# F1-score (macro)
f1 = f1_score(y_test, pred_classes, average='weighted')

# Log loss
loss = log_loss(y_test, preds)

print(f"Accuracy: {acc:.4f}")
print(f"F1 Score (weighted): {f1:.4f}")
print(f"Log Loss: {loss:.4f}")

# Отчет по классификации
print(classification_report(y_test, pred_classes))

Accuracy: 0.9481
F1 Score (weighted): 0.9481
Log Loss: 0.2608
              precision    recall  f1-score   support

           0       0.95      0.92      0.94     19740
           1       0.96      0.96      0.96     38243
           2       0.92      0.95      0.94     18204

    accuracy                           0.95     76187
   macro avg       0.94      0.94      0.94     76187
weighted avg       0.95      0.95      0.95     76187



## Бэктест мета модели

Посмотрим на тестовой выборке, как модель поведет себя и будет ли она прибыльна. Используем библиотеку backtesting

In [71]:
result_df_test['signal'] = pred_classes

In [73]:
import warnings
warnings.filterwarnings("ignore")
result_df_test = prepare_data(result_df_test)
#Запускаем    
df_meta = bt_str(result_df_test)

                                                      

In [74]:
df_meta

Unnamed: 0,ABIO,ABRD,AFKS,AFLT,ALRS,APTK,AQUA,BANE,BANEP,BELU,...,RENI,VEON-RX,LENT,VKCO,POSI,TGKB,AKMM,TGKN,LQDT,GECO
Start,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,2021-02-19 00:00:00,...,2022-10-18 00:00:00,2023-04-28 00:00:00,2023-05-16 00:00:00,2023-05-16 00:00:00,2023-05-19 00:00:00,2023-05-30 00:00:00,2023-06-08 00:00:00,2023-06-13 00:00:00,2023-06-19 00:00:00,2024-03-21 00:00:00
End,2025-05-31 00:00:00,2025-05-29 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,...,2025-05-31 00:00:00,2025-05-29 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-31 00:00:00,2025-05-29 00:00:00,2025-05-29 00:00:00,2025-05-31 00:00:00,2025-05-29 00:00:00,2025-05-29 00:00:00
Duration,1562 days 00:00:00,1560 days 00:00:00,1562 days 00:00:00,1562 days 00:00:00,1562 days 00:00:00,1562 days 00:00:00,1562 days 00:00:00,1562 days 00:00:00,1562 days 00:00:00,1562 days 00:00:00,...,956 days 00:00:00,762 days 00:00:00,746 days 00:00:00,746 days 00:00:00,743 days 00:00:00,730 days 00:00:00,721 days 00:00:00,718 days 00:00:00,710 days 00:00:00,434 days 00:00:00
Exposure Time [%],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Equity Final [$],1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1115237.416,963621.199,1000000.0,...,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0
Equity Peak [$],1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,2133789.916,2033785.699,1000000.0,...,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0,1000000.0
Return [%],0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.523742,-3.63788,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Buy & Hold Return [%],109.342835,-22.683983,-57.013699,-0.825848,-52.387829,-28.020756,142.772277,12.854501,-2.072539,-87.614186,...,131.966351,49.004739,96.736914,-48.965652,-34.559944,0.0,36.529252,0.0,37.096774,-43.664635
Return (Ann.) [%],0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.859079,-0.953209,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Volatility (Ann.) [%],0.0,0.0,0.0,0.0,0.0,0.0,0.0,44.850702,52.178029,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Итоговое сравнение моделей

In [75]:
summary_meta = analyze_backtest_results(df_meta)

# Выводим результаты сравнения
comparison = pd.DataFrame({
    'META': summary_meta,
    'LSTM': summary_lstm,
    'Catboost': summary_transformer,
    'TSMixer': summary_tsmixer
})

print("📊 Сравнение моделей:")
print(comparison.T)

📊 Сравнение моделей:
         Total Trades Win Rate [%] Avg. Trade Duration Sharpe Ratio  \
META                0          NaN                 NaT    -0.068333   
LSTM                0          NaN                 NaT    -0.038574   
Catboost            0          NaN                 NaT    -0.104881   
TSMixer             0          NaN                 NaT    -0.336737   

         Sortino Ratio Total Return (%)  
META         -0.053877        -0.288116  
LSTM          0.038854         1.349754  
Catboost      -0.08584        -0.473082  
TSMixer      -0.246321        -1.395946  


Вывод: качество моделей считаю не плохим, lstm порадовала. Но на бектестинге мета модель сделала все равно минус. Скорее всего это из-за сильных просадках, на которые влияют новости. Хорошо бы добавить в качестве фичей информацию о новостях.