In [None]:
#import getpass

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
import os
import time
from datetime import datetime
import lightgbm as lgb

# Feature Engineering

In [None]:
import os

In [None]:
def load_all_from_csv(folder='crypto_csv'):
    """Load every ``*.csv`` file in ``folder`` into a dict of OHLCV frames.

    The dict key is the file name without its ``.csv`` suffix and with
    underscores replaced by slashes (``BTC_USDT.csv`` -> ``'BTC/USDT'``).

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive).

    Returns
    -------
    dict[str, pandas.DataFrame]
        One independent frame per file, restricted to the columns
        ``timestamp``, ``open``, ``high``, ``low``, ``close`` and ``volume``.
        ``timestamp`` is parsed as datetime. Keys are inserted in sorted
        file-name order.

    Raises
    ------
    FileNotFoundError
        If ``folder`` does not exist (raised by ``os.listdir``).
    KeyError
        If a CSV lacks one of the expected columns.
    """
    ohlcv_columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    suffix = '.csv'
    data_dict = {}
    # os.listdir gives no ordering guarantee; sorting makes the processing
    # order of the symbols reproducible between runs and machines.
    for fname in sorted(os.listdir(folder)):
        if not fname.endswith(suffix):
            continue
        # Strip only the trailing extension, not every ".csv" inside the name.
        symbol = fname[:-len(suffix)].replace('_', '/')
        df = pd.read_csv(os.path.join(folder, fname), parse_dates=['timestamp'])
        # .copy() detaches the result from the parsed frame, so columns added
        # to it later are not treated as writes to a slice of another frame.
        data_dict[symbol] = df[ohlcv_columns].copy()
    return data_dict

In [None]:
# Load every CSV found in 'crypto_csv_hourly' into a dict of OHLCV frames keyed by symbol.
data_h = load_all_from_csv('crypto_csv_hourly')

In [None]:
data_h['BTC/USDT']

Unnamed: 0,timestamp,open,high,low,close,volume
0,2019-01-01 00:00:00,3701.23,3713.00,3689.88,3700.31,686.367420
1,2019-01-01 01:00:00,3700.20,3702.73,3684.22,3689.69,613.539115
2,2019-01-01 02:00:00,3689.67,3695.95,3675.04,3690.00,895.302181
3,2019-01-01 03:00:00,3690.00,3699.77,3685.78,3693.13,796.714818
4,2019-01-01 04:00:00,3692.32,3720.00,3685.94,3692.71,1317.452909
...,...,...,...,...,...,...
55626,2025-05-09 06:00:00,102979.35,103261.71,102928.77,103230.11,1025.817990
55627,2025-05-09 07:00:00,103230.11,104361.30,103127.76,103628.77,3530.711920
55628,2025-05-09 08:00:00,103628.76,103930.00,102652.33,102968.25,3123.084800
55629,2025-05-09 09:00:00,102968.25,103285.72,102421.00,102915.58,1946.173750


In [None]:
!pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=38e8167162716bdc406700bc3279685deaf298c9d775ca6a2038860fc0a970a4
  Stored in directory: /root/.cache/pip/wheels/a1/d7/29/7781cc5eb9a3659d032d7d15bdd0f49d07d2b24fec29f44bc4
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
import ta
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
def add_technical_indicators(df):
    """Add technical-indicator, return and calendar columns to ``df``.

    The frame is modified in place and the same object is returned. It must
    contain ``open``, ``high``, ``low``, ``close``, ``volume`` and a
    datetime-like ``timestamp`` column.

    Columns written, in order: SMA/EMA (7 and 21), MACD line/signal/diff,
    RSI(14), Bollinger bands (20, 2), ATR(14), stochastic oscillator (14, 3),
    OBV, typical price with its 20-period mean and mean absolute deviation,
    CCI(20), 1-step simple and log returns, bar range, close-open difference,
    hour and day of week.
    """
    close, high, low = df['close'], df['high'], df['low']

    # Moving averages: simple first, then exponential
    for window in (7, 21):
        df[f'SMA_{window}'] = ta.trend.sma_indicator(close, window=window)
    for window in (7, 21):
        df[f'EMA_{window}'] = ta.trend.ema_indicator(close, window=window)

    # MACD line, signal line and their difference
    df['MACD'] = ta.trend.macd(close)
    df['MACD_signal'] = ta.trend.macd_signal(close)
    df['MACD_diff'] = ta.trend.macd_diff(close)

    # Relative Strength Index
    df['RSI_14'] = ta.momentum.rsi(close, window=14)

    # Bollinger Bands
    bands = ta.volatility.BollingerBands(close=close, window=20, window_dev=2)
    df['BB_upper'] = bands.bollinger_hband()
    df['BB_middle'] = bands.bollinger_mavg()
    df['BB_lower'] = bands.bollinger_lband()

    # Average True Range (volatility)
    df['ATR_14'] = ta.volatility.average_true_range(high, low, close, window=14)

    # Stochastic oscillator
    stochastic = ta.momentum.StochasticOscillator(
        high=high, low=low, close=close, window=14, smooth_window=3
    )
    df['STOCH_slowk'] = stochastic.stoch()
    df['STOCH_slowd'] = stochastic.stoch_signal()

    # On-Balance Volume
    df['OBV'] = ta.volume.on_balance_volume(close, df['volume'])

    # Commodity Channel Index, built from the typical price
    def mean_abs_deviation(values):
        return np.mean(np.abs(values - np.mean(values)))

    cci_window = 20
    df['TP'] = (high + low + close) / 3
    tp_rolling = df['TP'].rolling(window=cci_window)
    df['TP_SMA'] = tp_rolling.mean()
    df['TP_MAD'] = tp_rolling.apply(mean_abs_deviation, raw=True)
    df['CCI_20'] = (df['TP'] - df['TP_SMA']) / (0.015 * df['TP_MAD'])

    # Derived price features
    df['return_1h'] = close.pct_change()
    df['log_return'] = np.log(close / close.shift(1))
    df['range'] = high - low
    df['close_open_diff'] = close - df['open']

    # Calendar features
    stamp = df['timestamp'].dt
    df['hour'] = stamp.hour
    df['dayofweek'] = stamp.dayofweek

    return df

In [None]:
from sklearn.feature_selection import RFE

def feature_selection_rfe(df, target_col='log_return', n_features_to_select=10):
    """Select features with recursive feature elimination on a random forest.

    The target is ``target_col`` shifted one row into the future. The data is
    split chronologically (last 20% is the test part), RFE is fitted on the
    training part, and a forest refitted on the selected columns is scored on
    the test part (RMSE is printed).

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing ``target_col`` and a ``timestamp`` column;
        every other column is used as a candidate feature.
    target_col : str
        Column whose next-row value is predicted.
    n_features_to_select : int
        Number of features RFE keeps.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows restricted to the selected features plus ``target``.
    """
    # assign() builds a new frame, so adding ``target`` never writes into the
    # result of dropna() (the source of the SettingWithCopyWarning).
    df = df.dropna().assign(target=lambda frame: frame[target_col].shift(-1)).dropna()

    X = df.drop(columns=['target', 'timestamp'])
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    selector = RFE(model, n_features_to_select=n_features_to_select, step=1)
    selector = selector.fit(X_train, y_train)

    selected_features = X.columns[selector.support_].tolist()
    print(f"Selected features by RFE ({len(selected_features)}): {selected_features}")

    # Train a model on the selected features only and score it on the hold-out part.
    model.fit(X_train[selected_features], y_train)
    preds = model.predict(X_test[selected_features])
    # ``squared=False`` is no longer accepted by recent scikit-learn releases;
    # taking the square root explicitly works on every version.
    rmse = mean_squared_error(y_test, preds) ** 0.5
    print(f"RMSE on test set: {rmse:.6f}")

    return df[selected_features + ['target']]


In [None]:
from sklearn.linear_model import LassoCV

def feature_selection_lasso(df, target_col='log_return', alpha=0.001):
    """Select the features that receive a non-zero Lasso coefficient.

    The target is ``target_col`` shifted one row into the future; the split
    is chronological (last 20% is the test part). ``LassoCV`` is given a
    single-value alpha grid, so ``alpha`` is the penalty actually used.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing ``target_col`` and a ``timestamp`` column.
    target_col : str
        Column whose next-row value is predicted.
    alpha : float
        L1 penalty strength.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows restricted to the selected features plus ``target``.
    """
    # assign() builds a new frame, so adding ``target`` never writes into the
    # result of dropna() (the source of the SettingWithCopyWarning).
    df = df.dropna().assign(target=lambda frame: frame[target_col].shift(-1)).dropna()

    X = df.drop(columns=['target', 'timestamp'])
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    model = LassoCV(alphas=[alpha], cv=5, random_state=42)
    model.fit(X_train, y_train)

    coef = pd.Series(model.coef_, index=X.columns)
    selected_features = coef[coef != 0].index.tolist()
    print(f"Selected features by Lasso ({len(selected_features)}): {selected_features}")

    # The model was fitted on every column, so it must be given every column
    # at predict time; dropped features have a zero coefficient and do not
    # contribute to the prediction anyway.
    preds = model.predict(X_test)
    # ``squared=False`` is no longer accepted by recent scikit-learn releases.
    rmse = mean_squared_error(y_test, preds) ** 0.5
    print(f"RMSE on test set: {rmse:.6f}")

    return df[selected_features + ['target']]


In [None]:
!pip install shap



In [None]:
import shap

def feature_selection_shap(df, target_col='log_return', top_n=10):
    """Select the ``top_n`` features ranked by mean absolute SHAP value.

    A random forest is fitted on the chronological training part (first 80%),
    SHAP values are computed on that same part, and a second forest trained
    on the selected columns only is scored on the test part (RMSE is printed).

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing ``target_col`` and a ``timestamp`` column.
    target_col : str
        Column whose next-row value is predicted.
    top_n : int
        Number of features to keep.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows restricted to the selected features plus ``target``.
    """
    # assign() builds a new frame, so adding ``target`` never writes into the
    # result of dropna() (the source of the SettingWithCopyWarning).
    df = df.dropna().assign(target=lambda frame: frame[target_col].shift(-1)).dropna()

    X = df.drop(columns=['target', 'timestamp'])
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_train)

    shap_importance = np.abs(shap_values).mean(axis=0)
    feature_importances = pd.Series(shap_importance, index=X.columns)
    feature_importances = feature_importances.sort_values(ascending=False)

    selected_features = feature_importances.head(top_n).index.tolist()
    print(f"Selected top {top_n} features by SHAP: {selected_features}")

    # ``model`` was fitted on every column and cannot predict from a column
    # subset, so the selected set is scored with a forest trained on it.
    selected_model = RandomForestRegressor(n_estimators=100, random_state=42)
    selected_model.fit(X_train[selected_features], y_train)
    preds = selected_model.predict(X_test[selected_features])
    # ``squared=False`` is no longer accepted by recent scikit-learn releases.
    rmse = mean_squared_error(y_test, preds) ** 0.5
    print(f"RMSE on test set: {rmse:.6f}")

    return df[selected_features + ['target']]


In [None]:
def feature_selection(df, target_col='log_return'):
    """Keep the features whose random-forest importance is above the mean.

    The target is ``target_col`` shifted one row into the future; the split
    is chronological (last 20% is the test part). The forest's test RMSE and
    the selected feature names are printed.

    NOTE: a later cell defines another ``feature_selection`` with extra
    parameters and a different return value; when the notebook is run top to
    bottom that later definition replaces this one.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing ``target_col`` and a ``timestamp`` column.
    target_col : str
        Column whose next-row value is predicted.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows restricted to the selected features plus ``target``.
    """
    # assign() builds a new frame, so adding ``target`` never writes into the
    # result of dropna() (the source of the SettingWithCopyWarning).
    df = df.dropna().assign(target=lambda frame: frame[target_col].shift(-1)).dropna()

    X = df.drop(columns=['target', 'timestamp'])
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    print(f"RMSE on test set: {mean_squared_error(y_test, preds) ** 0.5 :.6f}")

    feature_importances = pd.Series(model.feature_importances_, index=X.columns)
    feature_importances = feature_importances.sort_values(ascending=False)

    # Features strictly above the average importance are kept.
    threshold = feature_importances.mean()
    selected_features = feature_importances[feature_importances > threshold].index.tolist()

    print("Selected features based on importance:")
    print(selected_features)

    return df[selected_features + ['target']]

In [None]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LassoCV

In [None]:
def feature_selection(df, target_col='log_return', rfe_features=10, lasso_alpha=0.001):
    """Compare three feature-selection methods on a next-step target.

    The target is ``target_col`` shifted one row into the future; the split
    is chronological (last 20% is the test part). For each method the test
    RMSE and the selected feature names are printed and stored.

    Methods:
      1. LightGBM gain importance (features above the mean gain).
      2. RFE wrapped around a LightGBM regressor.
      3. Lasso with a single penalty value (``lasso_alpha``).

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing ``target_col`` and a ``timestamp`` column.
    target_col : str
        Column whose next-row value is predicted.
    rfe_features : int
        Number of features RFE keeps.
    lasso_alpha : float
        L1 penalty used by the Lasso step.

    Returns
    -------
    tuple[dict, pandas.DataFrame]
        ``results`` maps method name to ``{'features': [...], 'rmse': float}``;
        the frame is the cleaned data including the ``target`` column.
        Note that this is a tuple, not a bare DataFrame.
    """
    # assign() builds a new frame, so adding ``target`` never writes into the
    # result of dropna() (the source of the SettingWithCopyWarning).
    df = df.dropna().assign(target=lambda frame: frame[target_col].shift(-1)).dropna()

    X = df.drop(columns=['target', 'timestamp'])
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    results = {}

    # 1. LightGBM feature importance
    lgb_train = lgb.Dataset(X_train, label=y_train)
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'n_jobs': -1,
        'seed': 42
    }
    lgb_model = lgb.train(params, lgb_train, num_boost_round=100)
    lgb_importance = pd.Series(lgb_model.feature_importance(importance_type='gain'), index=X.columns)
    lgb_importance = lgb_importance.sort_values(ascending=False)
    lgb_selected = lgb_importance[lgb_importance > lgb_importance.mean()].index.tolist()

    # This RMSE comes from the model trained on all features.
    lgb_preds = lgb_model.predict(X_test)
    lgb_rmse = mean_squared_error(y_test, lgb_preds) ** 0.5
    results['LightGBM_Importance'] = {'features': lgb_selected, 'rmse': lgb_rmse}
    print(f"[LightGBM Importance] RMSE: {lgb_rmse:.6f}")
    print(f"[LightGBM Importance] Selected features ({len(lgb_selected)}): {lgb_selected}\n")

    # 2. RFE with LightGBM
    lgb_estimator = lgb.LGBMRegressor(n_estimators=100, n_jobs=-1, random_state=42)
    rfe_selector = RFE(estimator=lgb_estimator, n_features_to_select=rfe_features, step=5, verbose=1)
    rfe_selector = rfe_selector.fit(X_train, y_train)
    rfe_selected = X.columns[rfe_selector.support_].tolist()

    # Score the RFE subset with a fresh regressor trained on those columns only.
    rfe_estimator = lgb.LGBMRegressor(n_estimators=100, n_jobs=-1, random_state=42)
    rfe_estimator.fit(X_train[rfe_selected], y_train)
    rfe_preds = rfe_estimator.predict(X_test[rfe_selected])
    rfe_rmse = mean_squared_error(y_test, rfe_preds) ** 0.5
    results['RFE_LightGBM'] = {'features': rfe_selected, 'rmse': rfe_rmse}
    print(f"[RFE LightGBM] RMSE: {rfe_rmse:.6f}")
    print(f"[RFE LightGBM] Selected features ({len(rfe_selected)}): {rfe_selected}\n")

    # 3. LassoCV (single-value alpha grid, so ``lasso_alpha`` is the penalty used)
    # NOTE(review): features are not standardized here, so the L1 penalty acts
    # very differently on columns of different magnitude - confirm intended.
    lasso_model = LassoCV(alphas=[lasso_alpha], cv=5, random_state=42, max_iter=10000)
    lasso_model.fit(X_train, y_train)
    coef = pd.Series(lasso_model.coef_, index=X.columns)
    lasso_selected = coef[coef != 0].index.tolist()

    lasso_preds = lasso_model.predict(X_test)
    lasso_rmse = mean_squared_error(y_test, lasso_preds) ** 0.5
    results['LassoCV'] = {'features': lasso_selected, 'rmse': lasso_rmse}
    print(f"[LassoCV] RMSE: {lasso_rmse:.6f}")
    print(f"[LassoCV] Selected features ({len(lasso_selected)}): {lasso_selected}\n")

    return results, df

In [None]:
def process_pair(df_pair):
    """Run indicator generation and feature selection for one trading pair.

    ``df_pair`` is modified in place: its ``timestamp`` column is converted
    to datetime and ``add_technical_indicators`` adds its columns to it.

    Returns whatever ``feature_selection`` returns for the enriched frame.
    """
    df_pair['timestamp'] = pd.to_datetime(df_pair['timestamp'])
    enriched = add_technical_indicators(df_pair)
    return feature_selection(enriched, target_col='log_return')

In [None]:
# Run feature selection for every loaded pair; a failure on one pair is
# reported and does not stop the remaining pairs.
selected_data = {}

for pair, df_pair in data_h.items():
    print(f"Processing {pair}...")
    try:
        selected_df = process_pair(df_pair)
        selected_data[pair] = selected_df
        # feature_selection may return a (results, frame) tuple instead of a
        # bare frame; a tuple has no .shape, so report the frame's shape.
        frame = selected_df[1] if isinstance(selected_df, tuple) else selected_df
        print(f"Finished processing {pair}. Selected features shape: {frame.shape}\n")
    except Exception as e:
        print(f"Error processing {pair}: {e}\n")

Processing XRP/USDT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


[LightGBM Importance] RMSE: 0.011538
[LightGBM Importance] Selected features (15): ['volume', 'OBV', 'return_1h', 'STOCH_slowd', 'STOCH_slowk', 'range', 'TP_MAD', 'RSI_14', 'hour', 'MACD_signal', 'CCI_20', 'ATR_14', 'MACD', 'MACD_diff', 'close_open_diff']

Fitting estimator with 30 features.
Fitting estimator with 25 features.
Fitting estimator with 20 features.
Fitting estimator with 15 features.
[RFE LightGBM] RMSE: 0.011230
[RFE LightGBM] Selected features (10): ['volume', 'MACD_diff', 'RSI_14', 'ATR_14', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'TP_MAD', 'return_1h', 'range']

[LassoCV] RMSE: 0.010370
[LassoCV] Selected features (3): ['volume', 'OBV', 'CCI_20']

Error processing XRP/USDT: 'tuple' object has no attribute 'shape'

Processing TRX/USDT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


[LightGBM Importance] RMSE: 0.007609
[LightGBM Importance] Selected features (15): ['volume', 'range', 'MACD_signal', 'STOCH_slowk', 'MACD_diff', 'ATR_14', 'OBV', 'return_1h', 'STOCH_slowd', 'CCI_20', 'TP_MAD', 'RSI_14', 'close_open_diff', 'hour', 'MACD']

Fitting estimator with 30 features.
Fitting estimator with 25 features.
Fitting estimator with 20 features.
Fitting estimator with 15 features.
[RFE LightGBM] RMSE: 0.006480
[RFE LightGBM] Selected features (10): ['volume', 'MACD_signal', 'MACD_diff', 'BB_lower', 'ATR_14', 'STOCH_slowd', 'OBV', 'CCI_20', 'return_1h', 'range']

[LassoCV] RMSE: 0.005827
[LassoCV] Selected features (3): ['volume', 'STOCH_slowd', 'OBV']

Error processing TRX/USDT: 'tuple' object has no attribute 'shape'

Processing BTC/USDT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


[LightGBM Importance] RMSE: 0.005924
[LightGBM Importance] Selected features (14): ['ATR_14', 'TP_MAD', 'OBV', 'volume', 'STOCH_slowk', 'range', 'MACD', 'STOCH_slowd', 'CCI_20', 'return_1h', 'MACD_diff', 'RSI_14', 'MACD_signal', 'hour']

Fitting estimator with 30 features.
Fitting estimator with 25 features.
Fitting estimator with 20 features.
Fitting estimator with 15 features.
[RFE LightGBM] RMSE: 0.005896
[RFE LightGBM] Selected features (10): ['volume', 'MACD_diff', 'RSI_14', 'ATR_14', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'TP_MAD', 'CCI_20', 'return_1h']



  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(


[LassoCV] RMSE: 0.005662
[LassoCV] Selected features (20): ['open', 'high', 'close', 'volume', 'SMA_7', 'SMA_21', 'EMA_7', 'EMA_21', 'MACD', 'MACD_diff', 'BB_upper', 'BB_middle', 'BB_lower', 'OBV', 'TP', 'TP_SMA', 'TP_MAD', 'CCI_20', 'range', 'close_open_diff']

Error processing BTC/USDT: 'tuple' object has no attribute 'shape'

Processing SOL/USDT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


[LightGBM Importance] RMSE: 0.009925
[LightGBM Importance] Selected features (14): ['volume', 'OBV', 'return_1h', 'ATR_14', 'RSI_14', 'MACD_diff', 'MACD_signal', 'STOCH_slowd', 'STOCH_slowk', 'TP_MAD', 'MACD', 'CCI_20', 'hour', 'range']

Fitting estimator with 30 features.
Fitting estimator with 25 features.
Fitting estimator with 20 features.
Fitting estimator with 15 features.
[RFE LightGBM] RMSE: 0.009945
[RFE LightGBM] Selected features (10): ['volume', 'MACD_diff', 'RSI_14', 'BB_lower', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'TP_MAD', 'CCI_20', 'return_1h']

[LassoCV] RMSE: 0.009762
[LassoCV] Selected features (5): ['close', 'volume', 'STOCH_slowd', 'OBV', 'CCI_20']

Error processing SOL/USDT: 'tuple' object has no attribute 'shape'

Processing ETH/USDT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


[LightGBM Importance] RMSE: 0.007711
[LightGBM Importance] Selected features (14): ['volume', 'STOCH_slowd', 'ATR_14', 'STOCH_slowk', 'OBV', 'MACD_signal', 'RSI_14', 'range', 'hour', 'CCI_20', 'return_1h', 'MACD_diff', 'TP_MAD', 'MACD']

Fitting estimator with 30 features.
Fitting estimator with 25 features.
Fitting estimator with 20 features.
Fitting estimator with 15 features.
[RFE LightGBM] RMSE: 0.007722
[RFE LightGBM] Selected features (10): ['volume', 'MACD_diff', 'BB_lower', 'ATR_14', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'CCI_20', 'return_1h', 'hour']



  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


[LassoCV] RMSE: 0.007496
[LassoCV] Selected features (8): ['open', 'volume', 'SMA_7', 'OBV', 'TP', 'CCI_20', 'range', 'hour']

Error processing ETH/USDT: 'tuple' object has no attribute 'shape'



  model = cd_fast.enet_coordinate_descent(


# Новая попытка создать рабочую модель (от 17.05 16:20)

## test

In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from sklearn.feature_selection import RFE
from sklearn.linear_model import LassoCV
import torch.nn as nn
from torch.optim import Adam

In [None]:
import torch.nn as nn

class BiLSTMModel(nn.Module):
    """Bidirectional LSTM regressor producing one value per input sequence.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Hidden size of each LSTM direction.
    num_layers : int
        Number of stacked LSTM layers.
    dropout : float
        Dropout applied between LSTM layers and before the output layer.
    """

    def __init__(self, input_dim, hidden_dim=64, num_layers=2, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only between stacked layers and warns when
        # a non-zero value is given for a single layer, so disable it there.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers,
                            batch_first=True, bidirectional=True, dropout=lstm_dropout)
        self.dropout = nn.Dropout(dropout)
        # Both directions are concatenated, hence hidden_dim * 2 inputs.
        self.fc = nn.Linear(hidden_dim * 2, 1)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_dim) to a (batch,) tensor."""
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # output at the last time step
        out = self.dropout(out)
        out = self.fc(out)
        # Drop only the trailing size-1 dimension. A bare squeeze() would also
        # remove the batch dimension for a batch of one and return a 0-d tensor.
        return out.squeeze(-1)

In [None]:
class CryptoDataset(Dataset):
    """Sliding-window dataset: ``seq_len`` rows of features -> next row's target.

    Sample ``idx`` consists of the standardized feature rows
    ``idx .. idx + seq_len - 1`` and the (unscaled) target value at row
    ``idx + seq_len``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; rows with NaN in any used column are dropped.
    feature_cols : sequence of str
        Columns used as model inputs.
    target_col : str
        Column used as the regression target.
    seq_len : int
        Number of consecutive rows in one input window.

    Notes
    -----
    The scaler is fitted on all rows passed in, so statistics of later rows
    influence the scaling of earlier ones.
    """

    def __init__(self, df, feature_cols, target_col='log_return', seq_len=24):
        self.seq_len = seq_len
        # Accept any sequence (tuple, pandas Index, ...), not only a list.
        feature_cols = list(feature_cols)
        df = df.dropna(subset=feature_cols + [target_col]).reset_index(drop=True)
        self.scaler = StandardScaler()
        features = df[feature_cols].values
        self.features = self.scaler.fit_transform(features)
        # Targets are kept on their original scale.
        self.targets = df[target_col].values

    def __len__(self):
        # A series shorter than one window yields no samples; a negative
        # value would make len() raise ValueError.
        return max(0, len(self.targets) - self.seq_len)

    def __getitem__(self, idx):
        x = self.features[idx:idx+self.seq_len]
        y = self.targets[idx + self.seq_len]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

In [None]:
from torch.optim import Adam
from sklearn.metrics import mean_squared_error

In [None]:
def inverse_transform(scaler, data_scaled, feature_index=0):
    """
    Undo standardization for a single feature.

    scaler: fitted StandardScaler-like object exposing ``mean_`` and ``scale_``
    data_scaled: np.array (or scalar) of standardized values
    feature_index: position of the feature in ``scaler.mean_`` / ``scaler.scale_``

    Returns ``data_scaled * scale + mean`` for the chosen feature.
    """
    offset = scaler.mean_[feature_index]
    factor = scaler.scale_[feature_index]
    return data_scaled * factor + offset


def compute_financial_metrics(y_true_scaled, y_pred_scaled, scaler, target_feature_index=0):
    """
    Compute direction, Sharpe and error metrics from standardized series.

    Both series are first mapped back with ``inverse_transform`` using the
    statistics at ``target_feature_index``. Step-to-step relative changes of
    each de-standardized series are then compared.

    Returns a dict with:
      'Directional Accuracy' - share of steps where both changes have the same sign
      'Hit Ratio'            - same value as 'Directional Accuracy'
      'Sharpe Ratio'         - mean/std of the predicted series' changes,
                               scaled by sqrt(365*24)
      'RMSE'                 - root mean squared error of the de-standardized values
    """
    actual, predicted = (
        inverse_transform(scaler, series, target_feature_index)
        for series in (y_true_scaled, y_pred_scaled)
    )

    # Relative change between consecutive elements of each series
    actual_changes = np.diff(actual) / actual[:-1]
    predicted_changes = np.diff(predicted) / predicted[:-1]

    same_direction = np.sign(actual_changes) == np.sign(predicted_changes)
    direction_score = np.mean(same_direction)

    # The small constant guards against a zero standard deviation.
    periods_per_year = 365*24
    sharpe = np.mean(predicted_changes) / (np.std(predicted_changes) + 1e-9) * np.sqrt(periods_per_year)

    squared_errors = (actual - predicted)**2

    return {
        'Directional Accuracy': direction_score,
        'Hit Ratio': direction_score,
        'Sharpe Ratio': sharpe,
        'RMSE': np.sqrt(np.mean(squared_errors))
    }

In [None]:
def train_model(model, train_loader, val_loader, epochs=50, lr=1e-3, device='cuda',
                patience=10, checkpoint_path='best_bilstm_model.pth'):
    """Train ``model`` with MSE loss, Adam and early stopping.

    After every epoch the validation loss is computed; whenever it improves,
    the model weights are written to ``checkpoint_path``. Training stops once
    the validation loss has not improved for ``patience`` consecutive epochs.

    Parameters
    ----------
    model : torch.nn.Module
        Model mapping a batch of inputs to one prediction per sample.
    train_loader, val_loader : iterable of (inputs, targets) batches
    epochs : int
        Maximum number of epochs.
    lr : float
        Adam learning rate.
    device : str or torch.device
        Device used for the model and the batches.
    patience : int
        Number of non-improving epochs tolerated before stopping.
    checkpoint_path : str
        File the best weights are saved to.

    Returns
    -------
    float
        Best (lowest) average validation loss observed; ``inf`` if no epoch
        produced an improvement.
    """
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = Adam(model.parameters(), lr=lr)
    best_val_loss = float('inf')
    trigger_times = 0

    for epoch in range(epochs):
        model.train()
        train_losses = []
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            # reshape(-1) keeps predictions and targets 1-D even when the model
            # returns a 0-d tensor for a batch of one or a (batch, 1) tensor.
            preds = model(x_batch).reshape(-1)
            loss = criterion(preds, y_batch.reshape(-1))
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        model.eval()
        val_losses = []
        val_preds, val_targets = [], []
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                preds = model(x_val).reshape(-1)
                y_val = y_val.reshape(-1)
                loss = criterion(preds, y_val)
                val_losses.append(loss.item())
                val_preds.extend(preds.cpu().numpy())
                val_targets.extend(y_val.cpu().numpy())

        avg_train_loss = float(np.mean(train_losses))
        avg_val_loss = float(np.mean(val_losses))
        # RMSE over all validation samples (not the mean of per-batch losses).
        val_errors = np.asarray(val_targets, dtype=np.float64) - np.asarray(val_preds, dtype=np.float64)
        val_rmse = float(np.sqrt(np.mean(val_errors ** 2)))

        print(f"Epoch {epoch+1}: Train Loss={avg_train_loss:.6f}, Val Loss={avg_val_loss:.6f}, Val RMSE={val_rmse:.6f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            trigger_times = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print("Early stopping triggered")
                break

    return best_val_loss

In [None]:
def evaluate_model_with_metrics(model, test_loader, dataset, feature_index=0, device='cuda',
                                checkpoint_path='best_bilstm_model.pth'):
    """Load the saved weights and score the model on ``test_loader``.

    The loaders built from ``CryptoDataset`` yield targets on their original
    scale (only the input features are standardized), so predictions and
    targets are compared directly. Applying a feature's scaler statistics to
    them, as was done before, mixes unrelated units.

    Parameters
    ----------
    model : torch.nn.Module
        Model with the same architecture as the saved checkpoint.
    test_loader : iterable of (inputs, targets) batches
    dataset, feature_index
        Unused; kept so existing calls keep working.
    device : str or torch.device
        Device used for inference; the checkpoint is mapped onto it.
    checkpoint_path : str
        File containing the state dict to load.

    Returns
    -------
    dict
        'Directional Accuracy': share of samples where prediction and target
            have the same sign.
        'Hit Ratio': same value as 'Directional Accuracy'.
        'Sharpe Ratio': mean/std of ``sign(prediction) * target`` scaled by
            sqrt(365*24).
        'RMSE': root mean squared error between predictions and targets.
    """
    # map_location lets a checkpoint saved on one device load on another.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for x_test, y_test in test_loader:
            x_test = x_test.to(device)
            # reshape(-1) keeps the output 1-D even for a batch of one.
            pred = model(x_test).reshape(-1)
            preds.extend(pred.cpu().numpy())
            targets.extend(y_test.reshape(-1).cpu().numpy())

    preds = np.asarray(preds, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)

    directional_accuracy = float(np.mean(np.sign(preds) == np.sign(targets)))

    # Return of a strategy that goes long/short according to the predicted sign.
    strategy_returns = np.sign(preds) * targets
    sharpe_ratio = float(
        np.mean(strategy_returns) / (np.std(strategy_returns) + 1e-9) * np.sqrt(365*24)
    )

    rmse = float(np.sqrt(np.mean((targets - preds) ** 2)))

    metrics = {
        'Directional Accuracy': directional_accuracy,
        'Hit Ratio': directional_accuracy,
        'Sharpe Ratio': sharpe_ratio,
        'RMSE': rmse
    }
    print("Evaluation metrics on the target scale:")
    for k, v in metrics.items():
        print(f"{k}: {v:.6f}")

    return metrics

In [None]:
def ensure_close_feature(features, mandatory_feature='close'):
    """Make sure ``mandatory_feature`` is in ``features``.

    The list is extended in place when the name is missing; the same list
    object is returned in both cases.
    """
    already_present = mandatory_feature in features
    if already_present:
        return features
    features.append(mandatory_feature)
    return features
# --- Feature selection methods ---
def feature_selection_methods(df, target_col='log_return'):
    """Return the feature lists chosen by three selection methods.

    The target is ``target_col`` shifted one row into the future; selection
    is fitted on the chronological training part (first 80%). ``'close'`` is
    appended to every list that does not already contain it.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing ``target_col`` and a ``timestamp`` column.
    target_col : str
        Column whose next-row value is predicted.

    Returns
    -------
    dict[str, list[str]]
        Keys ``'LightGBM_Importance'``, ``'RFE_LightGBM'`` and ``'LassoCV'``.
    """
    # assign() builds a new frame, so adding ``target`` never writes into the
    # result of dropna() (the source of the SettingWithCopyWarning).
    df = df.dropna().assign(target=lambda frame: frame[target_col].shift(-1)).dropna()
    X = df.drop(columns=['target', 'timestamp'])
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    results = {}

    # 1. LightGBM importance: keep features whose gain is above the mean gain
    lgb_train = lgb.Dataset(X_train, label=y_train)
    params = {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1, 'boosting_type': 'gbdt', 'n_jobs': -1, 'seed': 42}
    lgb_model = lgb.train(params, lgb_train, num_boost_round=100)
    lgb_importance = pd.Series(lgb_model.feature_importance(importance_type='gain'), index=X.columns).sort_values(ascending=False)
    lgb_selected = lgb_importance[lgb_importance > lgb_importance.mean()].index.tolist()
    lgb_selected = ensure_close_feature(lgb_selected)
    results['LightGBM_Importance'] = lgb_selected

    # 2. RFE with LightGBM
    lgb_estimator = lgb.LGBMRegressor(n_estimators=100, n_jobs=-1, random_state=42)
    rfe_selector = RFE(estimator=lgb_estimator, n_features_to_select=min(10, len(X.columns)), step=5, verbose=0)
    rfe_selector = rfe_selector.fit(X_train, y_train)
    rfe_selected = X.columns[rfe_selector.support_].tolist()
    rfe_selected = ensure_close_feature(rfe_selected)
    results['RFE_LightGBM'] = rfe_selected

    # 3. LassoCV
    # The L1 penalty is scale-dependent: with raw columns of very different
    # magnitude the selection reflects units rather than relevance and the
    # solver struggles to converge. Standardize using training rows only.
    lasso_scaler = StandardScaler().fit(X_train)
    lasso_model = LassoCV(cv=5, random_state=42, max_iter=10000)
    lasso_model.fit(lasso_scaler.transform(X_train), y_train)
    coef = pd.Series(lasso_model.coef_, index=X.columns)
    lasso_selected = coef[coef != 0].index.tolist()
    lasso_selected = ensure_close_feature(lasso_selected)
    results['LassoCV'] = lasso_selected

    return results

In [None]:
def run_pipeline(data_dict, seq_len=24, batch_size=64, device='cuda'):
    """Train and evaluate a BiLSTM for every coin and feature-selection method.

    For each frame in ``data_dict`` the feature lists from
    ``feature_selection_methods`` are used to build a ``CryptoDataset``; the
    windows are split chronologically into 70% train, 15% validation and 15%
    test, a ``BiLSTMModel`` is trained and its test metrics are printed.

    Parameters
    ----------
    data_dict : dict[str, pandas.DataFrame]
        Frames keyed by coin; each must already contain the feature columns
        and ``log_return``.
    seq_len : int
        Window length passed to ``CryptoDataset``.
    batch_size : int
        Batch size of all data loaders.
    device : str
        Device used for training and evaluation.
    """
    for coin, df in data_dict.items():
        print(f"\n=== Processing {coin} ===")
        feature_sets = feature_selection_methods(df)
        for method_name, features in feature_sets.items():
            print(f"\nMethod: {method_name}, selected features count: {len(features)}")
            if len(features) == 0:
                print("No features selected, skipping...")
                continue

            dataset = CryptoDataset(df, feature_cols=features, target_col='log_return', seq_len=seq_len)
            n_samples = len(dataset)
            train_size = int(n_samples * 0.7)
            val_size = int(n_samples * 0.15)
            if train_size == 0 or val_size == 0:
                print("Not enough samples to split, skipping...")
                continue
            val_end = train_size + val_size

            # Chronological split. Consecutive windows overlap almost
            # entirely, so a random split would put near-copies of test
            # windows (and later data) into the training set. It would also
            # deliver the test samples out of time order.
            # NOTE: the scaler inside CryptoDataset is still fitted on the
            # whole series.
            train_dataset = torch.utils.data.Subset(dataset, range(0, train_size))
            val_dataset = torch.utils.data.Subset(dataset, range(train_size, val_end))
            test_dataset = torch.utils.data.Subset(dataset, range(val_end, n_samples))

            # Shuffle only the training windows so mini-batches stay mixed,
            # as they were with the previous random split.
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

            model = BiLSTMModel(input_dim=len(features)).to(device)
            print(f"Training BiLSTM on {coin} with features from {method_name}...")
            train_model(model, train_loader, val_loader, epochs=25, lr=1e-4, device=device)
            metrics = evaluate_model_with_metrics(model, test_loader, test_dataset.dataset, feature_index=features.index('close'), device=device)
            print(f"Final metrics for {coin} with {method_name}: {metrics}")

## Эксперименты с BiLSTM

In [None]:
# Hidden-state dependency: feature_selection_methods (called by run_pipeline)
# reads the 'log_return' column, which exists only after
# add_technical_indicators has been applied to the frames in data_h.
# process_pair does that in place, so the processing loop must run first.
run_pipeline(data_h, seq_len=96, batch_size=128, device='cuda')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)



=== Processing XRP/USDT ===

Method: LightGBM_Importance, selected features count: 16
Training BiLSTM on XRP/USDT with features from LightGBM_Importance...
Epoch 1: Train Loss=0.000454, Val Loss=0.000120, Val RMSE=0.011032
Epoch 2: Train Loss=0.000260, Val Loss=0.000116, Val RMSE=0.010824
Epoch 3: Train Loss=0.000205, Val Loss=0.000115, Val RMSE=0.010781
Epoch 4: Train Loss=0.000186, Val Loss=0.000114, Val RMSE=0.010769
Epoch 5: Train Loss=0.000168, Val Loss=0.000115, Val RMSE=0.010780
Epoch 6: Train Loss=0.000158, Val Loss=0.000114, Val RMSE=0.010747
Epoch 7: Train Loss=0.000152, Val Loss=0.000114, Val RMSE=0.010771
Epoch 8: Train Loss=0.000148, Val Loss=0.000114, Val RMSE=0.010767
Epoch 9: Train Loss=0.000145, Val Loss=0.000114, Val RMSE=0.010753
Epoch 10: Train Loss=0.000141, Val Loss=0.000114, Val RMSE=0.010760
Epoch 11: Train Loss=0.000139, Val Loss=0.000114, Val RMSE=0.010763
Epoch 12: Train Loss=0.000138, Val Loss=0.000114, Val RMSE=0.010756
Epoch 13: Train Loss=0.000137, Val L

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)



Method: LightGBM_Importance, selected features count: 16
Training BiLSTM on TRX/USDT with features from LightGBM_Importance...
Epoch 1: Train Loss=0.000430, Val Loss=0.000094, Val RMSE=0.009751
Epoch 2: Train Loss=0.000229, Val Loss=0.000089, Val RMSE=0.009495
Epoch 3: Train Loss=0.000174, Val Loss=0.000089, Val RMSE=0.009471
Epoch 4: Train Loss=0.000144, Val Loss=0.000088, Val RMSE=0.009450
Epoch 5: Train Loss=0.000128, Val Loss=0.000089, Val RMSE=0.009463
Epoch 6: Train Loss=0.000118, Val Loss=0.000088, Val RMSE=0.009436
Epoch 7: Train Loss=0.000112, Val Loss=0.000089, Val RMSE=0.009463
Epoch 8: Train Loss=0.000108, Val Loss=0.000088, Val RMSE=0.009434
Epoch 9: Train Loss=0.000103, Val Loss=0.000089, Val RMSE=0.009462
Epoch 10: Train Loss=0.000101, Val Loss=0.000088, Val RMSE=0.009447
Epoch 11: Train Loss=0.000099, Val Loss=0.000088, Val RMSE=0.009443
Epoch 12: Train Loss=0.000097, Val Loss=0.000088, Val RMSE=0.009432
Epoch 13: Train Loss=0.000096, Val Loss=0.000088, Val RMSE=0.0094

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(



Method: LightGBM_Importance, selected features count: 15
Training BiLSTM on BTC/USDT with features from LightGBM_Importance...
Epoch 1: Train Loss=0.000268, Val Loss=0.000056, Val RMSE=0.007516
Epoch 2: Train Loss=0.000127, Val Loss=0.000053, Val RMSE=0.007319
Epoch 3: Train Loss=0.000091, Val Loss=0.000053, Val RMSE=0.007283
Epoch 4: Train Loss=0.000073, Val Loss=0.000053, Val RMSE=0.007299
Epoch 5: Train Loss=0.000065, Val Loss=0.000052, Val RMSE=0.007265
Epoch 6: Train Loss=0.000060, Val Loss=0.000052, Val RMSE=0.007271
Epoch 7: Train Loss=0.000057, Val Loss=0.000052, Val RMSE=0.007271
Epoch 8: Train Loss=0.000055, Val Loss=0.000052, Val RMSE=0.007263
Epoch 9: Train Loss=0.000054, Val Loss=0.000052, Val RMSE=0.007270
Epoch 10: Train Loss=0.000053, Val Loss=0.000052, Val RMSE=0.007268
Epoch 11: Train Loss=0.000052, Val Loss=0.000052, Val RMSE=0.007261
Epoch 12: Train Loss=0.000051, Val Loss=0.000052, Val RMSE=0.007256
Epoch 13: Train Loss=0.000050, Val Loss=0.000052, Val RMSE=0.0072

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)



Method: LightGBM_Importance, selected features count: 15
Training BiLSTM on SOL/USDT with features from LightGBM_Importance...
Epoch 1: Train Loss=0.000591, Val Loss=0.000192, Val RMSE=0.013887
Epoch 2: Train Loss=0.000392, Val Loss=0.000188, Val RMSE=0.013720
Epoch 3: Train Loss=0.000317, Val Loss=0.000188, Val RMSE=0.013713
Epoch 4: Train Loss=0.000287, Val Loss=0.000186, Val RMSE=0.013644
Epoch 5: Train Loss=0.000263, Val Loss=0.000185, Val RMSE=0.013623
Epoch 6: Train Loss=0.000246, Val Loss=0.000185, Val RMSE=0.013633
Epoch 7: Train Loss=0.000237, Val Loss=0.000186, Val RMSE=0.013653
Epoch 8: Train Loss=0.000228, Val Loss=0.000186, Val RMSE=0.013642
Epoch 9: Train Loss=0.000223, Val Loss=0.000185, Val RMSE=0.013623
Epoch 10: Train Loss=0.000221, Val Loss=0.000185, Val RMSE=0.013620
Epoch 11: Train Loss=0.000216, Val Loss=0.000185, Val RMSE=0.013623
Epoch 12: Train Loss=0.000214, Val Loss=0.000185, Val RMSE=0.013615
Epoch 13: Train Loss=0.000212, Val Loss=0.000184, Val RMSE=0.0136

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)



Method: LightGBM_Importance, selected features count: 15
Training BiLSTM on ETH/USDT with features from LightGBM_Importance...
Epoch 1: Train Loss=0.000362, Val Loss=0.000081, Val RMSE=0.008916
Epoch 2: Train Loss=0.000195, Val Loss=0.000079, Val RMSE=0.008777
Epoch 3: Train Loss=0.000147, Val Loss=0.000079, Val RMSE=0.008787
Epoch 4: Train Loss=0.000122, Val Loss=0.000078, Val RMSE=0.008751
Epoch 5: Train Loss=0.000106, Val Loss=0.000078, Val RMSE=0.008723
Epoch 6: Train Loss=0.000098, Val Loss=0.000078, Val RMSE=0.008737
Epoch 7: Train Loss=0.000093, Val Loss=0.000078, Val RMSE=0.008711
Epoch 8: Train Loss=0.000089, Val Loss=0.000078, Val RMSE=0.008721
Epoch 9: Train Loss=0.000085, Val Loss=0.000078, Val RMSE=0.008710
Epoch 10: Train Loss=0.000084, Val Loss=0.000078, Val RMSE=0.008714
Epoch 11: Train Loss=0.000083, Val Loss=0.000078, Val RMSE=0.008705
Epoch 12: Train Loss=0.000082, Val Loss=0.000078, Val RMSE=0.008714
Epoch 13: Train Loss=0.000081, Val Loss=0.000078, Val RMSE=0.0087

## Теперь собираем гибрид

In [None]:
!pip install performer-pytorch

Collecting performer-pytorch
  Downloading performer_pytorch-1.1.4-py3-none-any.whl.metadata (763 bytes)
Collecting local-attention>=1.1.1 (from performer-pytorch)
  Downloading local_attention-1.11.1-py3-none-any.whl.metadata (907 bytes)
Collecting axial-positional-embedding>=0.1.0 (from performer-pytorch)
  Downloading axial_positional_embedding-0.3.12-py3-none-any.whl.metadata (4.3 kB)
Collecting hyper-connections>=0.1.8 (from local-attention>=1.1.1->performer-pytorch)
  Downloading hyper_connections-0.1.15-py3-none-any.whl.metadata (5.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.6->performer-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.6->performer-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.6->performer-pytorch)
  Down

In [None]:
from performer_pytorch import Performer

class BiLSTMPerformerHybrid(nn.Module):
    """Stacked hybrid: a BiLSTM encoder whose outputs are refined by a Performer.

    Data flow: ``x`` -> bidirectional LSTM -> linear projection to
    ``performer_dim`` -> Performer -> last time step -> LayerNorm + Linear -> one
    value per sample.

    Args:
        input_dim: Number of features per time step.
        lstm_hidden: Hidden size of each LSTM direction.
        lstm_layers: Number of stacked LSTM layers.
        performer_dim: Model width used inside the Performer.
        performer_depth: Number of Performer layers.
        performer_heads: Number of attention heads.
        dropout: Dropout rate used between LSTM layers and for the Performer's
            feed-forward and attention dropout.
        dim_head: Per-head attention width. Defaults to
            ``performer_dim // performer_heads``.
    """

    def __init__(self, input_dim, lstm_hidden=64, lstm_layers=2, performer_dim=128,
                 performer_depth=3, performer_heads=4, dropout=0.1, dim_head=None):
        super().__init__()

        self.bilstm = nn.LSTM(input_dim, lstm_hidden, num_layers=lstm_layers,
                              batch_first=True, bidirectional=True, dropout=dropout)

        # Both LSTM directions are concatenated, hence lstm_hidden * 2 inputs.
        self.project = nn.Linear(lstm_hidden * 2, performer_dim)

        if dim_head is None:
            dim_head = max(1, performer_dim // performer_heads)

        # Performer needs an explicit dim_head and exposes dropout as separate
        # ff_dropout / attn_dropout arguments (same call shape as the other
        # Performer-based models in this notebook).
        self.performer = Performer(
            dim=performer_dim,
            depth=performer_depth,
            heads=performer_heads,
            dim_head=dim_head,
            causal=False,
            nb_features=256,
            ff_dropout=dropout,
            attn_dropout=dropout
        )

        self.fc = nn.Sequential(
            nn.LayerNorm(performer_dim),
            nn.Linear(performer_dim, 1)
        )

    def forward(self, x):
        """Map a ``(batch, seq_len, input_dim)`` tensor to ``(batch,)`` predictions."""
        lstm_out, _ = self.bilstm(x)  # (batch, seq_len, lstm_hidden*2)

        proj = self.project(lstm_out)  # (batch, seq_len, performer_dim)

        performer_out = self.performer(proj)  # (batch, seq_len, performer_dim)

        # Only the final time step is used as the sequence summary.
        last_hidden = performer_out[:, -1, :]  # (batch, performer_dim)

        out = self.fc(last_hidden).squeeze(-1)  # (batch,)
        return out

In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from sklearn.feature_selection import RFE
from sklearn.linear_model import LassoCV
import torch.nn as nn
from torch.optim import Adam
from performer_pytorch import Performer  # pip install performer-pytorch

class CryptoDataset(Dataset):
    """Sliding-window dataset: ``seq_len`` rows of scaled features -> the next row's target.

    Rows with a missing feature or target are dropped first. Features are
    standardised with a ``StandardScaler`` that is fitted on every remaining
    row, so if the dataset is later split into train/validation/test parts the
    scaling statistics also cover the held-out rows. Targets are stored
    unscaled.

    Args:
        df: Frame holding the feature columns and the target column.
        feature_cols: Names of the feature columns (any sequence of names).
        target_col: Name of the target column.
        seq_len: Number of consecutive rows in each input window.
    """

    def __init__(self, df, feature_cols, target_col='log_return', seq_len=24):
        self.seq_len = seq_len
        feature_cols = list(feature_cols)
        df = df.dropna(subset=feature_cols + [target_col]).reset_index(drop=True)
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(df[feature_cols].values)
        self.targets = df[target_col].values

    def __len__(self):
        # Clamp at zero: with fewer rows than seq_len the difference is negative,
        # and len() raises on a negative __len__.
        return max(0, len(self.targets) - self.seq_len)

    def __getitem__(self, idx):
        """Return ``(window, target)``: rows ``idx .. idx+seq_len-1`` and the target at ``idx+seq_len``."""
        x = self.features[idx:idx + self.seq_len]
        y = self.targets[idx + self.seq_len]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

class BiLSTMModel(nn.Module):
    """Bidirectional LSTM regressor producing one value per input sequence.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout rate used between LSTM layers and before the output layer.
    """

    def __init__(self, input_dim, hidden_dim=64, num_layers=2, dropout=0.2):
        super(BiLSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers,
                            batch_first=True, bidirectional=True, dropout=dropout)
        self.dropout = nn.Dropout(dropout)
        # Both directions are concatenated, hence hidden_dim * 2 inputs.
        self.fc = nn.Linear(hidden_dim * 2, 1)

    def forward(self, x):
        """Map a ``(batch, seq_len, input_dim)`` tensor to ``(batch,)`` predictions."""
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # keep only the final time step
        out = self.dropout(out)
        out = self.fc(out)
        # Squeeze only the trailing singleton: a bare squeeze() would also drop
        # the batch axis when a batch holds a single sample, yielding a 0-d tensor.
        return out.squeeze(-1)

class PerformerModel(nn.Module):
    """Performer-based regressor producing one value per input sequence.

    Inputs are projected linearly to ``performer_dim``, passed through a
    non-causal Performer (feed-forward and attention dropout both 0.1), and the
    final time step is mapped through LayerNorm + Linear to a scalar.

    Args:
        input_dim: Number of features per time step.
        performer_dim: Model width used inside the Performer.
        performer_depth: Number of Performer layers.
        performer_heads: Number of attention heads.
        dim_head: Per-head attention width.
    """

    def __init__(self, input_dim, performer_dim=128, performer_depth=3, performer_heads=4, dim_head=32):
        super().__init__()
        self.project = nn.Linear(input_dim, performer_dim)
        performer_config = dict(
            dim=performer_dim,
            depth=performer_depth,
            heads=performer_heads,
            dim_head=dim_head,
            causal=False,
            ff_dropout=0.1,
            attn_dropout=0.1,
        )
        self.performer = Performer(**performer_config)
        self.fc = nn.Sequential(nn.LayerNorm(performer_dim), nn.Linear(performer_dim, 1))

    def forward(self, x):
        """Return a tensor with the trailing singleton axis removed (one value per sample)."""
        encoded = self.performer(self.project(x))
        final_step = encoded[:, -1, :]  # summary taken from the last time step
        return self.fc(final_step).squeeze(-1)

def train_model(model, train_loader, val_loader, epochs=50, lr=1e-3, device='cuda', patience=10):
    """Fit ``model`` with Adam and MSE loss, checkpointing the best validation epoch.

    After each epoch the mean validation loss is compared with the best value
    so far. An improvement overwrites ``best_bilstm.pth`` (when ``model`` is a
    ``BiLSTMModel``) or ``best_performer.pth`` (any other model) in the current
    working directory. Training stops early after ``patience`` consecutive
    epochs without improvement.

    Args:
        model: Module to train; its output must hold one value per target element.
        train_loader: Iterable of ``(x, y)`` batches used for optimisation.
        val_loader: Iterable of ``(x, y)`` batches used for model selection.
        epochs: Maximum number of epochs.
        lr: Adam learning rate.
        device: Device the model and batches are moved to.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        None. Progress is printed; the best weights are left on disk.
    """
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = Adam(model.parameters(), lr=lr)
    best_val_loss = float('inf')
    trigger_times = 0

    for epoch in range(epochs):
        model.train()
        train_losses = []
        for x_batch, y_batch in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            # Align the prediction with the target shape. A model that squeezes
            # a one-sample batch to a 0-d tensor, or returns (batch, 1), would
            # otherwise be broadcast against the (batch,) target inside MSELoss.
            preds = model(x_batch).reshape(y_batch.shape)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        model.eval()
        val_losses = []
        val_preds, val_targets = [], []
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                preds = model(x_val).reshape(y_val.shape)
                loss = criterion(preds, y_val)
                val_losses.append(loss.item())
                val_preds.extend(preds.cpu().numpy())
                val_targets.extend(y_val.cpu().numpy())

        avg_train_loss = np.mean(train_losses)
        avg_val_loss = np.mean(val_losses)
        val_rmse = np.sqrt(mean_squared_error(val_targets, val_preds))

        print(f"Epoch {epoch+1}: Train Loss={avg_train_loss:.6f}, Val Loss={avg_val_loss:.6f}, Val RMSE={val_rmse:.6f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            trigger_times = 0
            # The checkpoint name depends only on the model class, so each new
            # training run overwrites the previous file of the same kind.
            if isinstance(model, BiLSTMModel):
                torch.save(model.state_dict(), f'best_bilstm.pth')
            else:
                torch.save(model.state_dict(), f'best_performer.pth')
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print("Early stopping triggered")
                break

def inverse_transform(scaler, data_scaled, feature_index=0):
    """Undo standardisation for a single feature column.

    Args:
        scaler: Fitted scaler exposing per-feature ``mean_`` and ``scale_``.
        data_scaled: Standardised value(s) belonging to that feature.
        feature_index: Position of the feature in the scaler's column order.

    Returns:
        ``data_scaled * scale_[feature_index] + mean_[feature_index]``.
    """
    offset, spread = scaler.mean_[feature_index], scaler.scale_[feature_index]
    restored = data_scaled * spread + offset
    return restored

def compute_financial_metrics(y_true_scaled, y_pred_scaled, scaler, target_feature_index=0):
    """Score a predicted series against the actual one after de-standardising both.

    Both inputs are mapped back with ``inverse_transform`` using the scaler
    statistics of column ``target_feature_index``; step-to-step relative
    changes of each restored series are then compared.

    Args:
        y_true_scaled: 1-D array of actual values in scaled units.
        y_pred_scaled: 1-D array of predicted values in scaled units.
        scaler: Fitted scaler passed on to ``inverse_transform``.
        target_feature_index: Column of the scaler used for the inverse mapping.

    Returns:
        Dict with ``'Directional Accuracy'`` (share of steps where both series
        move in the same direction), ``'Hit Ratio'`` (the same number),
        ``'Sharpe Ratio'`` (mean / std of the *predicted* series' relative
        changes, scaled by ``sqrt(365*24)``) and ``'RMSE'`` in restored units.
    """
    actual = inverse_transform(scaler, y_true_scaled, target_feature_index)
    forecast = inverse_transform(scaler, y_pred_scaled, target_feature_index)

    # Relative change between consecutive elements of each series.
    actual_moves = np.diff(actual) / actual[:-1]
    forecast_moves = np.diff(forecast) / forecast[:-1]

    same_direction = np.sign(actual_moves) == np.sign(forecast_moves)
    directional_accuracy = np.mean(same_direction)

    # 1e-9 keeps the ratio finite when the predicted changes have zero spread.
    sharpe = np.mean(forecast_moves) / (np.std(forecast_moves) + 1e-9) * np.sqrt(365*24)

    squared_error = (actual - forecast)**2

    return {
        'Directional Accuracy': directional_accuracy,
        'Hit Ratio': directional_accuracy,
        'Sharpe Ratio': sharpe,
        'RMSE': np.sqrt(np.mean(squared_error))
    }

def predict_model(model, model_type, data_loader, device='cuda'):
    """Load the saved best checkpoint into ``model`` and predict over ``data_loader``.

    Args:
        model: Module whose architecture matches the saved state dict.
        model_type: ``'bilstm'`` loads ``best_bilstm.pth``; any other value
            loads ``best_performer.pth`` (both read from the working directory).
        data_loader: Iterable of ``(x, y)`` batches.
        device: Device used for inference.

    Returns:
        ``(preds, targets)`` as NumPy arrays, in loader order.
    """
    path = 'best_bilstm.pth' if model_type == 'bilstm' else 'best_performer.pth'

    # map_location lets a checkpoint written on one device (e.g. GPU) be
    # restored when inference runs on another (e.g. CPU).
    model.load_state_dict(torch.load(path, map_location=device))
    model.to(device)
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for x, y in data_loader:
            pred = model(x.to(device))
            # atleast_1d: a model that squeezes a one-sample batch to a 0-d
            # tensor would otherwise make extend() fail on a 0-d array.
            preds.extend(np.atleast_1d(pred.cpu().numpy()))
            targets.extend(np.atleast_1d(y.cpu().numpy()))
    return np.array(preds), np.array(targets)

# --- Feature selection with RFE ---
def select_features_rfe(df, target_col='log_return', n_features=10):
    """Pick features for next-step prediction via RFE around a LightGBM regressor.

    The target is ``target_col`` shifted one row into the future. RFE is fitted
    on the first 80% of rows (unshuffled split) and removes 5 features per
    iteration until ``n_features`` remain.

    Args:
        df: Frame with a ``timestamp`` column, ``target_col`` and candidate
            features. The caller's frame is not modified.
        target_col: Column whose next-row value is predicted.
        n_features: Number of features RFE keeps.

    Returns:
        List of selected column names. ``'close'`` is appended when RFE did not
        keep it, so the list can hold ``n_features + 1`` entries.
    """
    # Explicit copy: assigning a column to the bare dropna() result is what
    # raised pandas' SettingWithCopyWarning in earlier runs.
    frame = df.dropna().copy()
    frame['target'] = frame[target_col].shift(-1)
    frame = frame.dropna()  # the last row has no next-step target
    X = frame.drop(columns=['target', 'timestamp'])
    y = frame['target']

    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, shuffle=False)

    lgb_estimator = lgb.LGBMRegressor(n_estimators=100, n_jobs=-1, random_state=42)
    rfe_selector = RFE(estimator=lgb_estimator, n_features_to_select=n_features, step=5, verbose=0)
    rfe_selector.fit(X_train, y_train)
    selected_features = X.columns[rfe_selector.support_].tolist()

    # Downstream code looks up the position of 'close' in this list.
    if 'close' not in selected_features:
        selected_features.append('close')

    return selected_features

def run_hybrid_pipeline(data_dict, seq_len=24, batch_size=64, device='cuda'):
    """Train a BiLSTM and a Performer per coin and evaluate a weighted blend of both.

    For every ``(coin, frame)`` pair: select features with RFE, build sliding
    windows, split them chronologically 70/15/15, train both models, derive
    the blend weight from the validation errors and print test metrics for
    each model and for the blend.

    Args:
        data_dict: Mapping ``coin -> DataFrame`` containing ``timestamp``,
            ``close``, ``log_return`` and the candidate feature columns.
        seq_len: Window length in rows.
        batch_size: Batch size for all loaders.
        device: Device used for training and inference.

    Returns:
        None. Results are printed.
    """
    for coin, df in data_dict.items():
        print(f"\n=== Processing {coin} ===")

        features = select_features_rfe(df, n_features=10)
        print(f"Selected features (RFE): {features}")

        dataset = CryptoDataset(df, feature_cols=features, target_col='log_return', seq_len=seq_len)
        n_samples = len(dataset)
        train_size = int(n_samples * 0.7)
        val_size = int(n_samples * 0.15)

        # Chronological split. Neighbouring windows share all but one row, so a
        # random split would place near-copies of every validation/test window
        # in the training set, and it would scramble the time order that
        # compute_financial_metrics relies on when it differences the series.
        train_dataset = torch.utils.data.Subset(dataset, range(0, train_size))
        val_dataset = torch.utils.data.Subset(dataset, range(train_size, train_size + val_size))
        test_dataset = torch.utils.data.Subset(dataset, range(train_size + val_size, n_samples))

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        bilstm = BiLSTMModel(input_dim=len(features)).to(device)
        print("Training BiLSTM...")
        train_model(bilstm, train_loader, val_loader, epochs=15, lr=1e-3, device=device)
        preds_val_bilstm, y_val = predict_model(bilstm, 'bilstm', val_loader, device=device)

        performer = PerformerModel(input_dim=len(features)).to(device)
        print("Training Performer...")
        train_model(performer, train_loader, val_loader, epochs=15, lr=1e-3, device=device)
        preds_val_performer, _ = predict_model(performer, 'performer', val_loader, device=device)

        # Minimum-variance blend weight, estimated on the validation errors.
        errors_bilstm = preds_val_bilstm - y_val
        errors_performer = preds_val_performer - y_val

        var_bilstm = np.var(errors_bilstm)
        var_performer = np.var(errors_performer)
        rho = np.corrcoef(errors_bilstm, errors_performer)[0, 1]
        covariance = rho * np.sqrt(var_bilstm) * np.sqrt(var_performer)

        # The denominator is the variance of the error difference. It is zero
        # when both models make identical errors and NaN when one error series
        # is constant (rho undefined); fall back to an equal blend then.
        denominator = var_bilstm + var_performer - 2 * covariance
        if np.isfinite(denominator) and denominator > 0:
            w_opt = (var_performer - covariance) / denominator
        else:
            w_opt = 0.5
        w_opt = np.clip(w_opt, 0, 0.95)
        print(f"Optimal ensemble weight w: {w_opt:.4f}")

        preds_test_bilstm, y_test = predict_model(bilstm, 'bilstm', test_loader, device=device)
        preds_test_performer, _ = predict_model(performer, 'performer', test_loader, device=device)

        combined_preds = w_opt * preds_test_bilstm + (1 - w_opt) * preds_test_performer

        scaler = dataset.scaler
        price_index = features.index('close')  # scaler column used for de-normalisation
        # NOTE(review): y_test and the predictions are raw `log_return` values
        # (CryptoDataset stores targets unscaled), yet compute_financial_metrics
        # de-standardises them with the `close` column's statistics. TODO confirm
        # whether the metrics should be computed on log-returns directly.
        reports = (
            ('BiLSTM model', preds_test_bilstm),
            ('Performer model', preds_test_performer),
            ('hybrid model', combined_preds),
        )
        for label, preds in reports:
            metrics = compute_financial_metrics(y_test, preds, scaler, price_index)
            print(f"Final metrics for {coin} ({label}):")
            for k, v in metrics.items():
                print(f"{k}: {v:.6f}")



In [None]:
# Run the BiLSTM + Performer ensemble on the hourly frames with 240-row windows.
run_hybrid_pipeline(data_h, seq_len=240, batch_size=128, device='cuda')


=== Processing XRP/USDT ===


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


Selected features (RFE): ['volume', 'MACD_diff', 'RSI_14', 'ATR_14', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'TP_MAD', 'return_1h', 'range', 'close']
Training BiLSTM...
Epoch 1: Train Loss=0.000172, Val Loss=0.000130, Val RMSE=0.011387
Epoch 2: Train Loss=0.000127, Val Loss=0.000126, Val RMSE=0.011237
Epoch 3: Train Loss=0.000123, Val Loss=0.000126, Val RMSE=0.011216
Epoch 4: Train Loss=0.000122, Val Loss=0.000126, Val RMSE=0.011208
Epoch 5: Train Loss=0.000122, Val Loss=0.000125, Val RMSE=0.011184
Epoch 6: Train Loss=0.000121, Val Loss=0.000125, Val RMSE=0.011183
Epoch 7: Train Loss=0.000121, Val Loss=0.000125, Val RMSE=0.011176
Epoch 8: Train Loss=0.000121, Val Loss=0.000125, Val RMSE=0.011183
Epoch 9: Train Loss=0.000121, Val Loss=0.000125, Val RMSE=0.011174
Epoch 10: Train Loss=0.000121, Val Loss=0.000125, Val RMSE=0.011166
Epoch 11: Train Loss=0.000121, Val Loss=0.000125, Val RMSE=0.011176
Epoch 12: Train Loss=0.000121, Val Loss=0.000125, Val RMSE=0.011164
Epoch 13: Train Loss=0.0001

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


Selected features (RFE): ['volume', 'MACD_signal', 'MACD_diff', 'BB_lower', 'ATR_14', 'STOCH_slowd', 'OBV', 'CCI_20', 'return_1h', 'range', 'close']
Training BiLSTM...
Epoch 1: Train Loss=0.000216, Val Loss=0.000106, Val RMSE=0.010278
Epoch 2: Train Loss=0.000104, Val Loss=0.000104, Val RMSE=0.010224
Epoch 3: Train Loss=0.000093, Val Loss=0.000104, Val RMSE=0.010187
Epoch 4: Train Loss=0.000090, Val Loss=0.000103, Val RMSE=0.010170
Epoch 5: Train Loss=0.000088, Val Loss=0.000103, Val RMSE=0.010143
Epoch 6: Train Loss=0.000087, Val Loss=0.000103, Val RMSE=0.010149
Epoch 7: Train Loss=0.000086, Val Loss=0.000103, Val RMSE=0.010134
Epoch 8: Train Loss=0.000086, Val Loss=0.000103, Val RMSE=0.010138
Epoch 9: Train Loss=0.000086, Val Loss=0.000103, Val RMSE=0.010153
Epoch 10: Train Loss=0.000085, Val Loss=0.000103, Val RMSE=0.010155
Epoch 11: Train Loss=0.000085, Val Loss=0.000103, Val RMSE=0.010157
Epoch 12: Train Loss=0.000085, Val Loss=0.000102, Val RMSE=0.010128
Epoch 13: Train Loss=0.00

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


Selected features (RFE): ['volume', 'MACD_diff', 'RSI_14', 'ATR_14', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'TP_MAD', 'CCI_20', 'return_1h', 'close']
Training BiLSTM...
Epoch 1: Train Loss=0.000159, Val Loss=0.000050, Val RMSE=0.007090
Epoch 2: Train Loss=0.000060, Val Loss=0.000050, Val RMSE=0.007064
Epoch 3: Train Loss=0.000055, Val Loss=0.000050, Val RMSE=0.007057
Epoch 4: Train Loss=0.000052, Val Loss=0.000050, Val RMSE=0.007057
Epoch 5: Train Loss=0.000052, Val Loss=0.000050, Val RMSE=0.007042
Epoch 6: Train Loss=0.000051, Val Loss=0.000050, Val RMSE=0.007044
Epoch 7: Train Loss=0.000051, Val Loss=0.000050, Val RMSE=0.007036
Epoch 8: Train Loss=0.000050, Val Loss=0.000049, Val RMSE=0.007034
Epoch 9: Train Loss=0.000050, Val Loss=0.000049, Val RMSE=0.007032
Epoch 10: Train Loss=0.000050, Val Loss=0.000049, Val RMSE=0.007034
Epoch 11: Train Loss=0.000050, Val Loss=0.000049, Val RMSE=0.007029
Epoch 12: Train Loss=0.000050, Val Loss=0.000049, Val RMSE=0.007028
Epoch 13: Train Loss=0.000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


Selected features (RFE): ['volume', 'MACD_diff', 'RSI_14', 'BB_lower', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'TP_MAD', 'CCI_20', 'return_1h', 'close']
Training BiLSTM...
Epoch 1: Train Loss=0.000263, Val Loss=0.000216, Val RMSE=0.014671
Epoch 2: Train Loss=0.000201, Val Loss=0.000206, Val RMSE=0.014340
Epoch 3: Train Loss=0.000194, Val Loss=0.000206, Val RMSE=0.014325
Epoch 4: Train Loss=0.000192, Val Loss=0.000205, Val RMSE=0.014301
Epoch 5: Train Loss=0.000191, Val Loss=0.000205, Val RMSE=0.014301
Epoch 6: Train Loss=0.000190, Val Loss=0.000205, Val RMSE=0.014297
Epoch 7: Train Loss=0.000190, Val Loss=0.000205, Val RMSE=0.014288
Epoch 8: Train Loss=0.000190, Val Loss=0.000205, Val RMSE=0.014276
Epoch 9: Train Loss=0.000190, Val Loss=0.000205, Val RMSE=0.014276
Epoch 10: Train Loss=0.000190, Val Loss=0.000205, Val RMSE=0.014271
Epoch 11: Train Loss=0.000189, Val Loss=0.000205, Val RMSE=0.014275
Epoch 12: Train Loss=0.000189, Val Loss=0.000205, Val RMSE=0.014280
Epoch 13: Train Loss=0.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target'] = df[target_col].shift(-1)


Selected features (RFE): ['volume', 'MACD_diff', 'BB_lower', 'ATR_14', 'STOCH_slowk', 'STOCH_slowd', 'OBV', 'CCI_20', 'return_1h', 'hour', 'close']
Training BiLSTM...
Epoch 1: Train Loss=0.000193, Val Loss=0.000073, Val RMSE=0.008546
Epoch 2: Train Loss=0.000099, Val Loss=0.000071, Val RMSE=0.008453
Epoch 3: Train Loss=0.000090, Val Loss=0.000072, Val RMSE=0.008493
Epoch 4: Train Loss=0.000086, Val Loss=0.000072, Val RMSE=0.008462
Epoch 5: Train Loss=0.000084, Val Loss=0.000072, Val RMSE=0.008458
Epoch 6: Train Loss=0.000082, Val Loss=0.000071, Val RMSE=0.008404
Epoch 7: Train Loss=0.000081, Val Loss=0.000071, Val RMSE=0.008412
Epoch 8: Train Loss=0.000080, Val Loss=0.000071, Val RMSE=0.008409
Epoch 9: Train Loss=0.000080, Val Loss=0.000071, Val RMSE=0.008400
Epoch 10: Train Loss=0.000080, Val Loss=0.000071, Val RMSE=0.008401
Epoch 11: Train Loss=0.000079, Val Loss=0.000071, Val RMSE=0.008406
Epoch 12: Train Loss=0.000079, Val Loss=0.000071, Val RMSE=0.008401
Epoch 13: Train Loss=0.000

## Учимся прогнозировать на 6 часов вперед

In [None]:
import matplotlib.pyplot as plt
import os

def plot_training_curves(train_loss, val_loss, coin, output_dir='plots'):
    """Save the training and validation loss curves as ``<coin>_training_curves.png``.

    Args:
        train_loss: Sequence of training losses, one per epoch.
        val_loss: Sequence of validation losses, one per epoch.
        coin: Label used in the title and the file name.
        output_dir: Directory for the image; created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    target_path = os.path.join(output_dir, f'{coin}_training_curves.png')

    fig, ax = plt.subplots(figsize=(10, 6))
    for series, label in ((train_loss, 'Training Loss'), (val_loss, 'Validation Loss')):
        ax.plot(series, label=label)
    ax.set_title(f'{coin} Training/Validation Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('MSE Loss')
    ax.legend()

    fig.savefig(target_path)
    plt.close(fig)  # release the figure so repeated calls do not accumulate memory

def plot_predictions_vs_actuals_full(true_prices, pred_prices, timestamps, coin, max_points=150, output_dir='plots'):
    """Plot actual vs. predicted prices and save ``<coin>_actual_vs_predicted.png``.

    Args:
        true_prices: Sequence of actual prices.
        pred_prices: Sequence of predicted prices, aligned with ``true_prices``.
        timestamps: X-axis values aligned with both price sequences.
        coin: Label used in the file name.
        max_points: Only the last ``max_points`` observations are drawn when
            more are supplied.
        output_dir: Directory for the image; created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    if len(true_prices) > max_points:
        tail = slice(-max_points, None)
        true_prices, pred_prices, timestamps = true_prices[tail], pred_prices[tail], timestamps[tail]

    fig, ax = plt.subplots(figsize=(14, 7))
    ax.plot(timestamps, true_prices, label='Реальная цена', linewidth=2)
    ax.plot(timestamps, pred_prices, label='Прогноз цены', linestyle='--')

    ax.set_xlabel('Время')
    ax.set_ylabel('Цена')

    # Pad the y-range by 5% below the lowest and above the highest value.
    lowest = min(min(true_prices), min(pred_prices))
    highest = max(max(true_prices), max(pred_prices))
    ax.set_ylim(lowest * 0.95, highest * 1.05)

    ax.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    fig.savefig(os.path.join(output_dir, f'{coin}_actual_vs_predicted.png'))
    plt.close(fig)

In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
import lightgbm as lgb
import torch.nn as nn
from torch.optim import Adam
from performer_pytorch import Performer

In [None]:
def create_multi_step_targets(df, target_col='log_return', horizon=6):
    """Return a copy of ``df`` with one future-target column per forecast step.

    Column ``target_t+i`` holds ``target_col`` shifted ``i`` rows into the
    future, for ``i = 1 .. horizon``. Rows containing any NaN (including the
    last ``horizon`` rows, which have no complete set of targets) are dropped
    and the index is reset.

    The input frame is left untouched. Writing the columns into the caller's
    frame would leave ``target_t+k`` columns behind in it, where a later run
    with a smaller horizon could pick them up as ordinary features.

    Args:
        df: Source frame containing ``target_col``.
        target_col: Column to shift.
        horizon: Number of future steps to materialise.

    Returns:
        New DataFrame with the added target columns.
    """
    out = df.copy()
    for step in range(1, horizon + 1):
        out[f'target_t+{step}'] = out[target_col].shift(-step)
    return out.dropna().reset_index(drop=True)

def select_features_rfe(df, target_col='log_return', n_features=10):
    """Pick features for next-step prediction via RFE around a LightGBM regressor.

    The target is ``target_col`` shifted one row into the future. RFE is fitted
    on the first 80% of rows (unshuffled split) and removes 5 features per
    iteration until ``n_features`` remain.

    Args:
        df: Frame with a ``timestamp`` column, ``target_col`` and candidate
            features. The caller's frame is not modified.
        target_col: Column whose next-row value is predicted.
        n_features: Number of features RFE keeps.

    Returns:
        List of selected column names. ``'close'`` is appended when RFE did not
        keep it, so the list can hold ``n_features + 1`` entries.
    """
    # Explicit copy: callers pass column slices of a larger frame, and adding a
    # column to the bare dropna() result triggers SettingWithCopyWarning.
    frame = df.dropna().copy()
    frame['target'] = frame[target_col].shift(-1)
    frame = frame.dropna()  # the last row has no next-step target
    X = frame.drop(columns=['target', 'timestamp'])
    y = frame['target']

    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, shuffle=False)

    lgb_estimator = lgb.LGBMRegressor(n_estimators=100, n_jobs=-1, random_state=42)
    rfe_selector = RFE(estimator=lgb_estimator, n_features_to_select=n_features, step=5)
    rfe_selector.fit(X_train, y_train)
    selected_features = X.columns[rfe_selector.support_].tolist()

    if 'close' not in selected_features:
        selected_features.append('close')

    return selected_features

In [None]:
class MultiStepDataset(torch.utils.data.Dataset):
    """Sliding-window dataset that also returns the last close price of each window.

    Features are standardised with a ``StandardScaler`` fitted on every row of
    ``df``; targets and close prices are kept in their original units.

    Args:
        df: Frame holding ``close``, the feature columns and the target column(s).
        feature_cols: Names of the feature columns.
        target_col: Target column name, or a list of names for several steps.
        seq_len: Number of consecutive rows in each input window.
    """

    def __init__(self, df, feature_cols, target_col, seq_len=120):
        self.seq_len = seq_len
        self.df = df.reset_index(drop=True)

        # close is kept unscaled, separately from the features, so prices can
        # be reconstructed from predicted log-returns.
        self.close = df['close'].values
        features = df[feature_cols].values

        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(features)

        self.targets = df[target_col].values

    def __len__(self):
        # Required by len(dataset), Subset and DataLoader. The last valid index
        # is the one whose target row idx + seq_len still exists.
        return max(0, len(self.targets) - self.seq_len)

    def __getitem__(self, idx):
        """Return ``(window, target, close_price)`` for the window starting at ``idx``."""
        x = self.features[idx:idx+self.seq_len]
        # NOTE(review): if the target columns are already shifted into the
        # future (as with target_t+i columns), reading row idx + seq_len leaves
        # a one-row gap after close_price below. TODO confirm intended alignment.
        y = self.targets[idx+self.seq_len]
        close_price = self.close[idx+self.seq_len-1]  # last price inside the window
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32), close_price


In [None]:
class HybridPerformerBiLSTM(nn.Module):
    """Parallel BiLSTM and Performer branches blended by one learned weight.

    Both branches read the same input and each predicts ``horizon`` values from
    its last time step. The output is ``w * lstm + (1 - w) * performer`` with
    ``w = min(sigmoid(weight), 0.95)``.

    Args:
        input_dim: Number of features per time step.
        horizon: Number of values predicted per sample.
        lstm_hidden: Hidden size of each LSTM direction.
        lstm_layers: Number of stacked LSTM layers.
        performer_dim: Model width used inside the Performer.
        performer_depth: Number of Performer layers.
        performer_heads: Number of attention heads.
        dim_head: Per-head attention width.
        dropout: Dropout rate between LSTM layers. The Performer's dropout
            rates are fixed at 0.1 independently of this argument.
    """

    def __init__(self, input_dim, horizon=6, lstm_hidden=64, lstm_layers=2,
                 performer_dim=128, performer_depth=3, performer_heads=4, dim_head=32, dropout=0.1):
        super().__init__()
        # BiLSTM branch
        self.bilstm = nn.LSTM(input_dim, lstm_hidden, num_layers=lstm_layers,
                              batch_first=True, bidirectional=True, dropout=dropout)
        self.bilstm_fc = nn.Linear(lstm_hidden * 2, horizon)

        # Performer branch
        self.project = nn.Linear(input_dim, performer_dim)
        self.performer = Performer(
            dim=performer_dim,
            depth=performer_depth,
            heads=performer_heads,
            dim_head=dim_head,
            causal=False,
            ff_dropout = 0.1,
            attn_dropout = 0.1
        )
        self.performer_fc = nn.Linear(performer_dim, horizon)

        # Raw (pre-sigmoid) blend parameter.
        self.weight = nn.Parameter(torch.tensor(0.5))

    def forward(self, x):
        """Map ``(batch, seq_len, input_dim)`` to ``(batch, horizon)`` predictions."""
        lstm_out, _ = self.bilstm(x)
        lstm_last = lstm_out[:, -1, :]
        lstm_pred = self.bilstm_fc(lstm_last)

        proj = self.project(x)
        performer_out = self.performer(proj)
        performer_last = performer_out[:, -1, :]
        performer_pred = self.performer_fc(performer_last)

        # Cap the LSTM share at 0.95 so the Performer branch always contributes.
        # The clamp stays in torch and is assigned back to w; it was previously
        # a NumPy clip stored in a variable that was never used.
        w = torch.clamp(torch.sigmoid(self.weight), 0.0, 0.95)
        out = w * lstm_pred + (1 - w) * performer_pred
        return out

In [None]:
def train_model(model, train_loader, val_loader, epochs=5, lr=1e-4, device='cuda'):
    """Train ``model`` with Adam and MSE loss and record per-epoch mean losses.

    Batches may carry extra trailing items (for example a close price); only
    the first two elements ``(x, y)`` are used.

    Args:
        model: Module to train.
        train_loader: Iterable of batches used for optimisation.
        val_loader: Iterable of batches used for validation.
        epochs: Number of epochs (there is no early stopping).
        lr: Adam learning rate.
        device: Device the model and batches are moved to.

    Returns:
        ``(train_losses, val_losses)``: two lists with one mean loss per epoch.
        An epoch whose loader yields no batches records ``nan``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    model.to(device)

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        model.train()
        epoch_train = []
        for x_batch, y_batch, *_ in train_loader:
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(x_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            epoch_train.append(loss.item())

        # An empty loader yields nan instead of a ZeroDivisionError.
        avg_train_loss = sum(epoch_train) / len(epoch_train) if epoch_train else float('nan')
        train_losses.append(avg_train_loss)

        model.eval()
        epoch_val = []
        with torch.no_grad():
            for x_val, y_val, *_ in val_loader:
                x_val, y_val = x_val.to(device), y_val.to(device)
                output = model(x_val)
                epoch_val.append(criterion(output, y_val).item())

        avg_val_loss = sum(epoch_val) / len(epoch_val) if epoch_val else float('nan')
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch+1}/{epochs} Train Loss: {avg_train_loss:.6f} Val Loss: {avg_val_loss:.6f}")

    return train_losses, val_losses

In [None]:
def inverse_transform(scaler, data_scaled, feature_index=0):
    """Map standardised values of one feature back to original units.

    Args:
        scaler: Fitted scaler exposing per-feature ``mean_`` and ``scale_``.
        data_scaled: Standardised value(s) of the chosen feature.
        feature_index: Column position of that feature in the scaler.

    Returns:
        ``data_scaled * scale_[feature_index] + mean_[feature_index]``.
    """
    column_mean = scaler.mean_[feature_index]
    column_scale = scaler.scale_[feature_index]
    rescaled = data_scaled * column_scale
    return rescaled + column_mean

def sharpe_ratio(returns, risk_free_rate=0.0):
    """Mean-over-std ratio of excess returns, scaled by ``sqrt(365*24)``.

    Args:
        returns: Array of per-period returns.
        risk_free_rate: Per-period rate subtracted from every return.

    Returns:
        ``mean(excess) / (std(excess) + 1e-9) * sqrt(365*24)``.
    """
    excess = returns - risk_free_rate
    volatility = np.std(excess) + 1e-9  # epsilon guards against division by zero
    return np.mean(excess) / volatility * np.sqrt(365*24)

def compute_metrics_multistep(y_true_logr, y_pred_logr, initial_prices):
    """Compute price-level metrics separately for every forecast step.

    For each column of the log-return arrays, prices are rebuilt as
    ``initial_prices * exp(log_return)`` and compared.

    Args:
        y_true_logr: Array ``(n_samples, horizon)`` of actual log-returns.
        y_pred_logr: Array ``(n_samples, horizon)`` of predicted log-returns.
        initial_prices: Array ``(n_samples,)`` of base prices.

    Returns:
        List with one dict per step holding ``'step'`` (1-based), ``'RMSE'``
        between rebuilt prices, ``'Directional Accuracy'`` (share of samples
        where actual and predicted returns have the same sign) and
        ``'Sharpe Ratio'`` of the predicted returns.
    """

    def _metrics_for(step_idx):
        actual_price = initial_prices * np.exp(y_true_logr[:, step_idx])
        forecast_price = initial_prices * np.exp(y_pred_logr[:, step_idx])

        rmse = np.sqrt(np.mean((actual_price - forecast_price) ** 2))

        # Simple returns relative to the base price.
        actual_ret = (actual_price - initial_prices) / initial_prices
        forecast_ret = (forecast_price - initial_prices) / initial_prices

        hits = np.sign(actual_ret) == np.sign(forecast_ret)
        return {
            'step': step_idx + 1,
            'RMSE': rmse,
            'Directional Accuracy': np.mean(hits),
            'Sharpe Ratio': sharpe_ratio(forecast_ret)
        }

    n_steps = y_true_logr.shape[1]
    return [_metrics_for(step_idx) for step_idx in range(n_steps)]

In [None]:
def evaluate_model(model, data_loader, dataset, df, coin, seq_len=120, device='cuda'):
    """Evaluate the first forecast step, save a price plot and return metrics.

    Predicted and actual log-returns of step 1 are turned into prices with the
    last close of each window, plotted against time, and scored with
    ``compute_metrics_multistep``.

    Args:
        model: Trained module returning ``(batch, horizon)`` predictions.
        data_loader: Unshuffled loader yielding ``(x, y, close_price)``. Its
            dataset is expected to be a ``Subset`` (or the full dataset).
        dataset: Underlying full dataset (kept for interface compatibility;
            not read here).
        df: Frame the dataset was built from; must contain ``timestamp``.
        coin: Coin label; ``'/'`` is replaced with ``'_'`` for the file name.
        seq_len: Window length the dataset was built with.
        device: Device used for inference.

    Returns:
        List with a single metrics dict (step 1).
    """
    model.eval()
    preds, targets, last_prices = [], [], []

    with torch.no_grad():
        for x, y, close_price in data_loader:
            pred = model(x.to(device))

            preds.append(pred.cpu().numpy())
            targets.append(y.cpu().numpy())
            last_prices.append(close_price.numpy())

    # Recover the dataset positions of the evaluated samples from the loader
    # itself. This avoids relying on variables that exist only inside the
    # calling pipeline, and stays correct when the final batch is shorter.
    evaluated = data_loader.dataset
    if hasattr(evaluated, 'indices'):
        sample_idx = np.asarray(evaluated.indices)
    else:
        sample_idx = np.arange(len(evaluated))

    # Only the first forecast step is evaluated (horizon index 0).
    preds = np.vstack(preds)[:, 0]
    targets = np.vstack(targets)[:, 0]
    last_prices = np.concatenate(last_prices)

    true_prices = last_prices * np.exp(targets)
    pred_prices = last_prices * np.exp(preds)

    # Sample k reads its target from row k + seq_len, so that row's timestamp
    # labels the point (not the timestamp at the start of the window).
    row_idx = np.clip(sample_idx + seq_len, 0, len(df) - 1)
    timestamps = df['timestamp'].iloc[row_idx].values

    min_len = min(len(true_prices), len(pred_prices), len(timestamps))
    true_prices = true_prices[:min_len]
    pred_prices = pred_prices[:min_len]
    timestamps = timestamps[:min_len]

    plot_predictions_vs_actuals_full(true_prices, pred_prices, timestamps, coin.replace('/', '_'))

    return compute_metrics_multistep(targets[:, None], preds[:, None], last_prices)


In [None]:
def plot_predictions_vs_actuals_full(true_prices, pred_prices, timestamps, coin, max_points=150, output_dir='plots'):
    """Plot actual vs. predicted prices over time and save a 300-dpi PNG.

    Args:
        true_prices: Sequence of actual prices.
        pred_prices: Sequence of predicted prices, aligned with ``true_prices``.
        timestamps: Values convertible by ``pd.to_datetime``, aligned with the prices.
        coin: Label used in the title and the file name.
        max_points: Only the last ``max_points`` rows are drawn when more are supplied.
        output_dir: Directory for the image; created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    series = pd.DataFrame({
        'timestamp': pd.to_datetime(timestamps),
        'Actual': true_prices,
        'Predicted': pred_prices
    }).set_index('timestamp')
    if len(series) > max_points:
        series = series.iloc[-max_points:]

    fig, ax = plt.subplots(figsize=(16, 8))
    ax.plot(series.index, series['Actual'], label='Actual Price', linewidth=2, alpha=0.8)
    ax.plot(series.index, series['Predicted'], label='Predicted Price', linestyle='--', linewidth=1.5)

    ax.set_title(f'{coin} почасовые данные')
    ax.set_xlabel('Время')
    ax.set_ylabel('Цена ($)')

    fig.autofmt_xdate()  # rotate and align the date tick labels
    ax.grid(True, which='both', linestyle='--', alpha=0.5)
    ax.legend()
    fig.tight_layout()

    fig.savefig(os.path.join(output_dir, f'{coin}_actual_vs_predicted.png'), dpi=300)
    plt.close(fig)

In [None]:
def run_pipeline_all_coins(data_dict, target_col='log_return', horizon=6, seq_len=120, batch_size=64, device='cuda'):
    """Train and evaluate the hybrid BiLSTM/Performer model for every coin.

    Per coin: build ``horizon`` future-target columns, select features with
    RFE, create sliding windows, split them chronologically 70/15/15, train for
    15 epochs, save the loss curves and print the evaluation metrics.

    Args:
        data_dict: Mapping ``coin -> DataFrame`` containing ``timestamp``,
            ``close``, ``target_col`` and the candidate features. The frames
            are not modified.
        target_col: Column from which the future targets are derived.
        horizon: Number of future steps the model predicts.
        seq_len: Window length in rows.
        batch_size: Batch size for all loaders.
        device: Device used for training and inference.

    Returns:
        None. Metrics are printed and plots are written by the helpers.
    """
    for coin, raw_df in data_dict.items():
        print(f"\n=== Processing {coin} ===")

        # Work on a copy so the frames in data_dict are never extended with
        # target columns by this run.
        df = create_multi_step_targets(raw_df.copy(), target_col=target_col, horizon=horizon)
        target_cols = [f'target_t+{i}' for i in range(1, horizon+1)]
        # Exclude every target_t+* column, not only this run's. Columns left by
        # an earlier run with a larger horizon hold future values and must not
        # be offered to the feature selector.
        feature_cols = [col for col in df.columns if not str(col).startswith('target_t+')]

        features = select_features_rfe(df[feature_cols], target_col=target_col, n_features=10)
        if 'close' not in features:
            features.append('close')

        dataset = MultiStepDataset(
            df=df,
            feature_cols=features,
            target_col=target_cols,
            seq_len=seq_len
        )
        n_samples = len(dataset)
        train_size = int(n_samples * 0.7)
        val_size = int(n_samples * 0.15)

        # Chronological split: oldest 70% train, next 15% validation, rest test.
        train_dataset = torch.utils.data.Subset(dataset, range(0, train_size))
        val_dataset = torch.utils.data.Subset(dataset, range(train_size, train_size + val_size))
        test_dataset = torch.utils.data.Subset(dataset, range(train_size + val_size, n_samples))

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        model = HybridPerformerBiLSTM(
            input_dim=len(features),
            horizon=horizon
        ).to(device)

        train_loss, val_loss = train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            epochs=15,
            lr=1e-4,
            device=device
        )

        plot_training_curves(train_loss, val_loss, coin.replace('/', '_'))

        metrics = evaluate_model(
            model=model,
            data_loader=test_loader,
            dataset=dataset,
            df=df,
            coin=coin,
            seq_len=seq_len,
            device=device
        )

        print(f"\nMetrics for {coin}:")
        for m in metrics:
            print(f"Step {m['step']}:")
            print(f"  RMSE: {m['RMSE']:.4f}")
            print(f"  DA: {m['Directional Accuracy']:.2%}")
            print(f"  Sharpe: {m['Sharpe Ratio']:.2f}\n")

In [None]:
# Create the default figure directory up front; exist_ok makes this safe to re-run.
os.makedirs('plots', exist_ok=True)

In [None]:
import warnings
# Hide FutureWarning messages for the rest of the session (other categories still show).
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Single-step run (horizon=1) of the hybrid model on the hourly frames, 360-row windows.
run_pipeline_all_coins(data_h, target_col='log_return', horizon=1, seq_len=360, batch_size=64, device='cuda')