## 1ая попытка

In [None]:
import os
import pandas as pd
import numpy as np
import joblib
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset, WeightedRandomSampler
from sklearn.preprocessing import RobustScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, balanced_accuracy_score
from torch.optim.lr_scheduler import CosineAnnealingLR

# ========== 0. Параметры ==========
SYMBOL        = 'BTC/USDT'
TIMEFRAMES    = {'short':'15m','medium':'1h','long':'4h'}
DATA_DIR      = 'csv_data'

HORIZON_STEPS = {'short':12,'medium':24,'long':42}
UP_TH, DOWN_TH = 0.005, -0.005

FEATURES = [
    'open','high','low','close','volume','return',
    'EMA_20','EMA_50','RSI','ATR','ADX','CCI',
    'Stoch_K','Stoch_D','hour','weekday'
]
LOOKBACK    = 60
EPOCHS      = 40
BATCH_SIZE  = 32
LR_INITIAL  = 1e-3
DEVICE      = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ========== 1. Load CSVs и генерация меток ==========
raw = {}
for horizon, tf_name in TIMEFRAMES.items():
    path = os.path.join(DATA_DIR, f"{SYMBOL.replace('/','_')}_{tf_name}.csv")
    df = pd.read_csv(path, parse_dates=['timestamp'])
    fut = df['close'].shift(-HORIZON_STEPS[horizon])
    pct = (fut - df['close']) / df['close']
    df['label'] = np.where(pct > UP_TH, 0, np.where(pct < DOWN_TH, 2, 1))
    df.dropna(inplace=True)
    raw[horizon] = df.reset_index(drop=True)
    dist = raw[horizon]['label'].value_counts(normalize=True).round(3).to_dict()
    print(f"{horizon:6} {raw[horizon].shape} dist: {dist}")

# ========== 2. Построение выборок, RobustScaler, class_weight ==========
datasets = {}
scalers = {}
class_weights = {}

for horizon, df in raw.items():
    sc = RobustScaler()
    X_all = sc.fit_transform(df[FEATURES])
    seqs, labs = [], []
    for i in range(len(df) - LOOKBACK):
        seqs.append(X_all[i:i+LOOKBACK])
        labs.append(int(df['label'].iloc[i+LOOKBACK]))
    X_arr = np.array(seqs)
    y_arr = np.array(labs)

    classes_arr = np.unique(y_arr)
    cw = compute_class_weight(
        class_weight='balanced',
        classes=classes_arr,
        y=y_arr
    )
    class_weights[horizon] = dict(zip(classes_arr, cw))

    datasets[horizon] = (X_arr, y_arr)
    scalers[horizon] = sc
    print(f"{horizon:6} X={X_arr.shape} y={y_arr.shape} cw={class_weights[horizon]}")

# ========== 3. Focal Loss ==========
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction

    def forward(self, logits, targets):
        ce = F.cross_entropy(logits, targets, weight=self.alpha, reduction='none')
        p = F.softmax(logits, dim=1)
        pt = p.gather(1, targets.unsqueeze(1)).squeeze(1)
        loss = (1 - pt) ** self.gamma * ce
        return loss.mean() if self.reduction=='mean' else loss.sum()

# ========== 4. Positional Encoding ==========
class PositionalEncoding(nn.Module):
    def __init__(self, seq_len, d_model):
        super().__init__()
        pos = torch.arange(seq_len).unsqueeze(1).float()
        i   = torch.arange(d_model).unsqueeze(0).float()
        angles = pos / (10000 ** ((2*(i//2)) / d_model))
        pe = torch.zeros(seq_len, d_model)
        pe[:, 0::2] = torch.sin(angles[:, 0::2])
        pe[:, 1::2] = torch.cos(angles[:, 1::2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :]

# ========== 5. Гибридная модель ==========
class HybridTransformer(nn.Module):
    def __init__(self, seq_len, d_model, num_classes=3):
        super().__init__()
        self.conv1 = nn.Conv1d(d_model, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(64, 64, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool1d(2)
        self.lstm  = nn.LSTM(64, 32, batch_first=True, bidirectional=True)
        self.drop_lstm = nn.Dropout(0.2)
        self.proj  = nn.Linear(64, d_model)
        self.pos_enc = PositionalEncoding(seq_len//2, d_model)

        self.transformer_blocks = nn.ModuleList()
        for _ in range(3):
            self.transformer_blocks.append(nn.ModuleDict({
                'mha': nn.MultiheadAttention(d_model, num_heads=4, batch_first=True),
                'ln1': nn.LayerNorm(d_model),
                'ffn': nn.Sequential(nn.Linear(d_model,256), nn.ReLU(), nn.Linear(256,d_model)),
                'ln2': nn.LayerNorm(d_model),
                'drop': nn.Dropout(0.1)
            }))

        self.pool2 = nn.AdaptiveAvgPool1d(1)
        self.head  = nn.Sequential(
            nn.Linear(d_model,128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128,num_classes)
        )

    def forward(self, x):
        x = x.transpose(1,2)  # (B, d_model, T)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool1(x)     # (B,64,T/2)
        x = x.transpose(1,2)  # (B,T/2,64)
        x,_ = self.lstm(x)    # (B,T/2,64)
        x = self.drop_lstm(x)
        x = self.proj(x)      # (B,T/2,d_model)
        x = self.pos_enc(x)
        for blk in self.transformer_blocks:
            attn_out,_ = blk['mha'](x,x,x)
            x = blk['ln1'](x+attn_out)
            ffn_out = blk['ffn'](x)
            x = blk['ln2'](x+ffn_out)
            x = blk['drop'](x)
        x = x.transpose(1,2)  # (B,d_model,T/2)
        x = self.pool2(x).squeeze(-1)  # (B,d_model)
        logits = self.head(x)
        return logits

# ========== 6. Dataset ==========
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self):
        return len(self.y)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# ========== 7. Тренинг и валидация ==========
for horizon, (X_arr, y_arr) in datasets.items():
    # split indices
    n = len(y_arr)
    split = int(0.8 * n)
    train_idx = list(range(split))
    val_idx   = list(range(split, n))

    # sampler only for train
    cw = class_weights[horizon]
    sample_weights = [1.0/cw[int(y_arr[i])] for i in train_idx]
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

    dataset    = TimeSeriesDataset(X_arr, y_arr)
    train_ds   = Subset(dataset, train_idx)
    val_ds     = Subset(dataset, val_idx)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler)
    val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False)

    # model, loss, optimizer, scheduler
    model     = HybridTransformer(LOOKBACK, X_arr.shape[-1]).to(DEVICE)
    alpha     = torch.tensor([cw[i] for i in range(3)], dtype=torch.float32).to(DEVICE)
    criterion = FocalLoss(alpha=alpha, gamma=2.0)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR_INITIAL)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)

    # train loop
    for epoch in range(1, EPOCHS+1):
        model.train()
        total_loss = 0.0
        for xb, yb in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            optimizer.zero_grad()
            logits = model(xb)
            loss   = criterion(logits, yb)
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * xb.size(0)
        scheduler.step()
        print(f"[{horizon}] Epoch {epoch}/{EPOCHS} — Loss: {total_loss/len(train_ds):.4f}")

    # eval
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(DEVICE)
            logits = model(xb)
            preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
            trues.extend(yb.numpy())

    # подсчёт метрик
    bal_acc = balanced_accuracy_score(trues, preds)
    print(f"\n=== {horizon.upper()} — Balanced Accuracy: {bal_acc:.3f} ===")
    print(classification_report(
        trues,
        preds,
        target_names=['Long', 'Hold', 'Short']
    ))

    # save
    torch.save(model.state_dict(), f"btc_hybrid_transformer_{horizon}.pt")
    joblib.dump(scalers[horizon], f"scaler_{horizon}.gz")

short  (7951, 42) dist: {1: 0.572, 2: 0.23, 0: 0.198}
medium (7951, 42) dist: {0: 0.389, 2: 0.371, 1: 0.241}
long   (7895, 42) dist: {0: 0.486, 2: 0.436, 1: 0.078}
short  X=(7891, 60, 16) y=(7891,) cw={0: 1.69044558697515, 1: 0.5803912915563401, 2: 1.4588648548715104}
medium X=(7891, 60, 16) y=(7891,) cw={0: 0.8604296150910479, 1: 1.3785814116002795, 2: 0.8989519252677147}
long   X=(7835, 60, 16) y=(7835,) cw={0: 0.6900044033465433, 1: 4.2259978425026965, 2: 0.760975135975136}
[short] Epoch 1/40 — Loss: 0.3418
[short] Epoch 2/40 — Loss: 0.3235
[short] Epoch 3/40 — Loss: 0.3160
[short] Epoch 4/40 — Loss: 0.3120
[short] Epoch 5/40 — Loss: 0.3099
[short] Epoch 6/40 — Loss: 0.3014
[short] Epoch 7/40 — Loss: 0.2961
[short] Epoch 8/40 — Loss: 0.2806
[short] Epoch 9/40 — Loss: 0.2859
[short] Epoch 10/40 — Loss: 0.2613
[short] Epoch 11/40 — Loss: 0.2650
[short] Epoch 12/40 — Loss: 0.2488
[short] Epoch 13/40 — Loss: 0.2358
[short] Epoch 14/40 — Loss: 0.2284
[short] Epoch 15/40 — Loss: 0.2279
[s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## 2 попытка (snapshot edit)

In [2]:
################################################################################
#  PART 2 ────────────────────────────────────────────────── ПРЕДСКАЗАНИЕ       #
################################################################################
import os, copy, numpy as np, pandas as pd
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import RobustScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    balanced_accuracy_score, f1_score,
    matthews_corrcoef, confusion_matrix,
    classification_report
)
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR

# ========== ПАРАМЕТРЫ ==========
SYMBOL        = "BTC/USDT"
DATA_DIR      = "csv_data"
TIMEFRAMES    = {"short":"15m","medium":"1h","long":"4h"}
HORIZON_STEPS = {"short":12,"medium":24,"long":42}
Q_LOW, Q_HIGH = 0.15, 0.85

LOOKBACK      = 60
BATCH_SIZE    = 64
EPOCHS        = 45        # всего эпох
SNAPSHOTS     = 3         # снимков в ансамбле
LR_INITIAL    = 1e-3
GAMMA_FOCAL   = 2.0
SMOOTHING     = 0.01      # уменьшили сглаживание
DEVICE        = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# ── вспомогательные классы ─────────────────────────────────────────────────── #
class SeqDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self): return len(self.y)
    def __getitem__(self, i): return self.X[i], self.y[i]

class FocalSmoothLoss(nn.Module):
    def __init__(self, alpha, gamma=2.0, smoothing=0.01):
        super().__init__()
        self.alpha, self.gamma, self.smooth = alpha, gamma, smoothing
    def forward(self, logits, target):
        C = logits.size(1)
        with torch.no_grad():
            true = torch.full_like(logits, self.smooth/(C-1))
            true.scatter_(1, target.unsqueeze(1), 1-self.smooth)
        prob = F.softmax(logits, dim=1).clamp(1e-7,1.0)
        ce   = -(true * torch.log(prob)).sum(dim=1)
        pt   = (prob * true).sum(dim=1)
        loss = self.alpha[target] * ((1-pt)**self.gamma) * ce
        return loss.mean()

class SpatialDropout(nn.Dropout2d):
    def forward(self, x):
        x = x.permute(0,2,1).unsqueeze(3)
        x = super().forward(x)
        return x.squeeze(3).permute(0,2,1)

class SEBlock(nn.Module):
    def __init__(self, dim, reduction=8):
        super().__init__()
        self.fc1 = nn.Linear(dim, dim // reduction)
        self.fc2 = nn.Linear(dim // reduction, dim)
    def forward(self, x):
        # x: (B, dim)
        w = F.relu(self.fc1(x))        # (B, dim//r)
        w = torch.sigmoid(self.fc2(w)) # (B, dim)
        return x * w                   # (B, dim)


class LearnPosEmbed(nn.Module):
    def __init__(self, length, dim):
        super().__init__()
        self.pe = nn.Embedding(length, dim)
    def forward(self, x):
        pos = torch.arange(x.size(1), device=x.device)
        return x + self.pe(pos).unsqueeze(0)

class GRUAttnModel(nn.Module):
    def __init__(self, input_dim, hidden=64, n_cls=3, drop_p=0.1):
        super().__init__()
        self.pos   = LearnPosEmbed(LOOKBACK, input_dim)
        self.sdrop = SpatialDropout(0.05)                # уменьшили p
        self.gru   = nn.GRU(input_dim, hidden, batch_first=True, bidirectional=True)
        self.attn  = nn.Linear(hidden*2, 1)
        self.se    = SEBlock(hidden*2)
        self.head  = nn.Sequential(
            nn.LayerNorm(hidden*2),
            nn.Dropout(drop_p),
            nn.Linear(hidden*2,128),
            nn.ReLU(),
            nn.Dropout(drop_p/2),
            nn.Linear(128,n_cls)
        )
    def forward(self, x):
        x = self.sdrop(self.pos(x))
        h, _ = self.gru(x)                   # (B,T,2H)
        w    = F.softmax(self.attn(h),1)     # (B,T,1)
        ctx  = (h * w).sum(1)                # (B,2H)
        ctx  = self.se(ctx)                  # (B,2H)
        return self.head(ctx)                # (B,n_cls)


# ── основной цикл по таймфреймам ───────────────────────────────────────────── #
for tag, tf in TIMEFRAMES.items():
    # 1) Загрузка и динамическая разметка
    df = pd.read_csv(f"{DATA_DIR}/{SYMBOL.replace('/','_')}_{tf}.csv", parse_dates=["timestamp"])
    fut = df["close"].shift(-HORIZON_STEPS[tag])
    ret = (fut - df["close"])/df["close"]
    low_th, high_th = ret.quantile(Q_LOW), ret.quantile(Q_HIGH)
    df["label"] = np.where(ret>high_th,0, np.where(ret<low_th,2,1))
    df.dropna(inplace=True); df.reset_index(drop=True, inplace=True)

    # 2) Split train/val по времени
    train_df, val_df = train_test_split(df, shuffle=False, test_size=0.2)

    # 3) Scaler
    feats  = [c for c in df.columns if c not in ("timestamp","label")]
    scaler = RobustScaler().fit(train_df[feats])
    X_tr0, y_tr0 = scaler.transform(train_df[feats]), train_df["label"].to_numpy()
    X_va,  y_va  = scaler.transform(val_df[feats]),  val_df["label"].to_numpy()

    # 4) Manual oversample Long (0) & Short (2)
    X_tr, y_tr = X_tr0.copy(), y_tr0.copy()
    for cls in (0,2):
        idx = np.where(y_tr0==cls)[0]
        # дублируем в 2 раза
        X_tr = np.vstack([X_tr, X_tr0[idx]])
        y_tr = np.concatenate([y_tr, y_tr0[idx]])

    # 5) Построение seqs
    def make_seq(X,y):
        seqs, labs = [], []
        for i in range(len(X)-LOOKBACK):
            seqs.append(X[i:i+LOOKBACK])
            labs.append(y[i+LOOKBACK])
        return np.array(seqs), np.array(labs)
    X_train, y_train = make_seq(X_tr, y_tr)
    X_val,   y_val   = make_seq(X_va, y_va)

    # 6) Class weights & sampler
    classes_arr = np.unique(y_train)
    cw = compute_class_weight(class_weight="balanced", classes=classes_arr, y=y_train)
    # alpha вручную: снижаем Hold
    alpha = torch.tensor([1.0, 0.3, 1.0], device=DEVICE)
    sampler = WeightedRandomSampler(
        weights=[1.0/cw[y] for y in y_train],
        num_samples=len(y_train),
        replacement=True
    )

    train_ds = SeqDataset(X_train, y_train)
    val_ds   = SeqDataset(X_val,   y_val)
    tr_ld = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler)
    va_ld = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False)

    # 7) Model, Loss, Optimizer, Scheduler
    model     = GRUAttnModel(input_dim=X_train.shape[2], drop_p=0.1).to(DEVICE)
    # init bias по частотам до-oversample
    freqs   = np.bincount(y_tr0)/len(y_tr0)
    bias_init = np.log(freqs/freqs.max())
    with torch.no_grad():
        model.head[-1].bias.copy_(torch.tensor(bias_init, device=DEVICE))
    criterion = FocalSmoothLoss(alpha, gamma=GAMMA_FOCAL, smoothing=SMOOTHING)
    optimizer = AdamW(model.parameters(), lr=LR_INITIAL, weight_decay=1e-4)
    cycle_len = EPOCHS // SNAPSHOTS
    scheduler = CosineAnnealingLR(optimizer, T_max=cycle_len)

    # 8) Training + Snapshots
    snapshots = []
    for snap in range(SNAPSHOTS):
        for ep in range(1, cycle_len+1):
            model.train()
            total_loss = 0.0
            for xb,yb in tr_ld:
                xb,yb = xb.to(DEVICE), yb.to(DEVICE)
                optimizer.zero_grad()
                loss = criterion(model(xb), yb)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()*xb.size(0)
            scheduler.step()
            print(f"[{tag}] snap {snap+1}/{SNAPSHOTS} epoch {ep}/{cycle_len} loss {total_loss/len(train_ds):.4f}")
        snapshots.append(copy.deepcopy(model.state_dict()))

    # 9) Ensemble Eval
    model.eval()
    probs_list = []
    with torch.no_grad():
        for state in snapshots:
            model.load_state_dict(state)
            batch_probs = []
            for xb,_ in va_ld:
                xb = xb.to(DEVICE)
                batch_probs.append(F.softmax(model(xb),1).cpu().numpy())
            probs_list.append(np.vstack(batch_probs))
    avg_probs = np.mean(probs_list,axis=0)
    preds = avg_probs.argmax(1)
    trues = y_val

    # 10) Metrics
    labels = [0,1,2]; names=["Long","Hold","Short"]
    bal = balanced_accuracy_score(trues,preds)
    mcc = matthews_corrcoef(trues,preds)
    f1s = f1_score(trues,preds,labels=labels,average=None,zero_division=0)
    cm  = confusion_matrix(trues,preds,labels=labels)

    print(f"\n=== {tag.upper()} RESULTS ===")
    print(f"Balanced Acc: {bal:.3f}   MCC: {mcc:.3f}")
    print("F1 per class:", dict(zip(names,f1s)))
    print("Confusion matrix:\n", pd.DataFrame(cm,index=names,columns=names))
    print(classification_report(trues,preds,labels=labels,target_names=names,zero_division=0))

    # 11) Save
    os.makedirs("models",exist_ok=True)
    torch.save(model.state_dict(), f"models/gru_attn_{tag}.pt")
    joblib.dump(scaler, f"models/scaler_{tag}.gz")


[short] snap 1/3 epoch 1/15 loss 0.2082
[short] snap 1/3 epoch 2/15 loss 0.1786
[short] snap 1/3 epoch 3/15 loss 0.1629
[short] snap 1/3 epoch 4/15 loss 0.1426
[short] snap 1/3 epoch 5/15 loss 0.1314
[short] snap 1/3 epoch 6/15 loss 0.1237
[short] snap 1/3 epoch 7/15 loss 0.1112
[short] snap 1/3 epoch 8/15 loss 0.1110
[short] snap 1/3 epoch 9/15 loss 0.1017
[short] snap 1/3 epoch 10/15 loss 0.0959
[short] snap 1/3 epoch 11/15 loss 0.0901
[short] snap 1/3 epoch 12/15 loss 0.0900
[short] snap 1/3 epoch 13/15 loss 0.0864
[short] snap 1/3 epoch 14/15 loss 0.0877
[short] snap 1/3 epoch 15/15 loss 0.0868
[short] snap 2/3 epoch 1/15 loss 0.0855
[short] snap 2/3 epoch 2/15 loss 0.0837
[short] snap 2/3 epoch 3/15 loss 0.0838
[short] snap 2/3 epoch 4/15 loss 0.0878
[short] snap 2/3 epoch 5/15 loss 0.0857
[short] snap 2/3 epoch 6/15 loss 0.0869
[short] snap 2/3 epoch 7/15 loss 0.0863
[short] snap 2/3 epoch 8/15 loss 0.0834
[short] snap 2/3 epoch 9/15 loss 0.0804
[short] snap 2/3 epoch 10/15 loss 

## 3 попытка (transformer edit)

In [3]:
pip install pytorch-lightning pytorch-forecasting torchmetrics


Note: you may need to restart the kernel to use updated packages.


In [4]:
# model_tft.py ─────────────────────────────────────────────────────────────────
import os
import joblib
import numpy as np
import pandas as pd
import torch

from sklearn.metrics import balanced_accuracy_score, classification_report

# теперь сразу из lightning.pytorch
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss

# ──────────────────────────────────────────────────────────────────────────────
#  ПАРАМЕТРЫ
# ──────────────────────────────────────────────────────────────────────────────
SYMBOL        = "BTC/USDT"
DATA_DIR      = "csv_data"
TIMEFRAMES    = {"short":"15m","medium":"1h","long":"4h"}
HORIZON_STEPS = {"short":12,"medium":24,"long":42}

LOOKBACK      = 96       # длина encoder
PRED_LEN      = 1        # длина decoder (one-step)
BATCH_SIZE    = 64
MAX_EPOCHS    = 20
LR            = 3e-4
UP_TH, DOWN_TH= 0.005, -0.005

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ──────────────────────────────────────────────────────────────────────────────
for tag, tf in TIMEFRAMES.items():
    print(f"\n▶▶▶ TRAINING TFT for '{tag}' ({tf})")

    # 1) Load CSV
    df = pd.read_csv(
        os.path.join(DATA_DIR, f"{SYMBOL.replace('/','_')}_{tf}.csv"),
        parse_dates=["timestamp"]
    ).sort_values("timestamp").reset_index(drop=True)
    df["symbol"]     = SYMBOL.replace("/", "_")
    df["target"]     = df["close"].shift(-HORIZON_STEPS[tag])
    df["return_fwd"] = (df["target"] - df["close"]) / df["close"]
    df.dropna(inplace=True); df.reset_index(drop=True, inplace=True)
    df["time_idx"]   = (
        (df["timestamp"] - df["timestamp"].min())
        .dt.total_seconds() // pd.Timedelta(tf).total_seconds()
    ).astype(int)

    # train/val split
    cutoff   = int(df["time_idx"].max() * 0.8)
    train_df = df[df["time_idx"] <= cutoff].copy()
    val_df   = df[df["time_idx"] >  cutoff].copy()

    # feature columns
    feature_cols = [
        c for c in df.columns
        if c not in ("timestamp","target","return_fwd","time_idx","symbol")
    ]

    # 2) TimeSeriesDataSet for train
    training = TimeSeriesDataSet(
        train_df,
        time_idx="time_idx",
        target="return_fwd",
        group_ids=["symbol"],
        static_categoricals=["symbol"],
        static_reals=[],
        time_varying_known_categoricals=[],
        time_varying_known_reals=["time_idx"],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=feature_cols,
        max_encoder_length=LOOKBACK,
        max_prediction_length=PRED_LEN,
        allow_missing_timesteps=True,
        target_normalizer=GroupNormalizer(
            groups=["symbol"], transformation="softplus"
        ),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
    )

    # 3) Validation dataset from train
    validation = TimeSeriesDataSet.from_dataset(
        training, val_df,
        predict=True,
        stop_randomization=True
    )

    # 4) DataLoaders
    train_loader = training.to_dataloader(
        train=True,
        batch_size=BATCH_SIZE,
        num_workers=4
    )
    val_loader = validation.to_dataloader(
        train=False,
        batch_size=BATCH_SIZE,
        num_workers=4
    )

    # 5) Build TFT
    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=LR,
        hidden_size=64,
        attention_head_size=4,
        dropout=0.1,
        hidden_continuous_size=32,
        output_size=1,
        loss=QuantileLoss([0.5]),
        log_interval=10,
        reduce_on_plateau_patience=4,
        optimizer="adamw"
    ).to(DEVICE)

    # 6) Trainer (lightning.pytorch)
    trainer = pl.Trainer(
        max_epochs=MAX_EPOCHS,
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        gradient_clip_val=0.1,
        callbacks=[EarlyStopping(
            monitor="val_loss", patience=5, mode="min"
        )]
    )
    trainer.fit(tft, train_loader, val_loader)

     # 7) Predict + classification
    # ============================
    raw_preds = tft.predict(val_loader)
    # если пришёл тензор, скопировать на CPU
    if isinstance(raw_preds, torch.Tensor):
        raw_preds = raw_preds.cpu().numpy()
    # избавляемся от лишних осей
    raw_preds = np.asarray(raw_preds).squeeze().flatten()  # → shape (N,)

    # теперь порогуем
    preds = np.where(raw_preds > UP_TH, 0,
             np.where(raw_preds < DOWN_TH, 2, 1))

    # true-классы тоже вектор
    true_ret = val_df["return_fwd"].to_numpy()
    trues = np.where(true_ret > UP_TH, 0,
             np.where(true_ret < DOWN_TH, 2, 1))

    # проверяем длины
    assert preds.shape[0] == trues.shape[0], (
        f"preds len {preds.shape[0]} vs trues len {trues.shape[0]}"
    )

    bal = balanced_accuracy_score(trues, preds)
    print(f"\n'{tag.upper()}' Balanced Accuracy: {bal:.3f}\n")
    print(classification_report(
        trues, preds,
        target_names=["Long","Hold","Short"],
        zero_division=0
    ))



▶▶▶ TRAINING TFT for 'short' (15m)


c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been remo

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'predict_dataloader' to speed up the dataloader worker initialization.


AssertionError: preds len 1 vs trues len 1988

In [5]:
# model_tft.py ─────────────────────────────────────────────────────────────────
import os, joblib
import numpy as np
import pandas as pd
import torch

from sklearn.metrics import balanced_accuracy_score, classification_report

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss

# ──────────────────────────────────────────────────────────────────────────────
#  ПАРАМЕТРЫ
# ──────────────────────────────────────────────────────────────────────────────
SYMBOL        = "BTC/USDT"
DATA_DIR      = "csv_data"
TIMEFRAMES    = {"short":"15m","medium":"1h","long":"4h"}
HORIZON_STEPS = {"short":12,"medium":24,"long":42}

LOOKBACK      = 96       # длина encoder
PRED_LEN      = 1        # длина decoder (one-step)
BATCH_SIZE    = 64
MAX_EPOCHS    = 20
LR            = 3e-4
UP_TH, DOWN_TH= 0.005, -0.005

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ──────────────────────────────────────────────────────────────────────────────
for tag, tf in TIMEFRAMES.items():
    print(f"\n▶▶▶ TRAINING TFT for '{tag}' ({tf})")

    # 1) Load & prepare DataFrame
    df = pd.read_csv(
        os.path.join(DATA_DIR, f"{SYMBOL.replace('/','_')}_{tf}.csv"),
        parse_dates=["timestamp"]
    ).sort_values("timestamp").reset_index(drop=True)
    df["symbol"]     = SYMBOL.replace("/", "_")
    df["target"]     = df["close"].shift(-HORIZON_STEPS[tag])
    df["return_fwd"] = (df["target"] - df["close"]) / df["close"]
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)
    df["time_idx"] = (
        (df["timestamp"] - df["timestamp"].min())
        .dt.total_seconds() // pd.Timedelta(tf).total_seconds()
    ).astype(int)

    # train/val split by time_idx
    cutoff   = int(df["time_idx"].max() * 0.8)
    train_df = df[df["time_idx"] <= cutoff].copy()
    val_df   = df[df["time_idx"] >  cutoff].copy()

    # feature columns
    feature_cols = [
        c for c in df.columns
        if c not in ("timestamp","target","return_fwd","time_idx","symbol")
    ]

    # 2) build training TimeSeriesDataSet
    training = TimeSeriesDataSet(
        train_df,
        time_idx="time_idx",
        target="return_fwd",
        group_ids=["symbol"],
        static_categoricals=["symbol"],
        static_reals=[],
        time_varying_known_categoricals=[],
        time_varying_known_reals=["time_idx"],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=feature_cols,
        max_encoder_length=LOOKBACK,
        max_prediction_length=PRED_LEN,
        allow_missing_timesteps=True,
        target_normalizer=GroupNormalizer(groups=["symbol"], transformation="softplus"),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
    )

    # 3) build validation dataset from training config
    validation = TimeSeriesDataSet.from_dataset(
        training, val_df, predict=True, stop_randomization=True
    )

    # 4) DataLoaders
    train_loader = training.to_dataloader(train=True,  batch_size=BATCH_SIZE, num_workers=4)
    val_loader   = validation.to_dataloader(train=False, batch_size=BATCH_SIZE, num_workers=4)

    # 5) init TFT
    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=LR,
        hidden_size=64,
        attention_head_size=4,
        dropout=0.1,
        hidden_continuous_size=32,
        output_size=1,
        loss=QuantileLoss([0.5]),
        log_interval=10,
        reduce_on_plateau_patience=4,
        optimizer="adamw"
    ).to(DEVICE)

    # 6) train
    trainer = pl.Trainer(
        max_epochs=MAX_EPOCHS,
        accelerator="gpu" if torch.cuda.is_available() else "cpu",
        devices=1,
        gradient_clip_val=0.1,
        callbacks=[EarlyStopping(monitor="val_loss", patience=5, mode="min")]
    )
    trainer.fit(tft, train_loader, val_loader)

    # 7) predict + classification
    raw_preds = tft.predict(validation)            # shape (num_samples, 1)
    if isinstance(raw_preds, torch.Tensor):        # if tensor → numpy
        raw_preds = raw_preds.detach().cpu().numpy()
    raw_preds = np.array(raw_preds).reshape(-1)     # flatten to (N,)

    # true returns for val_df, skip first LOOKBACK
    true_ret = val_df["return_fwd"].to_numpy()[LOOKBACK:]
    assert len(true_ret) == len(raw_preds), f"{len(raw_preds)} vs {len(true_ret)}"

    preds = np.where(raw_preds > UP_TH, 0,
             np.where(raw_preds < DOWN_TH, 2, 1))
    trues = np.where(true_ret > UP_TH, 0,
             np.where(true_ret < DOWN_TH, 2, 1))

    bal = balanced_accuracy_score(trues, preds)
    print(f"\n'{tag.upper()}' Balanced Accuracy: {bal:.3f}\n")
    print(classification_report(trues, preds, target_names=["Long","Hold","Short"], zero_division=0))

    # 8) save artifacts
    os.makedirs("models", exist_ok=True)
    torch.save(tft.state_dict(),    f"models/tft_{tag}.pt")
    joblib.dump(training,            f"models/dataset_{tag}.pkl")
    joblib.dump(feature_cols,        f"models/featcols_{tag}.pkl")

    print(f"Saved model   → models/tft_{tag}.pt")
    print(f"Saved dataset → models/dataset_{tag}.pkl")



▶▶▶ TRAINING TFT for 'short' (15m)


c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\utilities\parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params | Mode 
----------------------------------------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\DLR_ACER\anaconda3\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


AssertionError: 1 vs 1892

In [6]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = 'google/flan-t5-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)
nlm = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def narrate(trend, horizon):
    prompt = (f'As a crypto–trading assistant, explain in one sentence what a {trend} signal means for BTC/USDT '
              f'on the {horizon} horizon and suggest an action for a cautious trader.')
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids
    output_ids = nlm.generate(input_ids, max_new_tokens=40)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Example:
print(narrate('Long', 'medium‑term'))

Downloading:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/308M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
import joblib
for h, sc in scalers.items():
    joblib.dump(sc, f'scaler_{h}.gz')
print('Saved all scalers.')

ХЗ ЧТО

In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import balanced_accuracy_score

# ===== Focal Loss с пониженными весами для HOLD =====
class FocalLossWithClassWeights(nn.Module):
    def __init__(self, gamma=2, weight=None):
        super().__init__()
        self.gamma = gamma
        self.weight = weight

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, weight=self.weight, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma) * ce_loss
        return focal_loss.mean()

# ===== Модель ConvGRU + Attention =====
class ConvGRUAttentionModel(nn.Module):
    def __init__(self, input_size, hidden_size=64, gru_layers=1, num_classes=3):
        super().__init__()
        self.conv1 = nn.Conv1d(input_size, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU()

        self.gru = nn.GRU(input_size=64, hidden_size=hidden_size, num_layers=gru_layers,
                          batch_first=True, bidirectional=True)

        self.attn = nn.Linear(hidden_size * 2, 1)

        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        # x: (batch, seq_len, features)
        x = x.permute(0, 2, 1)              # (batch, features, seq_len)
        x = self.relu(self.bn1(self.conv1(x)))
        x = x.permute(0, 2, 1)              # (batch, seq_len, conv_features)

        gru_out, _ = self.gru(x)            # (batch, seq_len, hidden*2)

        attn_weights = torch.softmax(self.attn(gru_out), dim=1)  # (batch, seq_len, 1)
        context = torch.sum(attn_weights * gru_out, dim=1)       # (batch, hidden*2)

        return self.fc(context)

# ===== Тренировочная функция =====
def train_model(model, train_loader, val_loader, epochs=20, lr=1e-3, device='cpu'):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    # Классы: Long (0), Hold (1), Short (2)
    class_weights = torch.tensor([1.2, 0.4, 1.3], dtype=torch.float32).to(device)
    criterion = FocalLossWithClassWeights(weight=class_weights)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in train_loader:
            # Поддержка batch в виде dict или tuple
            if isinstance(batch, dict):
                inputs = batch['X'].to(device)
                labels = batch['y'].to(device)
            else:
                inputs, labels = batch
                inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        scheduler.step()

        # Валидация
        model.eval()
        y_true, y_pred = [], []
        with torch.no_grad():
            for batch in val_loader:
                if isinstance(batch, dict):
                    inputs = batch['X'].to(device)
                    labels = batch['y'].to(device)
                else:
                    inputs, labels = batch
                    inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                preds = torch.argmax(outputs, dim=1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(preds.cpu().numpy())

        score = balanced_accuracy_score(y_true, y_pred)
        print(f"Epoch {epoch+1}/{epochs} | Loss: {total_loss:.4f} | Balanced Accuracy: {score:.4f}")
    
    return model

# ===== Пример использования =====
# model = ConvGRUAttentionModel(input_size=len(FEATURES)).to(DEVICE)
# model = train_model(model, train_loader, val_loader, epochs=40, lr=1e-3, device=DEVICE)


In [25]:
model = ConvGRUAttentionModel(input_size=len(FEATURES)).to(DEVICE)


In [None]:
model = train_model(model, train_loader, val_loader, epochs=40, lr=1e-3, device=DEVICE)
