In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from einops import rearrange
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split

–û–±—Ä–∞–±–∞—Ç—ã–≤–∞–µ–º –Ω–∞—à–∏ –¥–∞–Ω–Ω—ã–µ –ø–µ—Ä–µ–¥ –ø–æ–¥–∞—á–µ–π –≤ –º–æ–¥–µ–ª—å

In [2]:
class CryptoDataset(Dataset):
    def __init__(self, dfs: dict, window_size=196, predict_steps=24):
        self.window_size = window_size
        self.predict_steps = predict_steps
        self.sequences, self.targets, self.coin_ids = [], [], []
        self.scalers = {}
        self.label_encoder = LabelEncoder()
        coin_names = list(dfs.keys())
        self.label_encoder.fit(coin_names)

        for coin in coin_names:
            df = dfs[coin].copy()
            df = df.drop(columns=["timestamp"])
            self.scalers[coin] = StandardScaler()
            scaled = self.scalers[coin].fit_transform(df.drop(columns=["close"]))
            close = df["close"].values

            for i in range(len(scaled) - window_size - predict_steps):
                x = scaled[i:i+window_size]
                y = close[i+window_size:i+window_size+predict_steps]
                self.sequences.append(x)
                self.targets.append(y)
                self.coin_ids.append(self.label_encoder.transform([coin])[0])

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x = torch.tensor(self.sequences[idx], dtype=torch.float32)
        coin_id = torch.tensor(self.coin_ids[idx], dtype=torch.long)
        y = torch.tensor(self.targets[idx], dtype=torch.float32)
        return x, coin_id, y

In [3]:
def create_splits(dataset: Dataset, val_size=0.1, test_size=0.1, seed=42):
    indices = np.arange(len(dataset))
    train_val_idx, test_idx = train_test_split(indices, test_size=test_size, random_state=seed, shuffle=False)
    train_idx, val_idx = train_test_split(train_val_idx, test_size=val_size / (1 - test_size), random_state=seed, shuffle=False)
    return Subset(dataset, train_idx), Subset(dataset, val_idx), Subset(dataset, test_idx)

In [4]:
class PerformerBlock(nn.Module):
    def __init__(self, dim, heads=4, dropout=0.1):
        super().__init__()
        assert dim % heads == 0, f"dim={dim} must be divisible by heads={heads}"
        self.heads = heads
        self.qkv = nn.Linear(dim, dim * 3)
        self.dropout = nn.Dropout(dropout)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x):  # x: [B, T, D]
        B, T, D = x.shape
        H = self.heads

        # –ü–æ–ª—É—á–µ–Ω–∏–µ q, k, v
        qkv = self.qkv(x).chunk(3, dim=-1)  # –∫–∞–∂–¥–∞—è [B, T, D]
        q, k, v = map(lambda t: rearrange(t, 'b t (h d) -> b h t d', h=H), qkv)  # [B, H, T, D_h]

        q = torch.nn.functional.elu(q) + 1
        k = torch.nn.functional.elu(k) + 1

        kv = torch.einsum('bhnd,bhne->bhde', k, v)  # [B, H, D, D]
        k_sum = k.sum(dim=2, keepdim=True)         # [B, H, 1, D]
        z = 1 / (torch.einsum('bhnd,bhnd->bhn', q, k_sum.expand_as(q)) + 1e-6).unsqueeze(-1)  # [B, H, T, 1]
        out = torch.einsum('bhnd,bhde->bhne', q, kv) * z  # [B, H, T, D]

        out = rearrange(out, 'b h t d -> b t (h d)')  # [B, T, D]
        return self.proj(self.dropout(out))


In [5]:
class HybridModel(nn.Module):
    def __init__(self, input_dim, coin_count, hidden_dim=64, heads=4, predict_steps=24):
        super().__init__()
        self.predict_steps = predict_steps
        self.coin_embedding = nn.Embedding(coin_count, input_dim)
        self.performer = PerformerBlock(dim=input_dim, heads=heads)
        self.bilstm = nn.LSTM(input_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.regressor = nn.Sequential(
            nn.Linear(hidden_dim * 2, 64),
            nn.ReLU(),
            nn.Linear(64, predict_steps)
        )

    def forward(self, x, coin_id):
        emb = self.coin_embedding(coin_id).unsqueeze(1).expand_as(x)
        x = x + emb
        x = self.performer(x)
        out, _ = self.bilstm(x)
        out = out[:, -1, :]
        return self.regressor(out)

In [14]:
def train_model(model, train_loader, val_loader, epochs=20, lr=1e-3):
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    for epoch in range(epochs):
        model.train()
        total_train_loss = 0
        for x, coin_id, y in train_loader:
            x = x.to(device)
            coin_id = coin_id.to(device)
            y = y.to(device)

            pred = model(x, coin_id)
            loss = loss_fn(pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()

        # Validation
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for x, coin_id, y in val_loader:
                x = x.to(device)
                coin_id = coin_id.to(device)
                y = y.to(device)

                pred = model(x, coin_id)
                val_loss = loss_fn(pred, y)
                total_val_loss += val_loss.item()

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {total_train_loss / len(train_loader):.4f} | Val Loss: {total_val_loss / len(val_loader):.4f}")


In [21]:
def evaluate_model(model, dataset_subset: Dataset, parent_dataset: CryptoDataset):
    model.eval()
    preds, trues, coins = [], [], []

    with torch.no_grad():
        for i in range(len(dataset_subset)):
            x, coin_id, y = dataset_subset[i]
            x = x.unsqueeze(0).to(device)
            coin_id = coin_id.unsqueeze(0).to(device)

            pred = model(x, coin_id).squeeze().cpu().numpy()
            preds.append(pred)
            trues.append(y.numpy())
            coins.append(coin_id.cpu().item())

    preds, trues = np.array(preds), np.array(trues)
    label_decoder = parent_dataset.label_encoder.inverse_transform(coins)
    df_all = pd.DataFrame({'coin': label_decoder})
    metrics = {}

    for step in range(preds.shape[1]):
        df_all[f"pred_{step}"] = preds[:, step]
        df_all[f"true_{step}"] = trues[:, step]

    for coin in df_all['coin'].unique():
        df_coin = df_all[df_all['coin'] == coin]
        p, t = [], []
        for step in range(preds.shape[1]):
            p += df_coin[f"pred_{step}"].tolist()
            t += df_coin[f"true_{step}"].tolist()

        p, t = np.array(p), np.array(t)
        ret = np.diff(p) / p[:-1]
        true_ret = np.diff(t) / t[:-1]

        sharpe = np.mean(ret - true_ret) / (np.std(ret - true_ret) + 1e-8)
        downside = ret[ret < 0]
        sortino = np.mean(ret - true_ret) / (np.std(downside) + 1e-8) if len(downside) > 0 else 0
        da = np.mean(np.sign(ret) == np.sign(true_ret))
        dd = max_drawdown(t)

        metrics[coin] = {
            'MAE': mean_absolute_error(t, p),
            'RMSE': mean_squared_error(t, p) ** 0.5 ,
            'MAPE (%)': np.mean(np.abs((t - p) / t)) * 100,
            'Directional Accuracy': da,
            'Sharpe Ratio': sharpe,
            'Sortino Ratio': sortino,
            'Max Drawdown (%)': dd
        }

    return df_all, metrics


def max_drawdown(prices):
    prices = np.array(prices)
    cum_max = np.maximum.accumulate(prices)
    dd = (cum_max - prices) / cum_max
    return np.max(dd) * 100

In [8]:
dfs = {
    'BTC': pd.read_csv("BTC_USDT.csv"),
    'ETH': pd.read_csv("ETH_USDT.csv"),
    'SOL': pd.read_csv("SOL_USDT.csv"),
    'XRP': pd.read_csv("XRP_USDT.csv"),
    'TRX': pd.read_csv("TRX_USDT.csv"),
}

# –ü—Ä–µ–¥–æ–±—Ä–∞–±–æ—Ç–∫–∞: —Å–æ—Ä—Ç–∏—Ä–æ–≤–∫–∞ –∏ –æ–±–Ω—É–ª–µ–Ω–∏–µ NaN
for coin in dfs:
    dfs[coin] = dfs[coin].sort_values("timestamp").fillna(method="ffill").fillna(method="bfill")

  dfs[coin] = dfs[coin].sort_values("timestamp").fillna(method="ffill").fillna(method="bfill")


In [16]:
WINDOW_SIZE = 196
PREDICT_STEPS = 24
BATCH_SIZE = 64
EPOCHS = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [17]:
dataset = CryptoDataset(dfs, window_size=WINDOW_SIZE, predict_steps=PREDICT_STEPS)

# –î–µ–ª–∏–º –Ω–∞ train/val/test
train_set, val_set, test_set = create_splits(dataset, val_size=0.1, test_size=0.1)

# DataLoaders
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è –º–æ–¥–µ–ª–∏
model = HybridModel(
    input_dim=dataset[0][0].shape[1],
    coin_count=len(dfs),
    predict_steps=PREDICT_STEPS
).to(device)

# –û–±—É—á–µ–Ω–∏–µ —Å –≤–∞–ª–∏–¥–∞—Ü–∏–µ–π
train_model(model, train_loader, val_loader, epochs=EPOCHS)

# –§–∏–Ω–∞–ª—å–Ω–∞—è –æ—Ü–µ–Ω–∫–∞
df_eval, metrics = evaluate_model(model, test_set)

print("\nüìä Test Metrics per Coin:")
for coin, met in metrics.items():
    print(f"{coin}:")
    for k, v in met.items():
        print(f"  {k}: {v:.4f}")

Epoch 1/20 | Train Loss: 263306377.4105 | Val Loss: 885968.1819
Epoch 2/20 | Train Loss: 17215527.0103 | Val Loss: 1520905.3644
Epoch 3/20 | Train Loss: 1116070.9445 | Val Loss: 26506.7100
Epoch 4/20 | Train Loss: 855580.9440 | Val Loss: 4949.3232
Epoch 5/20 | Train Loss: 699525.2274 | Val Loss: 2498.2128
Epoch 6/20 | Train Loss: 669496.7005 | Val Loss: 2530.9342
Epoch 7/20 | Train Loss: 648623.0835 | Val Loss: 3308.1640
Epoch 8/20 | Train Loss: 641665.0240 | Val Loss: 2458.9328
Epoch 9/20 | Train Loss: 621155.8278 | Val Loss: 1981.3857
Epoch 10/20 | Train Loss: 552376.9575 | Val Loss: 968.1539
Epoch 11/20 | Train Loss: 502786.2149 | Val Loss: 738.7449
Epoch 12/20 | Train Loss: 447039.6878 | Val Loss: 231.3269
Epoch 13/20 | Train Loss: 448420.0735 | Val Loss: 2398.7109
Epoch 14/20 | Train Loss: 394637.7741 | Val Loss: 1133.3978
Epoch 15/20 | Train Loss: 405928.4475 | Val Loss: 1062.5809
Epoch 16/20 | Train Loss: 377992.8859 | Val Loss: 359.0260
Epoch 17/20 | Train Loss: 382031.5242 | V

AttributeError: 'Subset' object has no attribute 'label_encoder'

In [22]:
df_eval, metrics = evaluate_model(model, test_set, dataset)

print("\nüìä Test Metrics per Coin:")
for coin, met in metrics.items():
    print(f"{coin}:")
    for k, v in met.items():
        print(f"  {k}: {v:.4f}")


üìä Test Metrics per Coin:
TRX:
  MAE: 17.5171
  RMSE: 33.8819
  MAPE (%): 12209.8860
  Directional Accuracy: 0.4864
  Sharpe Ratio: -0.0025
  Sortino Ratio: -0.0018
  Max Drawdown (%): 89.3263


In [23]:
for coin, met in metrics.items():
    print(f"{coin}:")

TRX:
