In [1]:
import pandas as pd
import sys
import os
parent_dir = os.path.dirname(os.path.abspath(''))
sys.path.append(parent_dir)

from utils import const

data_path = "../data/binance/BTCUSDT-1m-2024-all-with-indicators.csv"

df = pd.read_csv(data_path)
df["open_time"] = pd.to_datetime(df["open_time"], unit="ms")
df = df.set_index("open_time")

In [2]:
df["target"] = (df["close"].shift(-1) > df["close"]).astype(int)
df = df.dropna()

X = df[const.factor_cols]
y = df["target"]

X_train = X[X.index < const.split_time]
X_test = X[X.index >= const.split_time]
y_train = y[y.index < const.split_time]
y_test = y[y.index >= const.split_time]

In [3]:
!which python
!pip show torch

/opt/miniconda3/bin/python
Name: torch
Version: 2.4.1
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /opt/miniconda3/lib/python3.12/site-packages
Requires: filelock, fsspec, jinja2, networkx, setuptools, sympy, typing-extensions
Required-by: accelerate, autogluon.multimodal, autogluon.timeseries, browsergym-visualwebarena, fastai, lightning, openai-whisper, pytorch-lightning, pytorch-metric-learning, pytorch-transformers, timm, torchaudio, torchdiffeq, torchmetrics, torchvision


In [4]:


import torch

import torch.nn as nn
import torch.optim as optim


factor = const.factor_cols[0]
median = X_train[factor].median()
env1_idx = X_train[factor] <= median
env2_idx = X_train[factor] > median

envs = [
    (torch.tensor(X_train[env1_idx].values, dtype=torch.float32), torch.tensor(y_train[env1_idx].values, dtype=torch.float32)),
    (torch.tensor(X_train[env2_idx].values, dtype=torch.float32), torch.tensor(y_train[env2_idx].values, dtype=torch.float32)),
]

class MLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.net(x).squeeze(-1)

def irm_penalty(logits, y):
    scale = torch.tensor(1.0, requires_grad=True)
    loss = nn.BCEWithLogitsLoss()(logits * scale, y)
    grad = torch.autograd.grad(loss, [scale], create_graph=True)[0]
    return torch.sum(grad**2)

input_dim = X_train.shape[1]
model = MLP(input_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
n_steps = 3000
lambda_irm = 1e2

for step in range(n_steps):
    total_loss = 0
    penalty = 0
    for X_e, y_e in envs:
        logits = model(X_e)
        loss = nn.BCEWithLogitsLoss()(logits, y_e)
        total_loss += loss
        penalty += irm_penalty(logits, y_e)
    loss = total_loss / len(envs) + lambda_irm * penalty / len(envs)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if step % 500 == 0:
        print(f"Step {step}, Loss: {loss.item():.4f}, Penalty: {penalty.item():.4f}")

with torch.no_grad():
    importance = model.net[0].weight.abs().sum(dim=0).cpu().numpy()
    factor_importance = sorted(zip(const.factor_cols, importance), key=lambda x: -x[1])
    for name, score in factor_importance:
        print(f"{name}: {score:.4f}")

Step 0, Loss: 10057569796096.0000, Penalty: 201151397888.0000
Step 500, Loss: 471950784.0000, Penalty: 9438972.0000
Step 1000, Loss: 206894592.0000, Penalty: 4137863.0000
Step 1500, Loss: 87330488.0000, Penalty: 1746591.2500
Step 2000, Loss: 31845030.0000, Penalty: 636889.3750
Step 2500, Loss: 12660110.0000, Penalty: 253195.0938
macd: 3.0331
macd_diff: 2.5697
macd_signal: 2.3900
donchian_low: 2.1621
roc: 2.0951
count: 2.0629
bb_bbh: 2.0156
cci: 1.9881
close: 1.9811
bb_bbl: 1.8934
mfi: 1.8632
stoch_d: 1.8203
taker_buy_volume: 1.7605
atr: 1.6771
high: 1.6709
williams_r: 1.6636
bb_bbm: 1.6472
obv: 1.6449
rsi: 1.6324
adx: 1.6195
ema_20: 1.6076
sma_20: 1.5381
low: 1.5043
ignore: 1.4860
vwap: 1.4759
taker_buy_quote_volume: 1.4711
donchian_high: 1.4395
open: 1.4065
volume: 1.2928
quote_volume: 1.2597
stoch_k: 1.0160
