In [6]:
import torch
import torch.optim as optim
import sys
sys.path.append('../src') 
from model import SphericalRNN
from trainer import train_epoch
import pandas as pd
import warnings
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import os
import random
from tqdm import tqdm

# Silence every warning, from every library, for the rest of the kernel session.
# NOTE(review): this also hides deprecation and performance warnings raised by
# the cells below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [5]:
def get_features(path="../data/xrp_futures.csv", n_timesteps=50):
    """Build sliding-window model inputs and per-window summary features.

    The CSV at ``path`` is read with pandas. It must expose ``open`` and
    ``close`` columns by name, and its columns at positions 2..5 are unpacked
    (in that order) as Open, High, Low, Close.

    Args:
        path: Location of the CSV file.
        n_timesteps: Number of consecutive rows in each window (must be >= 1).

    Returns:
        A tuple ``(samples, features, prices)``:

        * ``samples`` -- ``torch.Tensor`` of shape ``(n_windows, n_timesteps, 1)``
          holding the standardised per-row return ``(close - open) / open``.
        * ``features`` -- ``numpy.ndarray`` of shape ``(n_windows, 3)`` with the
          columns ``total_delta``, ``volatility_per_step``, ``volatility_total``.
        * ``prices`` -- always an empty list (see the note in the body).

        ``n_windows`` is ``max(len(df) - n_timesteps, 0)``; window ``k`` covers
        rows ``k .. k + n_timesteps - 1``.

    Raises:
        ValueError: If ``n_timesteps`` is smaller than 1.
    """
    if n_timesteps < 1:
        raise ValueError(f"n_timesteps must be >= 1, got {n_timesteps}")

    df = pd.read_csv(path)

    # Per-row relative return, standardised over the whole file.
    x = (df.close.values - df.open.values) / df.open.values
    x = (x - x.mean()) / x.std()

    # Hoisted out of the loop: slicing the DataFrame on every iteration is far
    # slower than slicing one NumPy array.
    ohlc = df.iloc[:, 2:6].values

    samples = []
    # NOTE(review): nothing is ever appended to `prices`; it is returned empty.
    # Kept to preserve the three-element return value — confirm what it should hold.
    prices = []
    features = []
    for i in tqdm(range(n_timesteps, len(df))):
        samples.append(x[i - n_timesteps : i].reshape(-1, 1))
        Open, High, Low, Close = ohlc[i - n_timesteps : i].T
        # total_delta, volatility_per_step, volatility_total
        features.append(
            [
                (Close[-1] - Open[0]) / Open[0],
                np.mean(((High - Low) / Open)),
                Close.std() / Open[0],
            ]
        )

    # Stack once and wrap the array: building a tensor from a Python list of
    # ndarrays converts element by element and is extremely slow.
    if samples:
        sample_array = np.stack(samples)
    else:
        # Too few rows for a single window: keep well-formed, empty outputs.
        sample_array = np.empty((0, n_timesteps, 1), dtype=np.float64)
    feature_array = np.array(features).reshape(-1, 3)

    return torch.from_numpy(sample_array), feature_array, prices


# Build the model inputs and the per-window summary features from 50-step
# windows of the default CSV. `prices` comes back as an empty list because
# get_features never appends to it.
x_train, features, prices = get_features(n_timesteps=50)

100%|██████████| 59540/59540 [00:07<00:00, 7632.97it/s]


In [7]:
def set_seed(seed: int):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus every CUDA device when CUDA is available), switches cuDNN to
    deterministic algorithm selection, and exports ``PYTHONHASHSEED``.

    Args:
        seed: The value handed to each generator.
    """
    # Export the hash seed. NOTE: Python reads PYTHONHASHSEED at interpreter
    # start-up, so this is picked up by processes launched afterwards rather
    # than by the already-running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Python, NumPy and PyTorch (CPU) generators, seeded in one pass.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # CUDA generators: the current device and then all devices.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # cuDNN: trade speed for repeatability by forcing deterministic kernels and
    # disabling the auto-tuner that picks the fastest algorithm per run.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False


# Seed all generators once, before the model is built and trained in the
# following cells, so a top-to-bottom run is repeatable.
set_seed(42)

In [8]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

n_clusters = 3
dim_hidden = 16
# Number of hidden layers in the MLP head; 0 yields mlp_hidden_dims == [].
# This is a different setting from the `n_layers=1` keyword passed to
# SphericalRNN below (presumably the recurrent depth — confirm in model.py).
n_layers = 0

# Seed before the model is created so the initial weights are the same every
# time this cell is re-run, not only on a fresh top-to-bottom run.
torch.manual_seed(42)
model = SphericalRNN(
    input_size=1,
    hidden_size=6,
    embedding_dim=4,
    n_layers=1,
    mlp_hidden_dims=[dim_hidden] * n_layers,
).to(dtype=torch.float64, device=device)

# Re-seed after construction so training starts from the same generator state
# as before, independent of how many random draws the initialisation consumed.
torch.manual_seed(42)
lr = 1e-4
n_epochs = 3
batch_size = 32
optimizer = optim.Adam(model.parameters(), lr=lr)

# Train in training mode (the model used to be switched to eval mode before
# this loop), then switch to eval mode once for the inference cells that follow.
model.train()
for epoch in range(n_epochs):
    train_epoch(
        model=model,
        x_train=x_train,
        optimizer=optimizer,
        device=device,
        n_clusters=n_clusters,
        batch_size=batch_size,
    )
model.eval()

SphericalRNN(
  (lstm): LSTM(1, 6, batch_first=True)
  (mlp): Sequential(
    (0): Linear(in_features=6, out_features=4, bias=True)
  )
)

In [9]:
# Embed every window with the trained model and cluster the embeddings.
kmeans = KMeans(n_clusters=n_clusters, random_state=0)
# Inference only: without no_grad() PyTorch records an autograd graph for the
# whole dataset in a single forward pass, which needlessly holds on to memory.
with torch.no_grad():
    z = model(x_train.to(device=device)).cpu()
labels = kmeans.fit_predict(z.numpy())
# Peek at the first few cluster assignments.
labels[:10]

array([1, 1, 0, 1, 0, 1, 1, 1, 1, 1], dtype=int32)

In [10]:
# Average the three window-level features over the members of each cluster and
# show one row per cluster id.
summary_columns = ["total_delta", "volatility_per_step", "volatility_total"]
res_data = [
    features[labels == cluster_id].mean(axis=0) for cluster_id in range(n_clusters)
]

pd.DataFrame(data=res_data, columns=summary_columns)

Unnamed: 0,total_delta,volatility_per_step,volatility_total
0,0.005254,0.00768,0.011827
1,-0.000447,0.008703,0.014264
2,0.02633,0.011764,0.021689


In [11]:
import torch
import gc

# Release memory that is no longer referenced: Python garbage first, then the
# blocks cached by PyTorch's CUDA allocator.
gc.collect()
# Only touch CUDA when it exists: ipc_collect() initialises CUDA and therefore
# raises on CPU-only machines, which this notebook otherwise supports.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()