In [1]:
from datetime import date, datetime, timedelta
from core.time_utils import Bounds
from core.exchange import Exchange
from core.currency import Currency
from core.utils import configure_logging

from lightgbm import Booster

import pandas as pd
import numpy as np

configure_logging()

In [20]:
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from models.prediction.pipes.price_prediction import PrimaryPricePrediction


train_bounds: Bounds = Bounds.for_days(
    start_inclusive=date(2025, 5, 1), end_exclusive=date(2025, 5, 20)
)
test_bounds: Bounds = Bounds.for_days(
    start_inclusive=date(2025, 5, 20), end_exclusive=date(2025, 5, 25)
)

pipe = PrimaryPricePrediction(
    train_bounds=train_bounds,
    test_bounds=test_bounds,
    exchange=Exchange.OKX_SPOT,
    target_currencies=[Currency.BTC, Currency.ETH, Currency.TRX, Currency.ADA, Currency.SOL],
    forecast_steps=timedelta(seconds=5)
)

In [None]:
booster: Booster = pipe.build_model_pipeline()

<h4>Build test sample one more time</h4>

In [None]:
from models.prediction.build_sample import BuildDataset
from ml_base.sample import MLDataset
from ml_base.enums import DatasetType


dataset: MLDataset = (
    BuildDataset(
        exchange=Exchange.OKX_SPOT, 
        target_currencies=[Currency.BTC, Currency.ETH, Currency.TRX, Currency.ADA, Currency.SOL],
        forecast_step=timedelta(seconds=5)
    )
    .create_dataset(bounds=test_bounds, ds_type=DatasetType.TEST)
)

In [10]:
y_pred: np.ndarray = booster.predict(dataset.data)

In [24]:
from sklearn.metrics import r2_score, classification_report, accuracy_score

r2_score(y_pred=y_pred, y_true=dataset.label)

0.038923048433340934

In [25]:
y_pred_binary: np.ndarray = (y_pred > 0).astype(int)
y_true_binary: np.ndarray = (dataset.label > 0).astype(int)

print(
    classification_report(y_pred=y_pred_binary, y_true=y_true_binary)
)

              precision    recall  f1-score   support

           0       0.73      0.60      0.66   2855003
           1       0.42      0.57      0.49   1464756

    accuracy                           0.59   4319759
   macro avg       0.58      0.59      0.57   4319759
weighted avg       0.63      0.59      0.60   4319759



In [26]:
accuracy_score(y_pred=y_pred_binary, y_true=y_true_binary)

0.5922853566599433

In [29]:
dataset.label.describe().to_frame()

Unnamed: 0,output
count,4319759.0
mean,0.003442812
std,2.604118
min,-62.68939
25%,-0.5690872
50%,0.0
75%,0.5970149
max,73.10164


In [33]:
booster.feature_importance()

array([ 21,  19,   3,   1,   2,   2,   0,   0,   0,   0,   0,   0,   9,
         0,   1,   0,   0,   5,  15,  16,   1,   2,   0,   1,   0,   0,
         0,   0,   0,   0,  28,   0,   0,   3,   0,  10,  33,  22,   2,
         2,   4,   2,   0,   0,   0,   0,   0,   0,  61,   0,   7,  11,
         0,   6,  60,  23,   5,   6,  10,   9,   0,   0,   0,   0,   0,
         0,  87,   2,   1,  12,   0,   8, 100,  20,   4,  16,  20,  12,
         0,   0,   0,   0,   0,   0, 106,   1,   1,  32,   1,  12, 153,
        15,  13,  23,  12,  24,   0,   0,   0,   0,   0,   0, 127,   0,
         0,  16,   0,  24, 134,   5,  13,  11,   9,  16,   0,   0,   0,
         0,   0,   0, 114,   0,   0,  10,   0,  20,  75,   4,   8,  13,
        14,  17,   0,   0,   0,   0,   0,   0,  83,   0,   0,  14,   0,
        12,  61,   5,   7,  21,  13,  18,   0,   0,   0,   0,   0,   0,
        44,   0,   0,  24,   0,  13,  14,   5,   1,   0,   1,   7,   0,
         0,   0,   0,   0,   0,  14,   3,   0,   1,   0,   4,   

In [32]:
is_btc = dataset.data["currency_index"] == Currency.BTC.value
dataset.data.head(10)

Unnamed: 0,SELF-asset_return-500MS@BINANCE_SPOT,SELF-slippage_imbalance-500MS@BINANCE_SPOT,SELF-flow_imbalance_500MS@BINANCE_SPOT,SELF-powerlaw_alpha-500MS@BINANCE_SPOT,SELF-share_of_long_trades-500MS@BINANCE_SPOT,SELF-sigma-500MS@BINANCE_SPOT,SELF-asset_return-500MS@BINANCE_USDM,SELF-slippage_imbalance-500MS@BINANCE_USDM,SELF-flow_imbalance_500MS@BINANCE_USDM,SELF-powerlaw_alpha-500MS@BINANCE_USDM,...,ETH-powerlaw_alpha-300S@BINANCE_USDM,ETH-share_of_long_trades-300S@BINANCE_USDM,ETH-sigma-300S@BINANCE_USDM,ETH-asset_return-300S@OKX_SPOT,ETH-slippage_imbalance-300S@OKX_SPOT,ETH-flow_imbalance_300S@OKX_SPOT,ETH-powerlaw_alpha-300S@OKX_SPOT,ETH-share_of_long_trades-300S@OKX_SPOT,ETH-sigma-300S@OKX_SPOT,currency_index
1,0.0,,-0.972905,1.394691,0.5,0.005774,0.0,,-0.972905,1.394691,...,,,,0.0,,,,,,1
2,0.0,,1.0,1.424687,1.0,0.0,0.0,,1.0,1.424687,...,,,,0.0,,,,,,1
3,-0.001838,,-1.0,inf,0.0,,-0.001838,,-1.0,inf,...,,,,0.0,,,,,,1
4,0.0,,,,,,0.0,,,,...,,,,0.0,,,,,,1
5,0.000431,,1.0,6.831843,1.0,0.0,0.000431,,1.0,6.831843,...,,,,0.0,,,,,,1
6,0.0,,1.0,1.332798,1.0,0.0,0.0,,1.0,1.332798,...,,,,0.0,,,,,,1
7,0.0,,1.0,inf,1.0,,0.0,,1.0,inf,...,,,,0.0,,,,,,1
8,0.0,,1.0,inf,1.0,,0.0,,1.0,inf,...,,,,0.0,,,,,,1
9,0.0,,0.243564,2.257128,0.666667,0.005774,0.0,,0.243564,2.257128,...,,,,0.0,,,,,,1
10,0.431522,1.0,1.0,1.274848,1.0,2.039164,0.431522,1.0,1.0,1.274848,...,,,,0.0,,,,,,1


In [27]:
import plotly.graph_objects as go


fig: go.Figure = go.Figure()
fig.add_trace(
    go.Scatter(x=data)
)