# Experimenting with Tree-Based Models

## Imports

In [1]:
import sys
from pathlib import Path
sys.path.append(str(Path("..").resolve()))

from live_trader.ml_model.utils import ProbabilisticClassifier
from live_trader.ml_model import ML_Pipeline

import numpy as np


2026-01-15 16:13:38.894315: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-15 16:13:38.960286: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-15 16:13:40.194937: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
  if not hasattr(np, "object"):


In [2]:
# Scikit-learn
from sklearn.ensemble import RandomForestClassifier

# Other models
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier


## Modelling

### Random Forest

In [3]:
def build_rf(_: np.ndarray) -> ProbabilisticClassifier:
    """Return an unfitted random-forest classifier with fixed hyperparameters.

    The single positional argument is accepted to match the builder
    signature used by the other model builders but is not read.
    """
    hyperparams = {
        "n_estimators": 300,
        "max_depth": 5,
        "min_samples_leaf": 20,
        "class_weight": "balanced",
        "random_state": 42,  # fixed seed so repeated runs build the same forest
        "n_jobs": -1,
    }
    return RandomForestClassifier(**hyperparams)


### LightGBM

In [4]:
def buildLGBM(_: np.ndarray) -> ProbabilisticClassifier:
    """Return an unfitted LightGBM classifier with fixed hyperparameters.

    The single positional argument is accepted to match the builder
    signature used by the other model builders but is not read.
    """
    return LGBMClassifier(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=5,
        num_leaves=31,
        subsample=0.8,
        # LightGBM only applies row subsampling (bagging) when the bagging
        # frequency is > 0; with the default of 0, ``subsample`` is ignored.
        subsample_freq=1,
        colsample_bytree=0.8,
        class_weight="balanced",
        random_state=42,
    )

### XGBoost

In [5]:
def buildXGB(_: np.ndarray) -> ProbabilisticClassifier:
    """Return an unfitted XGBoost classifier with fixed hyperparameters.

    The single positional argument is accepted to match the builder
    signature used by the other model builders but is not read.
    """
    hyperparams = {
        "n_estimators": 500,
        "learning_rate": 0.05,
        "max_depth": 4,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "eval_metric": "logloss",
        "random_state": 42,
        "n_jobs": -1,
    }
    classifier = XGBClassifier(**hyperparams)
    return classifier

### CatBoost

In [6]:
def build_catboost(_: np.ndarray) -> ProbabilisticClassifier:
    """
    Build an unfitted CatBoost classifier with fixed hyperparameters.

    The single positional argument is accepted to match the builder
    signature used by the other model builders but is not read.

    Notes:
        - ``verbose=False`` turns off CatBoost's training log output.
        - ``auto_class_weights="Balanced"`` asks CatBoost to weight classes
          automatically.
        - Training optimises ``Logloss`` and reports ``AUC`` as the
          evaluation metric.
        - No early-stopping settings are configured here.
    """
    return CatBoostClassifier(
        verbose=False,
        random_seed=42,
        auto_class_weights="Balanced",
        eval_metric="AUC",
        loss_function="Logloss",
        learning_rate=0.05,
        depth=6,
        iterations=500,
    )

## Testing
Each model builder is run through the pipeline on a single symbol, SPY.

In [7]:
# Ticker symbol handed to ML_Pipeline by every test cell below.
symbol = "spy"

In [8]:
# Run the pipeline with the random-forest builder and print the returned
# side signal; the second element of the returned pair is discarded.
# NOTE(review): the meaning of the empty dict (third argument) is not visible
# in this notebook — confirm against ML_Pipeline's signature.
side, _ = await ML_Pipeline(build_rf, symbol, {})
print(f"{symbol}: {side}")

spy: SideSignal.HOLD


In [9]:
# Same run as above, using the LightGBM builder; only the side signal is kept.
side, _ = await ML_Pipeline(buildLGBM, symbol, {})
print(f"{symbol}: {side}")

[LightGBM] [Info] Number of positive: 66, number of negative: 45
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 543
[LightGBM] [Info] Number of data points in the train set: 111, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=-0.000000
[LightGBM] [Info] Start training from score -0.000000




spy: SideSignal.BUY




In [10]:
# Same run as above, using the XGBoost builder; only the side signal is kept.
side, _ = await ML_Pipeline(buildXGB, symbol, {})
print(f"{symbol}: {side}")

spy: SideSignal.BUY


In [11]:
# Same run as above, using the CatBoost builder; only the side signal is kept.
side, _ = await ML_Pipeline(build_catboost, symbol, {})
print(f"{symbol}: {side}")

spy: SideSignal.BUY
