In [1]:
# Add the project root to sys.path (done at the end of this cell) so that project modules can be imported
import os
import sys
from decimal import Decimal
import warnings

# Suppress every warning category for the rest of the session so library noise
# does not clutter cell output.
# NOTE(review): this is a blanket filter and also hides deprecation warnings —
# consider narrowing it to specific categories/modules.
warnings.filterwarnings("ignore")

# Project root: resolved two directory levels above the current working directory.
root_path = os.path.abspath(os.path.join(os.getcwd(), '../..'))
# Make project packages importable. The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
if root_path not in sys.path:
    sys.path.append(root_path)

In [2]:
from core.data_sources.clob import CLOBDataSource

# Instantiate the CLOB data source. This handle is used in the following cells
# to load the candle cache and to look candles up by market
# (per the original note it is meant to provide trading rules and candles).
clob = CLOBDataSource()

In [3]:
# Load cached candles, passing the project root as the location.
# Presumably this fills clob.candles_cache (read in the next cell) — confirm
# against CLOBDataSource.load_candles_cache.
clob.load_candles_cache(root_path)

In [4]:
# Market selection: exchange connector, trading pair and candle interval.
connector_name, trading_pair, interval = "binance", "BTC-USDT", "1s"

# The candle cache is indexed by the (connector, pair, interval) triple.
candles_key = (connector_name, trading_pair, interval)
candles = clob.candles_cache[candles_key]

In [5]:
import pandas as pd

# The feature file for a market is named "<connector>|<pair>|<interval>.parquet"
# and is read from <root_path>/data/features_df.
features_file = f"{connector_name}|{trading_pair}|{interval}.parquet"
features_path = os.path.join(root_path, "data", "features_df", features_file)

df_with_features = pd.read_parquet(features_path)
# Preview the first rows as the cell output.
df_with_features.head()


Unnamed: 0_level_0,quote_asset_volume,n_trades,target,close_type,BBL_20_2.0,BBM_20_2.0,BBU_20_2.0,BBB_20_2.0,BBP_20_2.0,BBL_50_2.0,...,STOCHk_14_3_3,STOCHd_14_3_3,ADX_14,DMP_14,DMN_14,open_ret,high_ret,low_ret,close_ret,buy_volume_ratio
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-02-11 17:01:02,-0.18944512,-0.40347281,-0.95883943,-1,1.55863153,1.68980434,1.80637557,1.45505462,1.36770935,1.48558364,...,1.16438408,1.19122848,0.56876921,1.7271614,-1.6308477,2.80110673,0.01835593,2.66681169,0.02211746,-1.34680264
2025-02-11 17:01:03,-0.21373543,-0.43433341,-0.94254878,-1,1.55841478,1.70893998,1.84454824,1.66967107,1.19163722,1.47259659,...,1.16438408,1.19122848,0.67734981,1.7271614,-1.6308477,0.02060666,0.01835593,0.02343565,0.02211746,1.01773425
2025-02-11 17:01:04,-0.19984956,-0.41118796,-0.92643581,-1,1.56479542,1.7282912,1.87660651,1.81348977,1.0571018,1.46149103,...,1.16424133,1.19117978,0.7769559,1.72628955,-1.6299792,0.01685488,0.01835593,0.01987129,0.02025083,-1.33838811
2025-02-11 17:01:05,-0.04194927,-0.40347281,-0.91047395,-1,1.57642777,1.74763882,1.90345005,1.89901195,0.94895522,1.46064081,...,1.16409858,1.19108238,0.8685779,1.72628955,-1.6299792,0.01873077,0.01646049,0.02165347,0.02211746,-1.36109773
2025-02-11 17:01:06,0.05979258,1.00839971,-0.90301416,-1,1.5965585,1.76367022,1.91528846,1.85350163,0.60365635,1.46525588,...,1.03219915,1.14598865,0.79332454,1.00007267,-0.90737594,0.01873077,0.02025136,-1.6232961,-1.70077466,-1.35312485


In [None]:
# Modeling dependencies
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Columns that are never used as model inputs: the label itself ('close_type')
# plus bookkeeping columns. Names that are absent from the frame are simply
# not matched by the filter below.
non_feature_columns = (
    'timestamp', 'tl', 'stop_loss_time', 'take_profit_time',
    'close_time', 'close_type', 'real_class', 'ret',
)
# NOTE(review): the frame preview shows a 'target' column that is not in the
# exclusion list, so it remains part of X — confirm this is intended.
feature_columns = [
    column
    for column in df_with_features.columns
    if column not in non_feature_columns
]

# Feature matrix and label vector
X = df_with_features[feature_columns]
y = df_with_features['close_type']

# Hold out 20% of the rows for testing. shuffle=False keeps the original row
# order, so the test set is the final slice of the frame rather than a random
# sample (random_state is only relevant when shuffling is enabled).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=False,
)

# Random Forest hyperparameters as chosen by the author
rf_params = dict(
    n_estimators=200,         # number of trees in the forest
    max_depth=10,             # cap tree depth to limit overfitting
    min_samples_split=10,     # minimum samples needed to split a node
    min_samples_leaf=5,       # minimum samples allowed in a leaf
    max_features='sqrt',      # features considered per split
    class_weight='balanced',  # reweight classes to counter imbalance
    random_state=42,          # fixed seed for reproducibility
    n_jobs=-1,                # use all available cores
)
rf_model = RandomForestClassifier(**rf_params)

# Fit on the training slice
rf_model.fit(X_train, y_train)

# Predict labels for the held-out slice
y_pred = rf_model.predict(X_test)

# Report per-class precision / recall / F1 on the held-out slice
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)
