In [1]:
from pathlib import Path
import os
import sys

# Resolve the project root. When the kernel is started from inside the
# "notebooks" directory we step up one level; otherwise the current working
# directory is taken to be the root already.
PROJECT_ROOT = Path.cwd()
if PROJECT_ROOT.name == "notebooks":
    PROJECT_ROOT = PROJECT_ROOT.parent

# Work from the root so relative paths resolve against the project root.
os.chdir(PROJECT_ROOT)

# Register import locations explicitly instead of relying on the implicit
# "current directory" entry of sys.path:
#   * PROJECT_ROOT        -> enables package-style imports (`from src.x import y`)
#   * PROJECT_ROOT / "src" -> keeps flat imports (`import x`) working as before
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate sys.path entries.
for _import_dir in (PROJECT_ROOT, PROJECT_ROOT / "src"):
    if str(_import_dir) not in sys.path:
        sys.path.append(str(_import_dir))

print("Running from:", PROJECT_ROOT)


Running from: c:\Users\devme\OneDrive\Desktop\trader-behavior-insights


In [2]:
import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GroupKFold

from src.data_utils import read_parquet
from src.modeling_advanced import optuna_lgbcv, train_lstm


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Model inputs used by the tabular model below.
features = ['score','score_3d','score_7d','leverage','log_notional','time_of_day']

# Load the processed trades and keep only rows where every feature AND the raw
# PnL are present. The PnL check has to happen *before* the label is built:
# `NaN > 0` evaluates to False, so a missing closedpnl would otherwise be
# silently labelled as a losing trade (target == 0) and never be dropped.
df = read_parquet("trades_processed.parquet").dropna(subset=features + ['closedpnl'])

# Binary label: 1 when the trade closed with a strictly positive PnL, else 0.
# `assign` returns a new frame, so no column is written into a dropna() result.
df = df.assign(target=(df['closedpnl'] > 0).astype(int))

# Invariant for the downstream cells: no missing values in inputs or label.
assert df[features + ['target']].notna().all().all(), "unexpected NaN in features/target"

df.head()


Unnamed: 0,account,symbol,execution_price,size,size_usd,side,start_position,direction,closedpnl,transaction_hash,...,avg_return_100,pnl_stability_100,conviction,log_notional,price_ret,volatility,volatility_bucket,sentiment_vol_interaction,risk_per_trade,target
0,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,43.46,1.14,49.54,SELL,3.558754,Sell,17.328997,0xa55ea020fc4e3e153b530418b99da001b6006b1370ab...,...,0.107266,0.252152,0.009549,3.922852,,0.0,0,0.0,0.980215,1
1,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,96.099,1.11,106.67,SELL,1.117281,Sell,-47.982752,0xc78a1a36e263ac325c5c041d01c85f02062f00456577...,...,0.015866,0.061143,0.003565,4.67907,1.211206,0.0,0,0.0,1.348423,0
2,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,79.36224,0.007281,0.58,SELL,0.007281,Spot Dust Conversion,0.0,0x00000000000000000000000000000000000000000000...,...,0.03011,0.050965,6.2e-05,0.456036,-0.174162,0.979603,3,25.469669,0.010848,0
34,0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891,@10,0.00027,59732.0,16.14,SELL,138107.968,Sell,0.0,0x91d7871b0da61c3e8b1b040ec1d8b601ae00be30e645...,...,0.318002,0.463767,0.314721,2.841495,,0.0,0,0.0,0.941662,0
35,0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891,@10,0.000269,78375.0,21.12,SELL,78375.96803,Sell,-0.063484,0x91d7871b0da61c3e8b1b040ec1d8b601ae00be30e645...,...,0.301106,0.456678,0.426114,3.096292,-0.002997,0.0,0,0.0,1.075767,0


In [4]:
# Split the prepared frame into the three pieces handed to the tuner:
#   X      - the feature columns listed in `features`
#   y      - the binary `target` label
#   groups - the `account` column (one group id per row)
X, y, groups = df[features], df['target'], df['account']


In [5]:
# Hyper-parameter search for the LightGBM model. `optuna_lgbcv` returns an
# object exposing `best_value` / `best_params`.
# NOTE(review): `groups` is presumably used for account-grouped CV folds -
# confirm in src.modeling_advanced.
study = optuna_lgbcv(X, y, groups, n_trials=40)

# Report the winning trial.
best_auc, best_params = study.best_value, study.best_params
print("Best AUC:", best_auc)
print("Best Params:", best_params)


[I 2025-11-26 00:52:44,488] A new study created in memory with name: no-name-3c681386-b6a1-4e5e-9d0d-ce7d502494d7
[I 2025-11-26 00:52:45,999] Trial 0 finished with value: 0.595271870002153 and parameters: {'num_leaves': 25, 'learning_rate': 0.08536425281986118, 'feature_fraction': 0.7502731670503116, 'bagging_fraction': 0.7584872923762078, 'bagging_freq': 1, 'lambda_l1': 0.008872066917432633, 'lambda_l2': 2.602611528655456e-07, 'min_data_in_leaf': 100}. Best is trial 0 with value: 0.595271870002153.
[I 2025-11-26 00:52:49,065] Trial 1 finished with value: 0.5822909632606962 and parameters: {'num_leaves': 200, 'learning_rate': 0.006439619881976047, 'feature_fraction': 0.7904854431815375, 'bagging_fraction': 0.8655258812584687, 'bagging_freq': 7, 'lambda_l1': 8.210854617100498, 'lambda_l2': 1.0941677343891663e-08, 'min_data_in_leaf': 142}. Best is trial 0 with value: 0.595271870002153.
[I 2025-11-26 00:52:50,779] Trial 2 finished with value: 0.5899018463358698 and parameters: {'num_leave

Best AUC: 0.6046420511786149
Best Params: {'num_leaves': 76, 'learning_rate': 0.014374316602835472, 'feature_fraction': 0.8199230084235357, 'bagging_fraction': 0.8224255227083129, 'bagging_freq': 1, 'lambda_l1': 0.8263897189664671, 'lambda_l2': 1.5793288038446045, 'min_data_in_leaf': 299}


In [6]:
# Inputs fed to the sequence model.
seq_features = ['score','score_3d','leverage','log_notional','time_of_day']

# Cap the training set at 60k rows to keep CPU training time manageable; the
# fixed random_state makes the drawn subset repeatable.
# NOTE(review): rows are sampled at random, which may break the temporal
# contiguity of the 20-step windows - confirm how train_lstm builds sequences.
sample_size = min(60000, len(df))
df_small = df.sample(n=sample_size, random_state=42)

# Training settings, gathered in one place and forwarded as keyword arguments.
lstm_settings = dict(seq_len=20, epochs=6, batch_size=128, device='cpu')
lstm_model = train_lstm(df_small, seq_features, **lstm_settings)


Epoch 1 TrainLoss=0.6498 ValAUC=0.6479519324895797
Epoch 2 TrainLoss=0.6265 ValAUC=0.6853937401246204
Epoch 3 TrainLoss=0.6094 ValAUC=0.6947134961359068
Epoch 4 TrainLoss=0.6005 ValAUC=0.7057452087205178
Epoch 5 TrainLoss=0.5962 ValAUC=0.7003840284777947
Epoch 6 TrainLoss=0.5949 ValAUC=0.705089763522428


In [7]:
from src.backtest import apply_size_scaling, metrics_from_daily_series

# Counterfactual parameters (values unchanged from the original analysis).
EXTREME_FEAR_MAX_SCORE = 20      # rows with score <= this are flagged as extreme fear
EXTREME_FEAR_SIZE_FACTOR = 0.5   # scaling factor passed to apply_size_scaling

# Index the trades by timestamp. The guard makes the cell safe to re-run: once
# 'time' has been moved into the index it is no longer a column, and an
# unconditional df['time'] lookup would raise KeyError on the second execution.
if 'time' in df.columns:
    df = df.assign(time=pd.to_datetime(df['time'])).set_index('time')

# Example: reduce size during extreme fear.
# `assign` builds a new frame rather than writing into the existing one.
df = df.assign(extreme_fear=df['score'] <= EXTREME_FEAR_MAX_SCORE)

# Apply the scaling to the flagged rows and summarise the scaled PnL series.
counterfactual = apply_size_scaling(df, 'extreme_fear', EXTREME_FEAR_SIZE_FACTOR)
metrics = metrics_from_daily_series(counterfactual['closedpnl_scaled'])

metrics


{'total_pnl': 3622932.9933275003,
 'max_drawdown': -122672.000883,
 'daily_mean': 4267.294456216137,
 'daily_vol': 40099.13168328014}

In [9]:
import os
import joblib

# torch is imported here because this cell is the only place that needs it.
import torch

# Create the output directory if it does not exist yet.
os.makedirs("models", exist_ok=True)

# Save the Optuna study (a plain Python object, so joblib pickling is fine).
joblib.dump(study, "models/lightgbm_optuna.pkl")

# Save the LSTM weights with torch.save so the ".pt" file is a genuine PyTorch
# checkpoint that can be restored with
# `model.load_state_dict(torch.load("models/lstm_state_dict.pt"))`.
# Writing the state dict through joblib produced a joblib pickle behind a
# ".pt" extension, which torch.load cannot read.
torch.save(lstm_model.state_dict(), "models/lstm_state_dict.pt")

print("Models saved successfully.")


Models saved successfully.
