In [1]:
from pathlib import Path
import os
import sys
import pandas as pd
import numpy as np

# Detect project root (parent of /notebooks directory)
PROJECT_ROOT = Path.cwd()
if PROJECT_ROOT.name == "notebooks":
    PROJECT_ROOT = PROJECT_ROOT.parent

os.chdir(PROJECT_ROOT)
sys.path.append(str(PROJECT_ROOT / "src"))

from src.data_utils import load_raw_trades, load_raw_fg, save_parquet
from src.feature_engineering import feature_pipeline

pd.set_option("display.max_columns", 200)

RAW = PROJECT_ROOT / "data" / "raw"
PROCESSED = PROJECT_ROOT / "data" / "processed"
PROCESSED.mkdir(parents=True, exist_ok=True)

print("✔ Working directory:", PROJECT_ROOT)
print("✔ Raw data path:", RAW)
print("✔ Processed path:", PROCESSED)


✔ Working directory: c:\Users\devme\OneDrive\Desktop\trader-behavior-insights
✔ Raw data path: c:\Users\devme\OneDrive\Desktop\trader-behavior-insights\data\raw
✔ Processed path: c:\Users\devme\OneDrive\Desktop\trader-behavior-insights\data\processed


In [2]:
trades = load_raw_trades("historical.csv")
fg = load_raw_fg("fear_greed_index.csv")

print("✔ Trades loaded:", trades.shape)
print("✔ Fear/Greed loaded:", fg.shape)
trades.head()


✔ Trades loaded: (211224, 16)
✔ Fear/Greed loaded: (2644, 5)


Unnamed: 0,account,symbol,execution_price,size,size_usd,side,start_position,direction,closedpnl,transaction_hash,order_id,crossed,fee,trade_id,timestamp,time
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0,2024-02-12 22:50:00+00:00
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0,2024-02-12 22:50:00+00:00
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0,2024-02-12 22:50:00+00:00
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0,2024-02-12 22:50:00+00:00
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0,2024-02-12 22:50:00+00:00


In [3]:
trades_fe = feature_pipeline(trades)

print("✔ After feature pipeline:", trades_fe.shape)
trades_fe.head()


✔ After feature pipeline: (211224, 52)


Unnamed: 0,account,symbol,execution_price,size,size_usd,side,start_position,direction,closedpnl,transaction_hash,order_id,crossed,fee,trade_id,timestamp,time,notional,return_pct,win,trade_date,time_of_day,weekday,weekend,leverage,date,score,classification,date_minus1,date_plus1,score_minus1,classification_minus1,score_plus1,classification_plus1,score_3d,score_7d,sentiment_shift,winrate_10,avg_return_10,pnl_stability_10,winrate_30,avg_return_30,pnl_stability_30,winrate_100,avg_return_100,pnl_stability_100,conviction,log_notional,price_ret,volatility,volatility_bucket,sentiment_vol_interaction,risk_per_trade
132706,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,43.46,1.14,49.54,SELL,3.558754,Sell,17.328997,0xa55ea020fc4e3e153b530418b99da001b6006b1370ab...,53644285595,True,0.01734,674000000000000.0,1730000000000.0,2024-08-12 18:00:00+00:00,49.5444,0.349767,1,2024-08-12,18.0,0.0,0,1,2024-08-12,25.0,Fear,2024-08-11,2024-08-13,39.0,Fear,31.0,Fear,34.666667,31.142857,3.52381,0.4,0.112294,0.243525,0.466667,0.045205,0.144445,0.41,0.107266,0.252152,0.009549,3.922852,,0.0,0,0.0,0.980215
138994,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,96.099,1.11,106.67,SELL,1.117281,Sell,-47.982752,0xc78a1a36e263ac325c5c041d01c85f02062f00456577...,68561054935,True,0.037334,1090000000000000.0,1740000000000.0,2025-02-02 22:20:00+00:00,106.66989,-0.449825,0,2025-02-02,22.333333,6.0,1,1,2025-02-02,60.0,Greed,2025-02-01,2025-02-03,68.0,Greed,44.0,Fear,68.0,69.857143,-1.857143,0.0,-0.044982,0.142247,0.0,-0.014994,0.082126,0.27,0.015866,0.061143,0.003565,4.67907,1.211206,0.0,0,0.0,1.348423
139028,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,79.36224,0.007281,0.58,SELL,0.007281,Spot Dust Conversion,0.0,0x00000000000000000000000000000000000000000000...,68724859046,True,0.0,0.0,1740000000000.0,2025-03-02 05:30:00+00:00,0.577808,0.0,0,2025-03-02,5.5,6.0,1,1,2025-03-02,26.0,Fear,2025-03-01,2025-03-03,20.0,Extreme Fear,33.0,Fear,20.666667,23.857143,-3.190476,0.0,0.0,0.0,0.266667,0.012101,0.022976,0.52,0.03011,0.050965,6.2e-05,0.456036,-0.174162,0.979603,3,25.469669,0.010848
136712,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,139.07,0.35,48.67,BUY,0.067648,Buy,0.0,0x0f1f8fbcea995c205a41041b4bd754016200bbc8d409...,62463687548,True,0.000122,262000000000000.0,1740000000000.0,2025-12-01 21:38:00+00:00,48.6745,0.0,0,2025-12-01,21.633333,0.0,0,1,,,,2025-11-30,2025-12-02,,,,,,,,0.4,0.020216,0.042871,0.666667,0.030795,0.034525,0.43,0.016317,0.029478,0.02177,3.905492,0.752345,0.705716,3,,0.929494
131684,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,@1,28.236,1.45,40.94,BUY,0.0,Buy,0.0,0x6fa91405cb831c3389690417fdab5102015300e64106...,51264314212,True,0.000507,253000000000000.0,1730000000000.0,NaT,40.9422,0.0,0,NaT,,,0,1,,,,NaT,NaT,,,,,,,,0.4,0.384797,0.496871,0.533333,0.324762,0.458545,0.51,0.31103,0.455067,0.016448,3.736292,-0.796966,0.904121,3,,0.814256


In [4]:
# Keep dataset large — only enforce core financial values
mandatory = ["execution_price", "size", "closedpnl"]

trades_fe = trades_fe.dropna(subset=mandatory).reset_index(drop=True)

out_path = save_parquet(trades_fe, "trades_processed.parquet")
print("✔ Saved to:", out_path)
print("✔ Final dataset shape:", trades_fe.shape)


✔ Saved to: C:\Users\devme\OneDrive\Desktop\trader-behavior-insights\data\processed\trades_processed.parquet
✔ Final dataset shape: (211224, 52)


In [9]:
df_check = pd.read_parquet(PROCESSED / "trades_processed.parquet")
print(df_check.shape)
df_check.head()


(211224, 50)


Unnamed: 0,account,symbol,execution_price,size,size_usd,side,start_position,direction,closedpnl,transaction_hash,order_id,crossed,fee,trade_id,timestamp,time,notional,return_pct,win,trade_date,hour_of_day,weekday,weekend,leverage,date,score,classification,date_minus1,date_plus1,score_minus1,classification_minus1,score_plus1,classification_plus1,score_3d,score_7d,sentiment_shift,winrate_10,avg_return_10,pnl_stability_10,winrate_30,avg_return_30,pnl_stability_30,winrate_100,avg_return_100,pnl_stability_100,conviction,log_notional,log_fee_ratio,risk_per_trade,sentiment_alignment
0,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,4041.0,2.4798,10020.87,BUY,-581.0,Close Short,-1451.600526,0x965123c5a360fc8e7f9904189483fb02014700290974...,53292414040,False,1.002087,974000000000000.0,1730000000000.0,2024-06-12 23:15:00+00:00,10020.8718,-0.144858,0,2024-06-12,23.25,2.0,0,1,2024-06-12,72.0,Greed,2024-06-11,2024-06-13,74.0,Greed,70.0,Greed,72.666667,74.285714,-1.619048,0.0,-0.144858,,0.0,-0.144858,,0.0,-0.144858,,1.0,9.212525,0.0001,0.9999,1.0
1,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,4041.0,3.0,12123.0,BUY,-578.5202,Close Short,-1756.11,0x018550a4b99b91367208041894840302015400d0c7aa...,53292414040,False,1.2123,162000000000000.0,1730000000000.0,2024-06-12 23:15:00+00:00,12123.0,-0.144858,0,2024-06-12,23.25,2.0,0,1,2024-06-12,72.0,Greed,2024-06-11,2024-06-13,74.0,Greed,70.0,Greed,72.666667,74.285714,-1.619048,0.0,-0.144858,1.795766e-15,0.0,-0.144858,1.795766e-15,0.0,-0.144858,1.795766e-15,1.09493,9.402942,0.0001,1.094832,1.0
2,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,4041.0,6.1858,24996.82,BUY,-575.5202,Close Short,-3620.981746,0x3f43cdd36aebe9c997b4041894842401e100fe4c6755...,53292414040,False,2.499681,514000000000000.0,1730000000000.0,2024-06-12 23:15:00+00:00,24996.8178,-0.144858,0,2024-06-12,23.25,2.0,0,1,2024-06-12,72.0,Greed,2024-06-11,2024-06-13,74.0,Greed,70.0,Greed,72.666667,74.285714,-1.619048,0.0,-0.144858,4.463041e-15,0.0,-0.144858,4.463041e-15,0.0,-0.144858,4.463041e-15,1.59078,10.126544,0.0001,1.590678,1.0
3,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,4041.0,0.596,2408.44,BUY,-569.3344,Close Short,-348.88052,0xb8f4dd3b553c727a0bf60418948425020147001033d3...,53292414040,False,0.240843,446000000000000.0,1730000000000.0,2024-06-12 23:15:00+00:00,2408.436,-0.144858,0,2024-06-12,23.25,2.0,0,1,2024-06-12,72.0,Greed,2024-06-11,2024-06-13,74.0,Greed,70.0,Greed,72.666667,74.285714,-1.619048,0.0,-0.144858,2.495378e-14,0.0,-0.144858,2.495378e-14,0.0,-0.144858,2.495378e-14,0.194428,7.787148,0.0001,0.194412,1.0
4,0x083384f897ee0f19899168e3b1bec365f52a9012,ETH,4041.0,2.404,9714.56,BUY,-568.7384,Close Short,-1407.22948,0x011c71b5c80b569eb6200418948425020148009e28c5...,53292414040,False,0.971456,694000000000000.0,1730000000000.0,2024-06-12 23:15:00+00:00,9714.564,-0.144858,0,2024-06-12,23.25,2.0,0,1,2024-06-12,72.0,Greed,2024-06-11,2024-06-13,74.0,Greed,70.0,Greed,72.666667,74.285714,-1.619048,0.0,-0.144858,2.191864e-14,0.0,-0.144858,2.191864e-14,0.0,-0.144858,2.191864e-14,0.819605,9.181484,0.0001,0.819536,1.0
