In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

# Directory layout: PROJECT_DIR is the parent of the current working directory,
# with data/ and data/features/ nested beneath it.
PROJECT_DIR = Path.cwd().parent
DATA_DIR = PROJECT_DIR.joinpath('data')
FEATURES_DIR = DATA_DIR.joinpath('features')

# Read the merged trades + sentiment table written by 02_eda
df = pd.read_parquet(DATA_DIR.joinpath('cleaned_merged_trades_sentiment.parquet'))

In [2]:
# Preview the first rows of the loaded frame to check the available columns
df.head()

Unnamed: 0,account,coin,execution_price,size_tokens,size_usd,side,trade_time,start_position,direction,closed_pnl,transaction_hash,order_id,crossed,fee,trade_id,trade_date,sentiment_day,sentiment_value,sentiment_cat
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,2024-12-02 22:50:00,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,2024-12-02,2024-12-02,80.0,Extreme Greed
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,2024-12-02 22:50:00,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,2024-12-02,2024-12-02,80.0,Extreme Greed
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,2024-12-02 22:50:00,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,2024-12-02,2024-12-02,80.0,Extreme Greed
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,2024-12-02 22:50:00,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,2024-12-02,2024-12-02,80.0,Extreme Greed
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,2024-12-02 22:50:00,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,2024-12-02,2024-12-02,80.0,Extreme Greed


### Sentiment based features

In [3]:
# Build a daily sentiment DataFrame with exactly one row per trade_date.
# groupby(...).first() keeps the first non-null reading of each date, so a date that
# shows up with more than one distinct value (or with both a value and NaN) can no
# longer yield a duplicated index, which would multiply trade rows in the merge below.
# Rows whose trade_date is missing are left out of the daily table.
daily_sent = (
    df.groupby('trade_date')['sentiment_value']
      .first()
      .to_frame()
      .sort_index()
)

# Lagged sentiment: the score of the previous date present in the data.
# shift/rolling/diff below work on rows, so gaps in trade_date are not treated as
# missing calendar days.
daily_sent['sent_lag_1'] = daily_sent['sentiment_value'].shift(1)

# Rolling mean & std over the last 7 observed dates (current date included).
# The std of a single observation is NaN and is reported as 0.
daily_sent['sent_roll7_mean'] = daily_sent['sentiment_value'].rolling(7, min_periods=1).mean()
daily_sent['sent_roll7_std']  = daily_sent['sentiment_value'].rolling(7, min_periods=1).std().fillna(0)

# Volatility proxy: change versus the previous observed date (0 where undefined)
daily_sent['sent_diff_1'] = daily_sent['sentiment_value'].diff(1).fillna(0)

# Merge back onto trades.
# Columns added: d_sentiment_value, d_sent_lag_1, d_sent_roll7_mean, d_sent_roll7_std, d_sent_diff_1.
# Any d_* columns left over from a previous run of this cell are dropped first so that
# re-running does not create *_x / *_y duplicates; validate= guards against row fan-out.
sent_feats = daily_sent.add_prefix('d_')
df = (
    df.drop(columns=sent_feats.columns, errors='ignore')
      .merge(
          sent_feats,
          left_on='trade_date',
          right_index=True,
          how='left',
          validate='many_to_one',
      )
)


### Trade level features

In [4]:
# Binary target: 1 when the trade closed with a positive PnL, otherwise 0
df['is_win'] = (df['closed_pnl'] > 0).astype(int)

# PnL per USD traded.
# A zero size_usd is turned into NaN before dividing so the ratio is NaN instead of
# +/-inf: NaN rows are removed by the dropna() applied before saving, inf values are not.
df['pnl_per_usd'] = df['closed_pnl'] / df['size_usd'].replace(0, np.nan)

# Trade direction as numeric indicator columns (exact, case-sensitive match on 'BUY' / 'SELL')
df['side_buy'] = (df['side'] == 'BUY').astype(int)
df['side_sell']= (df['side'] == 'SELL').astype(int)

### Trader level aggregates

In [5]:
# Per-account summary statistics computed over every trade currently in df.
# NOTE(review): acct_win_rate and acct_avg_pnl are built from the outcomes of all of an
# account's trades, including the row they are merged back onto - confirm this is
# acceptable for the downstream use of is_win / closed_pnl.
# NOTE(review): trade_id is displayed as a rounded float in the preview above
# (e.g. 895000000000000.0); if ids lost precision upstream, nunique may undercount - verify.
acct_feats = (
    df.groupby('account')
      .agg(
          acct_avg_size    = ('size_usd', 'mean'),
          acct_win_rate    = ('is_win', 'mean'),
          acct_trade_count = ('trade_id','nunique'),
          acct_avg_pnl     = ('closed_pnl','mean'),
      )
      .reset_index()
)

# Merge back.
# acct_* columns left over from a previous run of this cell are dropped first so that
# re-running does not create *_x / *_y duplicates; validate= asserts one row per account.
acct_cols = [col for col in acct_feats.columns if col != 'account']
df = (
    df.drop(columns=acct_cols, errors='ignore')
      .merge(acct_feats, on='account', how='left', validate='many_to_one')
)

### Finalize & save

In [6]:
# final feature columns + target
# NOTE(review): pnl_per_usd is computed from closed_pnl, which is also one of the targets
# below - confirm it is meant to be saved as a feature.
feature_cols = [
    # sentiment features
    'd_sentiment_value','d_sent_lag_1','d_sent_roll7_mean','d_sent_roll7_std','d_sent_diff_1',
    # trade features
    'size_usd','execution_price','side_buy','side_sell','pnl_per_usd',
    # account features
    'acct_avg_size','acct_win_rate','acct_trade_count','acct_avg_pnl'
]

target_cols = ['is_win', 'closed_pnl']

# Keep only complete rows. +/-inf is converted to NaN first because dropna() on its own
# leaves infinite values in place.
df_features = (
    df[feature_cols + target_cols]
      .replace([np.inf, -np.inf], np.nan)
      .dropna()
)

# Save (create the output folder when it does not exist yet, otherwise to_parquet fails)
FEATURES_DIR.mkdir(parents=True, exist_ok=True)
features_path = FEATURES_DIR / 'trade_features.parquet'
df_features.to_parquet(features_path, index=False)
print("Features saved:", features_path)


Features saved: H:\Portfilios\JuniorDataScientist\TradeAnalysis\Trade-Analysis\data\features\trade_features.parquet
