In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sentiment_df = pd.read_csv("../data/fear_greed_index.csv")
trader_df = pd.read_csv("../data/historical_data.csv")

In [4]:
print("Sentiment shape:", sentiment_df.shape)
print("Trader shape:", trader_df.shape)

Sentiment shape: (2644, 4)
Trader shape: (211224, 16)


In [5]:
sentiment_df.head()

Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05


In [6]:
trader_df.head()

Unnamed: 0,Account,Coin,Execution Price,Size Tokens,Size USD,Side,Timestamp IST,Start Position,Direction,Closed PnL,Transaction Hash,Order ID,Crossed,Fee,Trade ID,Timestamp
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,02-12-2024 22:50,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,02-12-2024 22:50,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,02-12-2024 22:50,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,02-12-2024 22:50,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,02-12-2024 22:50,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0


In [7]:
sentiment_df.columns = sentiment_df.columns.str.lower().str.strip()
trader_df.columns = (
    trader_df.columns
    .str.lower()
    .str.strip()
    .str.replace(" ", "_")
)


In [8]:
print("Sentiment Columns:")
print(sentiment_df.columns)

Sentiment Columns:
Index(['timestamp', 'value', 'classification', 'date'], dtype='object')


In [9]:
print("\nTrader Columns:")
print(trader_df.columns)


Trader Columns:
Index(['account', 'coin', 'execution_price', 'size_tokens', 'size_usd', 'side',
       'timestamp_ist', 'start_position', 'direction', 'closed_pnl',
       'transaction_hash', 'order_id', 'crossed', 'fee', 'trade_id',
       'timestamp'],
      dtype='object')


In [10]:
# Convert sentiment date
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'], errors='coerce')

# Keep only date (remove time if any)
sentiment_df['date'] = sentiment_df['date'].dt.date

sentiment_df[['date','classification']].head()

Unnamed: 0,date,classification
0,2018-02-01,Fear
1,2018-02-02,Extreme Fear
2,2018-02-03,Fear
3,2018-02-04,Extreme Fear
4,2018-02-05,Extreme Fear


In [11]:
# Convert trader timestamp (IST)
trader_df['timestamp_ist'] = pd.to_datetime(
    trader_df['timestamp_ist'],
    format='%d-%m-%Y %H:%M',
    errors='coerce'
)

# Create date column
trader_df['date'] = trader_df['timestamp_ist'].dt.date

trader_df[['timestamp_ist','date']].head()

Unnamed: 0,timestamp_ist,date
0,2024-12-02 22:50:00,2024-12-02
1,2024-12-02 22:50:00,2024-12-02
2,2024-12-02 22:50:00,2024-12-02
3,2024-12-02 22:50:00,2024-12-02
4,2024-12-02 22:50:00,2024-12-02


In [12]:
print("Sentiment date range:",
      sentiment_df['date'].min(),
      sentiment_df['date'].max())

print("Trader date range:",
      trader_df['date'].min(),
      trader_df['date'].max())

Sentiment date range: 2018-02-01 2025-05-02
Trader date range: 2023-05-01 2025-05-01


In [13]:
daily_pnl = (
    trader_df
    .groupby(['date', 'account'])['closed_pnl']
    .sum()
    .reset_index()
)

daily_pnl.head()

Unnamed: 0,date,account,closed_pnl
0,2023-05-01,0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891,0.0
1,2023-12-05,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0
2,2023-12-14,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,-205.434737
3,2023-12-15,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,-24.632034
4,2023-12-16,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0


In [14]:
trader_df['win'] = np.where(trader_df['closed_pnl'] > 0, 1, 0)

trader_df[['closed_pnl','win']].head()

Unnamed: 0,closed_pnl,win
0,0.0,0
1,0.0,0
2,0.0,0
3,0.0,0
4,0.0,0


In [15]:
win_rate = (
    trader_df
    .groupby(['date', 'account'])['win']
    .mean()
    .reset_index()
    .rename(columns={'win': 'win_rate'})
)

win_rate.head()

Unnamed: 0,date,account,win_rate
0,2023-05-01,0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891,0.0
1,2023-12-05,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0
2,2023-12-14,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.363636
3,2023-12-15,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0
4,2023-12-16,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0


In [16]:
avg_trade_size = (
    trader_df
    .groupby(['date', 'account'])['size_usd']
    .mean()
    .reset_index()
    .rename(columns={'size_usd': 'avg_trade_size'})
)

avg_trade_size.head()

Unnamed: 0,date,account,avg_trade_size
0,2023-05-01,0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891,159.0
1,2023-12-05,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,5556.203333
2,2023-12-14,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,10291.213636
3,2023-12-15,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,5304.975
4,2023-12-16,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,5116.256667


In [17]:
merged_df = daily_pnl.merge(
    win_rate,
    on=['date','account'],
    how='left'
)

merged_df = merged_df.merge(
    avg_trade_size,
    on=['date','account'],
    how='left'
)

merged_df.head()

Unnamed: 0,date,account,closed_pnl,win_rate,avg_trade_size
0,2023-05-01,0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891,0.0,0.0,159.0
1,2023-12-05,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0,0.0,5556.203333
2,2023-12-14,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,-205.434737,0.363636,10291.213636
3,2023-12-15,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,-24.632034,0.0,5304.975
4,2023-12-16,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0,0.0,5116.256667


In [18]:
sentiment_df.head()

Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05


In [19]:
sentiment_map = {
    'Extreme Fear': -2,
    'Fear': -1,
    'Neutral': 0,
    'Greed': 1,
    'Extreme Greed': 2
}

sentiment_df['sentiment_score'] = sentiment_df['classification'].map(sentiment_map)

In [20]:
final_df = merged_df.merge(
    sentiment_df[['date', 'sentiment_score']],
    on='date',
    how='left'
)

final_df.head()

Unnamed: 0,date,account,closed_pnl,win_rate,avg_trade_size,sentiment_score
0,2023-05-01,0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891,0.0,0.0,159.0,1.0
1,2023-12-05,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0,0.0,5556.203333,2.0
2,2023-12-14,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,-205.434737,0.363636,10291.213636,1.0
3,2023-12-15,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,-24.632034,0.0,5304.975,1.0
4,2023-12-16,0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23,0.0,0.0,5116.256667,1.0


In [21]:
final_df = final_df.dropna(subset=['sentiment_score'])

In [22]:
final_df['sentiment_score'].value_counts()

sentiment_score
 1.0    648
-1.0    630
 2.0    526
 0.0    376
-2.0    160
Name: count, dtype: int64

In [23]:
correlation = final_df[['sentiment_score', 'closed_pnl']].corr()
correlation

Unnamed: 0,sentiment_score,closed_pnl
sentiment_score,1.0,-0.00641
closed_pnl,-0.00641,1.0


In [24]:
sentiment_pnl = (
    final_df
    .groupby('sentiment_score')['closed_pnl']
    .mean()
    .reset_index()
)

sentiment_pnl

Unnamed: 0,sentiment_score,closed_pnl
0,-2.0,4619.439053
1,-1.0,5328.818161
2,0.0,3438.618818
3,1.0,3318.10073
4,2.0,5161.922644


In [25]:
trader_sentiment_perf = (
    final_df
    .groupby(['account', 'sentiment_score'])['closed_pnl']
    .mean()
    .reset_index()
)

trader_sentiment_perf.head()

Unnamed: 0,account,sentiment_score,closed_pnl
0,0x083384f897ee0f19899168e3b1bec365f52a9012,-2.0,62384.610721
1,0x083384f897ee0f19899168e3b1bec365f52a9012,-1.0,222674.709503
2,0x083384f897ee0f19899168e3b1bec365f52a9012,0.0,25130.023348
3,0x083384f897ee0f19899168e3b1bec365f52a9012,1.0,92239.756729
4,0x083384f897ee0f19899168e3b1bec365f52a9012,2.0,-4475.8151


In [26]:
overall_trader_perf = (
    final_df
    .groupby('account')
    .agg({
        'closed_pnl': 'mean',
        'win_rate': 'mean'
    })
    .reset_index()
)

overall_trader_perf.head()

Unnamed: 0,account,closed_pnl,win_rate
0,0x083384f897ee0f19899168e3b1bec365f52a9012,66676.242499,0.352333
1,0x23e7a7f8d14b550961925fbfdaa92f5d195ba5bd,920.871548,0.397134
2,0x271b280974205ca63b716753467d5a371de622ab,-5869.68261,0.379952
3,0x28736f43f1e871e6aa8b1148d38d4994275d72c4,817.68404,0.452509
4,0x2c229d22b100a7beb69122eed721cee9b24011dd,2444.318913,0.498708


In [27]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

overall_trader_perf['pnl_norm'] = scaler.fit_transform(
    overall_trader_perf[['closed_pnl']]
)

overall_trader_perf['score'] = (
    0.7 * overall_trader_perf['pnl_norm'] +
    0.3 * overall_trader_perf['win_rate']
)

overall_trader_perf.sort_values('score', ascending=False).head()

Unnamed: 0,account,closed_pnl,win_rate,pnl_norm,score
0,0x083384f897ee0f19899168e3b1bec365f52a9012,66676.242499,0.352333,1.0,0.8057
29,0xbaaaf6571ab7d571043ff1e313a9609a10637864,33577.278794,0.455625,0.543752,0.517314
13,0x513b8629fe877bb581bf244e326a047b249c4ff1,21549.296288,0.325495,0.377953,0.362216
15,0x72743ae2822edd658c0c50608fd7c5c501b2afbd,22597.661364,0.203351,0.392404,0.335688
11,0x4acb90e786d897ecffb614dc822eb231b4ffb9f4,11685.293977,0.426408,0.241984,0.297312


In [28]:
fear_traders = (
    trader_sentiment_perf[
        trader_sentiment_perf['sentiment_score'].isin([-1, -2])
    ]
)

fear_traders = (
    fear_traders
    .groupby('account')['closed_pnl']
    .mean()
    .reset_index()
    .sort_values('closed_pnl', ascending=False)
)

fear_traders.head()

Unnamed: 0,account,closed_pnl
0,0x083384f897ee0f19899168e3b1bec365f52a9012,142529.660112
29,0xbaaaf6571ab7d571043ff1e313a9609a10637864,41228.759894
16,0x72c6a4624e1dffa724e6d00d64ceae698af892a0,21736.273394
11,0x4acb90e786d897ecffb614dc822eb231b4ffb9f4,15712.687666
8,0x420ab45e0bd8863569a5efbb9c05d91f40624641,13034.56434


In [29]:
final_df = final_df.sort_values(['account','date'])

final_df['next_day_pnl'] = (
    final_df
    .groupby('account')['closed_pnl']
    .shift(-1)
)

final_df['profitable_next_day'] = (final_df['next_day_pnl'] > 0).astype(int)

In [30]:
features = ['sentiment_score', 'win_rate', 'avg_trade_size', 'closed_pnl']
X = final_df[features]
y = final_df['profitable_next_day']

In [31]:
split_date = final_df['date'].quantile(0.8)

train = final_df[final_df['date'] <= split_date]
test = final_df[final_df['date'] > split_date]

X_train = train[features]
y_train = train['profitable_next_day']

X_test = test[features]
y_test = test['profitable_next_day']

In [32]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

model = RandomForestClassifier()
model.fit(X_train, y_train)

preds = model.predict(X_test)

print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.44      0.30      0.36       188
           1       0.61      0.73      0.66       275

    accuracy                           0.56       463
   macro avg       0.52      0.52      0.51       463
weighted avg       0.54      0.56      0.54       463

