In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from math import isnan

In [23]:
# Load the two raw inputs (sentiment index and historical data) in one pass.
fg, hist = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./csv_files/fear_greed_index.csv",
        "./csv_files/historical_data.csv",
    )
)


In [24]:
# Preview the first rows of the sentiment table to check its columns.
fg.head()

Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03
3,1517722200,24,Extreme Fear,2018-02-04
4,1517808600,11,Extreme Fear,2018-02-05


In [None]:
#  Clean Data
# The raw file exposes the score as `value`, so the rename must happen BEFORE
# the column selection; selecting `fear_greed_score` first raises a KeyError.
# The chain builds a new frame, so re-running the cell gives the same result.
fg = (
    fg.rename(columns={'value': 'fear_greed_score'})
      # Reduce to calendar dates so the frame can be joined on a daily key.
      .assign(date=lambda frame: pd.to_datetime(frame['date'], errors='coerce').dt.date)
      # Rows whose date failed to parse cannot be attributed to a day; dropping
      # them also keeps null keys from matching null trade dates in a merge.
      .dropna(subset=['date'])
      .loc[:, ['date', 'fear_greed_score', 'classification']]
      .copy()
)
fg['classification'] = fg['classification'].astype(str)

In [32]:
# Normalise column names: trim whitespace, lower-case, spaces -> underscores.
normalized_cols = hist.columns.str.strip().str.lower()
hist.columns = normalized_cols.str.replace(" ", "_")

In [33]:
# parse timestamp column
# Preferred source: `timestamp_ist` in day-month-year "dd-mm-YYYY HH:MM" form.
# Values that do not match the format become NaT (errors='coerce').
if 'timestamp_ist' in hist.columns:
    hist['timestamp_ist'] = pd.to_datetime(hist['timestamp_ist'], format="%d-%m-%Y %H:%M", errors='coerce')
else:
    # Fallback: use the first alternative column that exists and converts.
    for c in ['timestamp', 'time', 'timestamp_ms']:
        if c not in hist.columns:
            continue
        try:
            # Any numeric dtype (not only int64/float64) is read as epoch
            # milliseconds; anything else is parsed as a date string.
            # NOTE(review): epoch values are stored as parsed, with no timezone
            # shift, although the target column is named `timestamp_ist` —
            # confirm whether an offset is required.
            if pd.api.types.is_numeric_dtype(hist[c]):
                hist['timestamp_ist'] = pd.to_datetime(hist[c], unit='ms', errors='coerce')
            else:
                hist['timestamp_ist'] = pd.to_datetime(hist[c], errors='coerce')
        except Exception:
            # Best effort: try the next candidate column.
            continue
        break
    else:
        # No candidate produced a timestamp: fail here with a clear message
        # instead of an opaque KeyError on the `.dt.date` line below.
        raise KeyError(
            "No usable timestamp column found in hist; expected one of "
            "'timestamp_ist', 'timestamp', 'time', 'timestamp_ms'"
        )

# Calendar date of each row.
hist['date'] = hist['timestamp_ist'].dt.date

In [34]:

# numeric conversions
# Coerce whichever of the expected numeric columns are present; values that
# cannot be parsed become NaN.
numeric_candidates = ('execution_price', 'size_tokens', 'size_usd', 'closed_pnl', 'fee')
present_numeric = [name for name in numeric_candidates if name in hist.columns]
for name in present_numeric:
    hist[name] = pd.to_numeric(hist[name], errors='coerce')

In [35]:
# leverage detection
# Use the first candidate column that exists as the source for `leverage`.
# NOTE(review): a column named `start_position` is treated as leverage here;
# confirm that it really holds a leverage multiple and not a position size.
lev_col = next(
    (
        name
        for name in ('start_position', 'startposition', 'leverage', 'start_pos')
        if name in hist.columns
    ),
    None,
)
if lev_col:
    hist['leverage'] = pd.to_numeric(hist[lev_col], errors='coerce')
else:
    # No candidate column: leverage is missing for every row.
    hist['leverage'] = np.nan

In [36]:
# direction normalization
# Prefer an existing `direction` column and fall back to `side`; whichever is
# used is upper-cased and stripped of surrounding whitespace.
direction_source = next(
    (name for name in ('direction', 'side') if name in hist.columns),
    None,
)
if direction_source is None:
    hist['direction'] = np.nan
else:
    hist['direction'] = hist[direction_source].astype(str).str.upper().str.strip()

In [37]:
# notional
# Price times token size when both columns exist; otherwise fall back to
# `size_usd`, or NaN if that column is missing too.
has_price_and_size = {'execution_price', 'size_tokens'}.issubset(hist.columns)
if has_price_and_size:
    hist['notional'] = hist['execution_price'].mul(hist['size_tokens'])
else:
    hist['notional'] = hist.get('size_usd', np.nan)

In [38]:
# closed pnl fallback
# If there is no `closed_pnl` column, borrow the first column whose name
# contains "pnl".
if 'closed_pnl' not in hist.columns:
    pnl_source = next((name for name in hist.columns if 'pnl' in name), None)
    if pnl_source is not None:
        hist['closed_pnl'] = pd.to_numeric(hist[pnl_source], errors='coerce')

# Always end with a numeric `closed_pnl` column (all-NaN when nothing was found).
nan_fallback = pd.Series([np.nan]*len(hist))
pnl_values = hist.get('closed_pnl', nan_fallback)
hist['closed_pnl'] = pd.to_numeric(pnl_values, errors='coerce')

In [39]:
# Merge with sentiment
# Left join keeps every row of `hist`; rows with no matching `date` in `fg`
# get NaN in the columns that come from `fg`.
df = pd.merge(hist, fg, how='left', on='date')

In [40]:
#  Derived metrics
# A trade is a win when its closed PnL is strictly positive. Rows with a
# missing PnL stay NaN instead of being scored 0, so `mean()` (which skips
# NaN) reports the win rate over trades with a known outcome only.
# The column is float (1.0 / 0.0 / NaN) so that it can hold the missing marker.
df['is_win'] = df['closed_pnl'].gt(0).astype(float).where(df['closed_pnl'].notna())
# PnL relative to trade size; a zero notional becomes NaN to avoid +/-inf.
df['pnl_per_notional'] = df['closed_pnl'] / df['notional'].replace(0, np.nan)
# Upper-cased sentiment label; rows with no classification become 'UNKNOWN'.
df['sentiment'] = df['classification'].fillna('UNKNOWN').str.upper()


In [41]:

def bucket_from_leverage(x):
    """Map a leverage value to a coarse bucket label.

    Parameters
    ----------
    x : number or None
        Leverage value to classify.

    Returns
    -------
    str or float
        One of '<=2x', '2-5x', '5-10x', '10-20x', '20-50x', '>50x' (upper
        bounds are inclusive), or ``np.nan`` when ``x`` is None, NaN,
        non-positive, or cannot be compared with numbers.
    """
    # (inclusive upper bound, label), checked in ascending order.
    upper_bounds = (
        (2, '<=2x'),
        (5, '2-5x'),
        (10, '5-10x'),
        (20, '10-20x'),
        (50, '20-50x'),
    )
    try:
        if x is None or (isinstance(x, float) and np.isnan(x)):
            return np.nan
        if x <= 0:
            return np.nan
        for bound, label in upper_bounds:
            if x <= bound:
                return label
        return '>50x'
    except (TypeError, ValueError):
        # Non-comparable input (e.g. a string) has no bucket. Only comparison
        # errors are swallowed, so KeyboardInterrupt/SystemExit still propagate.
        return np.nan

# Label every row with its leverage bucket (NaN where no bucket applies).
df['lev_bucket'] = df['leverage'].map(bucket_from_leverage)

In [None]:
#  Aggregations
def _summarise(frame, keys, **named_aggs):
    """Group `frame` by `keys`, apply the named aggregations, and flatten the index."""
    return frame.groupby(keys).agg(**named_aggs).reset_index()


# Aggregation specs shared by the summaries below (output column -> (source, func)).
_pnl_aggs = {'n_trades': ('closed_pnl', 'count'), 'avg_pnl': ('closed_pnl', 'mean')}
_win_aggs = {'win_rate': ('is_win', 'mean')}
_exposure_aggs = {'avg_notional': ('notional', 'mean'), 'avg_leverage': ('leverage', 'mean')}

# Per sentiment: trade count, mean/median PnL, win rate, mean notional and leverage.
sentiment_summary = _summarise(
    df,
    'sentiment',
    **_pnl_aggs,
    median_pnl=('closed_pnl', 'median'),
    **_win_aggs,
    **_exposure_aggs,
)

# Same metrics (without the median) per sentiment and direction.
direction_summary = _summarise(
    df,
    ['sentiment', 'direction'],
    **_pnl_aggs,
    **_win_aggs,
    **_exposure_aggs,
)

# Trade count, mean PnL and win rate per sentiment and leverage bucket.
lev_summary = _summarise(
    df,
    ['sentiment', 'lev_bucket'],
    **_pnl_aggs,
    **_win_aggs,
)

In [None]:


OUT_DIR = "./outputs"

# Create the output directory up front: `to_csv` and `savefig` do not create
# missing parent directories, so a fresh checkout would otherwise fail here.
os.makedirs(OUT_DIR, exist_ok=True)

#  Save CSV summaries
sentiment_summary.to_csv(os.path.join(OUT_DIR, "sentiment_summary.csv"), index=False)
direction_summary.to_csv(os.path.join(OUT_DIR, "direction_summary.csv"), index=False)
lev_summary.to_csv(os.path.join(OUT_DIR, "lev_summary.csv"), index=False)

In [44]:
# 1) PnL distribution by sentiment
fig, ax = plt.subplots(figsize=(10,6))
# Keep the fixed display order, restricted to sentiments present in the data.
# `plot_sentiments` is reused by later cells.
plot_sentiments = [s for s in ['EXTREME FEAR','FEAR','NEUTRAL','GREED','EXTREME GREED'] if s in df['sentiment'].values]
for s in plot_sentiments:
    subset = df.loc[df['sentiment']==s, 'closed_pnl'].dropna()
    # Skip groups too small for a meaningful histogram.
    if len(subset) > 10:
        # Trim to the central 98%, as the title states. `clip` would instead
        # pile the tails into the outermost bins and draw spikes at the edges.
        lower, upper = subset.quantile(0.01), subset.quantile(0.99)
        trimmed = subset[subset.between(lower, upper)]
        # The legend reports the group size before trimming.
        ax.hist(trimmed, bins=80, alpha=0.45, density=True, label=f"{s} (n={len(subset)})")
ax.set_xlabel("Closed PnL")
ax.set_ylabel("Density")
ax.set_title("Closed PnL distribution by Sentiment (central 98% trimmed)")
# Only draw a legend when at least one histogram was plotted.
if ax.get_legend_handles_labels()[0]:
    ax.legend(fontsize='small')
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, "pnl_distribution_by_sentiment.png"))
plt.close(fig)

In [45]:
# 2) Win rate by sentiment
# One bar per sentiment label, taken straight from the sentiment summary table.
fig, ax = plt.subplots(figsize=(8,5))
ws = sentiment_summary.copy()
ax.bar(ws['sentiment'], ws['win_rate'])
ax.set_ylabel("Win Rate")
ax.set_xlabel("Sentiment")
ax.set_title("Win Rate by Sentiment")
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, "win_rate_by_sentiment.png"))
plt.close(fig)

In [46]:
# 3) Average PnL by direction and sentiment (grouped bar)
# Rows are directions, columns are sentiments, cells are the mean closed PnL.
pivot = direction_summary.pivot(index='direction', columns='sentiment', values='avg_pnl')
# Keep only the sentiments selected for plotting; drop directions with no data.
common_cols = [col for col in pivot.columns if col in plot_sentiments]
pivot = pivot.loc[:, common_cols].dropna(how='all')
if not pivot.empty:
    ax = pivot.plot.bar(figsize=(10,6), rot=0)
    ax.set(ylabel="Average Closed PnL", title="Average Closed PnL by Direction and Sentiment")
    bar_fig = ax.figure
    bar_fig.tight_layout()
    bar_fig.savefig(os.path.join(OUT_DIR, "avg_pnl_direction_sentiment.png"))
    plt.close(bar_fig)

In [47]:

# 4) Win rate by leverage bucket and sentiment (line plot)
lev_plot = lev_summary.dropna(subset=['lev_bucket'])
if not lev_plot.empty:
    # Bucket labels are strings, so sorting them puts '10-20x' before '2-5x'.
    # Use the explicit low-to-high order so the x-axis reads left to right.
    bucket_order = ['<=2x', '2-5x', '5-10x', '10-20x', '20-50x', '>50x']
    seen_buckets = set(lev_plot['lev_bucket'].astype(str))
    # Known buckets in order; any unexpected label is appended, not dropped.
    x_labels = [b for b in bucket_order if b in seen_buckets]
    x_labels += sorted(seen_buckets.difference(bucket_order))

    fig, ax = plt.subplots(figsize=(10,6))
    for s in lev_plot['sentiment'].unique():
        # Align every sentiment to the same x positions; a bucket with no
        # trades for this sentiment becomes NaN and shows as a gap in the line.
        rates = (
            lev_plot.loc[lev_plot['sentiment']==s]
            .assign(lev_bucket=lambda frame: frame['lev_bucket'].astype(str))
            .set_index('lev_bucket')['win_rate']
            .reindex(x_labels)
        )
        ax.plot(x_labels, rates.to_numpy(), marker='o', label=s)
    ax.set_xlabel("Leverage Bucket")
    ax.set_ylabel("Win Rate")
    ax.set_title("Win Rate by Leverage Bucket and Sentiment")
    ax.legend(fontsize='small')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, "win_rate_lev_bucket_sentiment.png"))
    plt.close(fig)

In [None]:

#  Small numeric summary
def _column_mean(frame, column):
    """Return the NaN-skipping mean of `column` as a float, or None if the column is absent."""
    if column not in frame.columns:
        return None
    return float(frame[column].mean(skipna=True))


# Rows whose sentiment is one of the labels selected for plotting.
known_sentiment_mask = df['sentiment'].isin(plot_sentiments)
summary_stats = {
    "total_trades": len(df),
    "trades_with_known_sentiment": int(known_sentiment_mask.sum()),
    "overall_win_rate": _column_mean(df, 'is_win'),
    "avg_leverage": _column_mean(df, 'leverage'),
}
stats_path = os.path.join(OUT_DIR, "analysis_summary_stats.csv")
pd.DataFrame([summary_stats]).to_csv(stats_path, index=False)

print("Saved outputs to:", OUT_DIR)
print("Files created: pnl_distribution_by_sentiment.png, win_rate_by_sentiment.png, avg_pnl_direction_sentiment.png (if directions exist), win_rate_lev_bucket_sentiment.png (if leverage exists), sentiment_summary.csv, direction_summary.csv, lev_summary.csv, analysis_summary_stats.csv")

Saved outputs to: ./outputs
Files created: pnl_distribution_by_sentiment.png, win_rate_by_sentiment.png, avg_pnl_direction_sentiment.png (if directions exist), win_rate_lev_bucket_sentiment.png (if leverage exists), sentiment_summary.csv, direction_summary.csv, lev_summary.csv, analysis_summary_stats.csv


: 