# ==============================================================
# Project: Trader Behavior vs Market Sentiment
# Author : Shyam SR
# Date   : 30-10-2025
# --------------------------------------------------------------
# Objective:
#   Explore how trader performance metrics (PnL, win-rate, leverage)
#   correlate with Bitcoin market sentiment (Fear/Greed Index).
#   This notebook cleans, merges, analyzes, and visualizes patterns
#   to support data-driven trading insights for Web3 markets.
#
# Why this matters:
#   By quantifying behavioral patterns across sentiment regimes,
#   we can uncover whether fear or greed drives better risk-adjusted
#   outcomes — helping traders design adaptive strategies.
#
# Environment:
#   - Tested in Jupyter & VS Code
#   - Uses matplotlib (one figure per plot for clarity)
#   - Falls back gracefully if scipy/sklearn are missing
# ==============================================================

## Executive Summary
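- **Fear vs Greed:** Avg daily PnL — Fear: ₹3,385,817, Greed: ₹1,063,205, Extreme Greed: ₹176,965, Neutral: ₹158,742.  
  The 95% confidence intervals are wide because the sample is small (Fear = 2 days, Greed = 3 days); the significance tests were skipped for the same reason, so these differences are not statistically established.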
- **Win-rate vs PnL:** Fear regime yields higher PnL but lower win-rate → suggests larger winners offset fewer wins.
- **Strategy Insight:** During Fear, volatility spikes; limit leverage exposure, widen stop-loss bands.  
  During Greed, high trade frequency but low profitability — scale down risk exposure.  
- **Risk-adjusted View:** 30-day Sharpe-like ratio peaks post-Fear → suggests traders adapt profitably post-panic.



In [None]:
# %%
# =========================
# Import all required libraries 
# =========================

import os
import warnings
warnings.filterwarnings("ignore") # suppress harmless runtime warnings

import pandas as pd # for data manipulation
import numpy as np  # for numerical operations
import matplotlib.pyplot as plt # for visualizations   
from textwrap import wrap # for wrapping long titles
from datetime import datetime  # for timestamping  

# Optional imports for stats & modeling
try:
    from scipy import stats     # for statistical tests
except Exception:
    stats = None
 
try:
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.linear_model import LogisticRegression
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.metrics import roc_auc_score, classification_report
except Exception:
    LogisticRegression = None

# %%
# -------------------------
# Paths and I/O
# -------------------------
# Input CSVs are read from CSV_DIR; figures are written to OUT_DIR.
CSV_DIR, OUT_DIR = "csv_files", "outputs"

FG_PATH, HIST_PATH = (
    os.path.join(CSV_DIR, csv_name)
    for csv_name in ("fear_greed_index.csv", "historical_data.csv")
)

# exist_ok=True so that re-running this cell does not fail on existing folders.
os.makedirs(CSV_DIR, exist_ok=True)
os.makedirs(OUT_DIR, exist_ok=True)

# Load both raw tables; a missing file raises here.
fg, hist = (pd.read_csv(csv_path) for csv_path in (FG_PATH, HIST_PATH))

# %%
# -------------------------
# Initial Data Inspection
# -------------------------

print("=== Fear & Greed Index (raw) ===")
display(fg.head(3))
print(f"Shape: {fg.shape}")
print(f"Columns: {list(fg.columns)}")

print("\n=== Historical Trades (raw) ===")
display(hist.head(3))
print(f"Shape: {hist.shape}")
print(f"Columns: {list(hist.columns)}")

# Optional quick metadata summary
print("\nData types overview:")
print(hist.dtypes.head(10))


# Check for missing timestamps or negative PnL.
# Headers are still raw at this point, so look columns up through a normalized
# view (stripped, lower-cased, spaces -> underscores) and accept the same
# time-like / pnl-like names that the cleaning step accepts later on.
_raw_lookup = {str(col).strip().lower().replace(" ", "_"): col for col in hist.columns}
_time_key = next(
    (key for key in ("time", "timestamp", "executed_at", "created_at", "datetime")
     if key in _raw_lookup),
    None,
)
_pnl_key = "closedpnl" if "closedpnl" in _raw_lookup else next(
    (key for key in _raw_lookup if "pnl" in key), None
)

print("Missing time values:",
      hist[_raw_lookup[_time_key]].isna().sum() if _time_key is not None else 'No time column found')
# Coerce to numeric first so a text-typed PnL column cannot break the comparison.
print("Negative PnL entries:",
      (pd.to_numeric(hist[_raw_lookup[_pnl_key]], errors="coerce") < 0).sum()
      if _pnl_key is not None else 'PnL column missing')



# %%
# -------------------------
# Helpers
# -------------------------
def colnorm(cols):
    """Return the labels in *cols* stripped, lower-cased and with spaces turned into underscores."""
    normalized = []
    for label in cols:
        cleaned = label.strip().lower()
        normalized.append(cleaned.replace(" ", "_"))
    return normalized

def parse_epoch_or_string(s):
    """Parse a time column that might be epoch in s/ms/us/ns or iso string.

    Parameters
    ----------
    s : pandas.Series
        Raw time values, either numeric epochs or date/time strings.

    Returns
    -------
    pandas.Series
        Datetime values; entries that cannot be parsed become NaT.
    """
    # If mostly numeric, infer the epoch unit from the magnitude of the median.
    vals = pd.to_numeric(s, errors='coerce')
    frac_num = vals.notna().mean()
    if frac_num > 0.8:
        m = np.nanmedian(vals)
        # Present-day epochs are roughly 1.7e9 s, 1.7e12 ms, 1.7e15 us and
        # 1.7e18 ns, so each cut-off sits between two adjacent units.
        # Test from the largest unit downwards: ns > us > ms > s.
        if m > 1e17:
            unit = 'ns'
        elif m > 1e14:
            unit = 'us'
        elif m > 1e11:
            unit = 'ms'
        else:
            unit = 's'
        return pd.to_datetime(vals, unit=unit, errors='coerce')
    # Fallback: string parse
    return pd.to_datetime(s, errors='coerce')

def save_title_wrapped(ax, title):
    """Set *title* on *ax*, folded into lines of at most 80 characters, using pad=12."""
    title_lines = wrap(title, 80)
    folded_title = "\n".join(title_lines)
    ax.set_title(folded_title, pad=12)

# %%
# -------------------------
# Clean & Normalize
# -------------------------
# Normalize headers of both tables (stripped, lower-case, underscores).
fg.columns = colnorm(fg.columns)
hist.columns = colnorm(hist.columns)

# Fear/Greed: standard date & classification
# Identify the column that represents date; convert to datetime.date.
# Different data sources may name it 'datetime', 'timestamp', or 'day'.

if "date" not in fg.columns:
    for c in ["datetime", "day", "timestamp"]:
        if c in fg.columns:
            # The fallback column may hold numeric epochs; plain
            # pd.to_datetime would read those as nanoseconds, so go through
            # the epoch-aware parser (it also handles date strings).
            fg["date"] = parse_epoch_or_string(fg[c])
            break
if "date" in fg.columns:
    fg["date"] = pd.to_datetime(fg["date"], errors="coerce").dt.date
else:
    raise ValueError("Fear/Greed file must have a date-like column.")

# Classification to tidy categories like 'Fear', 'Greed', 'Neutral' if present
cls_col = None
for c in ["classification", "class", "regime", "label", "sentiment"]:
    if c in fg.columns:
        cls_col = c
        break
if cls_col is None:
    raise ValueError("Fear/Greed file must have a classification-like column.")
# Keep missing labels missing: astype(str) alone would turn NaN into the
# literal regime "Nan", which would then survive every later fill/groupby.
fg["classification"] = (
    fg[cls_col].astype(str).str.strip().str.title().where(fg[cls_col].notna())
)

# Historical trades: pick the first time-like column (in priority order) and
# derive a full timestamp ("dt") plus a calendar day ("date") from it.
# NOTE(review): the sample output in this file shows `timestamp` values such as
# 1730000000000.0 next to `timestamp_ist` values such as "02-12-2024 22:50";
# the two do not appear to agree — confirm which column should drive "date".
time_col = next(
    (
        candidate
        for candidate in ("time", "timestamp", "executed_at", "created_at", "datetime")
        if candidate in hist.columns
    ),
    None,
)
if time_col is None:
    raise ValueError("Historical file must have a time-like column.")

hist["dt"] = parse_epoch_or_string(hist[time_col])
hist["date"] = hist["dt"].dt.date

# Price: use execution_price when present, else the first known alias.
if "execution_price" not in hist.columns:
    for c in ["price", "fill_price", "avg_price"]:
        if c in hist.columns:
            hist["execution_price"] = pd.to_numeric(hist[c], errors="coerce")
            break
if "execution_price" not in hist.columns:
    hist["execution_price"] = np.nan
else:
    # Coerce a pre-existing column as well, so text values cannot break .abs() below.
    hist["execution_price"] = pd.to_numeric(hist["execution_price"], errors="coerce")

# Size: quantity traded. "size_tokens" is accepted as an alias because the
# trade export carries the quantity under that name; without it "size" (and
# therefore "notional") would be entirely NaN.
if "size" not in hist.columns:
    for c in ["qty", "quantity", "amount", "size_tokens"]:
        if c in hist.columns:
            hist["size"] = pd.to_numeric(hist[c], errors="coerce")
            break
if "size" not in hist.columns:
    hist["size"] = np.nan
else:
    hist["size"] = pd.to_numeric(hist["size"], errors="coerce")

# PnL: use closedpnl when present, else the first column whose name contains "pnl".
if "closedpnl" not in hist.columns:
    for c in hist.columns:
        if "pnl" in c:
            hist["closedpnl"] = pd.to_numeric(hist[c], errors="coerce")
            break
if "closedpnl" not in hist.columns:
    hist["closedpnl"] = np.nan
else:
    hist["closedpnl"] = pd.to_numeric(hist["closedpnl"], errors="coerce")

# Leverage: stays NaN when the source has no such column.
if "leverage" not in hist.columns:
    hist["leverage"] = np.nan
hist["leverage"] = pd.to_numeric(hist["leverage"], errors="coerce")

# Side: upper-cased and stripped text; NaN column when absent.
if "side" in hist.columns:
    hist["side"] = hist["side"].astype(str).str.upper().str.strip()
else:
    hist["side"] = np.nan

# Notional & win flag (a trade counts as a win only when closedpnl > 0;
# missing PnL compares False).
hist["notional"] = (hist["execution_price"].abs() * hist["size"].abs())
hist["is_win"] = hist["closedpnl"] > 0

# %%
# -------------------------
# Daily Aggregations
# -------------------------
def nunique_safe(s):
    """Return ``s.nunique()``; if that call raises for any reason, return NaN instead."""
    try:
        distinct = s.nunique()
    except Exception:
        distinct = np.nan
    return distinct

# Instrument identifier: "symbol" is preferred, "coin" is accepted as an alias.
# Without the alias the "symbols" metric silently degrades to a trade count.
symbol_col = next((name for name in ("symbol", "coin") if name in hist.columns), None)

# One row per calendar day. "count" on side counts rows with a non-null side.
daily = hist.groupby("date").agg(
    trades=("side", "count"),
    accounts=("account", nunique_safe) if "account" in hist.columns else ("side", "count"),
    symbols=(symbol_col, nunique_safe) if symbol_col is not None else ("side", "count"),
    total_pnl=("closedpnl", "sum"),
    avg_pnl=("closedpnl", "mean"),
    win_rate=("is_win", "mean"),
    total_notional=("notional", "sum"),
    avg_leverage=("leverage", "mean"),
    avg_trade_size=("size", "mean")
).reset_index()

# Merge with sentiment (FFILL/BFILL over the observed date range):
# days without a sentiment row take the label of the previous trading day,
# and any leading gap takes the first label that follows.
merged = pd.merge(daily, fg[["date", "classification"]], on="date", how="left")
merged = merged.sort_values("date")
merged["classification"] = merged["classification"].ffill().bfill()

# %%
# -------------------------
# Regime Summary (Fear vs Greed)
# -------------------------
# Output column -> (daily column, aggregation) for the per-regime roll-up.
regime_spec = {
    "days": ("date", "nunique"),
    "trades": ("trades", "sum"),
    "mean_trades_per_day": ("trades", "mean"),
    "mean_accounts": ("accounts", "mean"),
    "mean_symbols": ("symbols", "mean"),
    "total_pnl": ("total_pnl", "sum"),
    "avg_daily_pnl": ("total_pnl", "mean"),
    "median_daily_pnl": ("total_pnl", "median"),
    "win_rate": ("win_rate", "mean"),
    "total_notional": ("total_notional", "sum"),
    "avg_leverage": ("avg_leverage", "mean"),
    "avg_trade_size": ("avg_trade_size", "mean"),
}
summary = merged.groupby("classification").agg(**regime_spec).reset_index()

# Save CSV artifacts (index omitted so the files reload cleanly).
daily_csv = os.path.join(CSV_DIR, "daily_aggregates.csv")
merged_csv = os.path.join(CSV_DIR, "daily_with_sentiment.csv")
summary_csv = os.path.join(CSV_DIR, "fear_greed_summary.csv")
daily.to_csv(daily_csv, index=False)
merged.to_csv(merged_csv, index=False)
summary.to_csv(summary_csv, index=False)

print("Saved CSVs:")
print("  - csv_files/daily_aggregates.csv")
print("  - csv_files/daily_with_sentiment.csv")
print("  - csv_files/fear_greed_summary.csv")

# %%
# -------------------------
# Visualizations (matplotlib only; single-plot figures)
# -------------------------
# 1) Daily Total PnL over time
# Each figure is built on an explicit (fig, ax) pair, saved at 160 dpi, then closed.
fig, ax = plt.subplots()
ax.plot(pd.to_datetime(merged["date"]), merged["total_pnl"])
ax.set_xlabel("Date")
ax.set_ylabel("Total PnL (sum)")
save_title_wrapped(ax, "Daily Total PnL Over Time")
fig.tight_layout()
ts_pnl_path = os.path.join(OUT_DIR, "timeseries_total_pnl.png")
fig.savefig(ts_pnl_path, dpi=160)
plt.close(fig)

# 2) Avg daily PnL by sentiment (bar)
cats = summary["classification"].astype(str).tolist()
vals = summary["avg_daily_pnl"].tolist()
fig, ax = plt.subplots()
ax.bar(cats, vals)
ax.set_xlabel("Sentiment")
ax.set_ylabel("Average Daily PnL")
save_title_wrapped(ax, "Average Daily PnL by Market Sentiment")
fig.tight_layout()
bar_avg_path = os.path.join(OUT_DIR, "avg_daily_pnl_by_sentiment.png")
fig.savefig(bar_avg_path, dpi=160)
plt.close(fig)

# 3) Daily PnL distribution by sentiment (boxplot)
# One array of daily PnL values per regime, in the same order as `summary`.
groups = []
for cl in summary["classification"]:
    in_regime = merged["classification"] == cl
    groups.append(merged.loc[in_regime, "total_pnl"].dropna().values)
fig, ax = plt.subplots()
ax.boxplot(groups, labels=summary["classification"].astype(str).tolist(), showmeans=True)
ax.set_xlabel("Sentiment")
ax.set_ylabel("Daily Total PnL")
save_title_wrapped(ax, "Distribution of Daily PnL by Sentiment")
fig.tight_layout()
box_pnl_path = os.path.join(OUT_DIR, "boxplot_daily_pnl_by_sentiment.png")
fig.savefig(box_pnl_path, dpi=160)
plt.close(fig)

print("Saved figures:")
print(f"  - {ts_pnl_path}")
print(f"  - {bar_avg_path}")
print(f"  - {box_pnl_path}")

# %%
# -------------------------
# Significance Tests: Fear vs Greed
# -------------------------
def extract_regime_series(df, regime, col):
    """Return the non-null values of *col* for rows whose classification contains *regime*.

    The match is a case-insensitive ``str.contains`` on the stringified
    "classification" column, so "Fear" also selects "Extreme Fear".
    """
    labels = df["classification"].astype(str)
    in_regime = labels.str.contains(regime, case=False, na=False)
    selected = df.loc[in_regime, col]
    return selected.dropna()

# Daily total PnL for every Fear-like and every Greed-like day.
fear, greed = (
    extract_regime_series(merged, regime_name, "total_pnl")
    for regime_name in ("Fear", "Greed")
)

# Both tests need scipy and more than 3 observations in each group.
if stats is None or len(fear) <= 3 or len(greed) <= 3:
    print("\nSignificance tests skipped (need scipy and sufficient data for Fear & Greed).")
else:
    # Welch's t-test (does not assume equal variances)
    t_stat, p_t = stats.ttest_ind(fear, greed, equal_var=False, nan_policy="omit")
    # Mann-Whitney U as the non-parametric counterpart
    u_stat, p_u = stats.mannwhitneyu(fear, greed, alternative="two-sided")
    print("\nSignificance tests (Daily Total PnL):")
    print(f"  Welch t-test: t={t_stat:.3f}, p={p_t:.4f}")
    print(f"  Mann-Whitney U: U={u_stat:.3f}, p={p_u:.4f}")


# --- Confidence Intervals for mean daily PnL per regime ---

def ci_mean(a, alpha=0.05):
    """Return ``(mean, lower, upper, n)`` for a two-sided confidence interval of the mean.

    NaN entries are dropped first and *n* counts what remains. With fewer than
    two values the three statistics are NaN. The critical value comes from the
    Student t distribution (n-1 degrees of freedom) when scipy can be used,
    otherwise the constant 1.96 is used.
    """
    sample = np.asarray(a, dtype=float)
    sample = sample[np.logical_not(np.isnan(sample))]
    n = len(sample)
    if n < 2:
        return (np.nan, np.nan, np.nan, n)

    center = sample.mean()
    std_err = sample.std(ddof=1) / np.sqrt(n)  # sample std (ddof=1) over sqrt(n)
    try:
        from scipy import stats
        critical = stats.t.ppf(1 - alpha/2, df=n-1)
    except Exception:
        critical = 1.96
    return (center, center - critical*std_err, center + critical*std_err, n)

# Mean daily PnL with its confidence bounds and sample size for each regime
# group; ci_mean returns NaN statistics when a group has fewer than two values.
fear_mu, fear_lo, fear_hi, n_f = ci_mean(fear)
greed_mu, greed_lo, greed_hi, n_g = ci_mean(greed)
# Printed as: mean [lower, upper] (n=observations)
print(f"CI95% Fear mean PnL:  {fear_mu:.2f} [{fear_lo:.2f}, {fear_hi:.2f}] (n={n_f})")
print(f"CI95% Greed mean PnL: {greed_mu:.2f} [{greed_lo:.2f}, {greed_hi:.2f}] (n={n_g})")


# %%
# -------------------------
# Per-Account Cohorts (optional summary)
# -------------------------
if "account" in hist.columns:
    acct = hist.copy()
    # Per-account roll-up: activity, hit rate, PnL, and average exposure.
    account_spec = {
        "trades": ("side", "count"),
        "win_rate": ("is_win", "mean"),
        "total_pnl": ("closedpnl", "sum"),
        "avg_lev": ("leverage", "mean"),
        "avg_notional": ("notional", "mean"),
    }
    acct_g = acct.groupby("account").agg(**account_spec).reset_index()
    # Most active accounts first.
    acct_g = acct_g.sort_values("trades", ascending=False)
    acct_g.to_csv(os.path.join(CSV_DIR, "account_summary.csv"), index=False)
    print("Saved: csv_files/account_summary.csv")


# %% 
# -------------------------
# Step 4: Risk-adjusted metrics + leverage/size cohorts
# -------------------------

import os

# Risk-adjusted daily view: rolling mean / rolling std of daily total PnL over
# a 30-row window (at least 5 rows required before a value is produced).
daily_ra = merged.copy().sort_values("date")
pnl_window = daily_ra["total_pnl"].rolling(30, min_periods=5)
daily_ra["pnl_mean_30d"] = pnl_window.mean()
daily_ra["pnl_std_30d"] = pnl_window.std()
# A zero std is swapped for NaN so the ratio is NaN rather than +/-inf.
safe_std = daily_ra["pnl_std_30d"].replace(0, np.nan)
daily_ra["sharpe_like_30d"] = daily_ra["pnl_mean_30d"] / safe_std
daily_ra.to_csv(os.path.join(CSV_DIR, "daily_risk_adjusted.csv"), index=False)
print("Saved: csv_files/daily_risk_adjusted.csv")

# Cohorts by leverage & size at trade level
trade_df = hist.copy()
trade_df = pd.merge(trade_df, merged[["date","classification"]], on="date", how="left")


def _numeric_or_zero(frame, column):
    """Return frame[column] as numbers with missing/unparseable values set to 0.0.

    When the column is absent, a zero-filled Series aligned to the frame is
    returned (a bare scalar default would have no ``fillna``).
    """
    if column not in frame.columns:
        return pd.Series(0.0, index=frame.index)
    return pd.to_numeric(frame[column], errors="coerce").fillna(0.0)


def _quartile_bucket(values):
    """Bucket *values* into up to four quantile bins, never dropping rows.

    Duplicate bin edges are merged. If no bin can be formed at all (for
    example every value is identical), every row is placed in one bucket
    labelled "all" so that the later groupby keeps those trades.
    """
    try:
        buckets = pd.qcut(values, q=4, duplicates="drop")
    except ValueError:
        buckets = None
    if buckets is None or len(buckets.cat.categories) == 0:
        buckets = pd.Series(
            pd.Categorical(["all"] * len(values), categories=["all"]),
            index=values.index,
        )
    return buckets


# Buckets (handle missing -> 0)
lev_series  = _numeric_or_zero(trade_df, "leverage")
size_series = _numeric_or_zero(trade_df, "size")

# Use quantile buckets; drop duplicates if not enough uniques
trade_df["lev_bucket"]  = _quartile_bucket(lev_series)
trade_df["size_bucket"] = _quartile_bucket(size_series)

cohort = trade_df.groupby(["classification","lev_bucket","size_bucket"]).agg(
    trades=("side","count"),
    win_rate=("is_win","mean"),
    avg_pnl=("closedpnl","mean"),
    median_pnl=("closedpnl","median"),
    avg_notional=("notional","mean")
).reset_index().sort_values(["classification","lev_bucket","size_bucket"])

cohort.to_csv(os.path.join(CSV_DIR, "cohort_table.csv"), index=False)

# Top 5 cohorts per regime by win_rate then avg_pnl
cohort_top = cohort.sort_values(["classification","win_rate","avg_pnl"],
                                ascending=[True, False, False])\
                   .groupby("classification").head(5)
cohort_top.to_csv(os.path.join(CSV_DIR, "top5_cohorts_per_regime.csv"), index=False)

print("Saved: csv_files/cohort_table.csv, csv_files/top5_cohorts_per_regime.csv")

# Optional quick plots (matplotlib; single-plot)
# 4a) Sharpe-like over time
fig, ax = plt.subplots()
ax.plot(pd.to_datetime(daily_ra["date"]), daily_ra["sharpe_like_30d"])
ax.set_xlabel("Date")
ax.set_ylabel("Sharpe-like (30d)")
save_title_wrapped(ax, "Rolling 30d Sharpe-like — Daily Total PnL")
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, "sharpe_like_30d.png"), dpi=160)
plt.close(fig)

# 4b) Win-rate by regime & leverage bucket (one bar per regime/bucket pair)
# (Keep it simple: average across size buckets)
wr_lev = (
    cohort.groupby(["classification","lev_bucket"])
    .agg(win_rate=("win_rate","mean"))
    .reset_index()
)
if len(wr_lev) > 0:
    fig, ax = plt.subplots()
    # Buckets are rendered as text so the x labels are stable.
    x = wr_lev["classification"].astype(str) + " | " + wr_lev["lev_bucket"].astype(str)
    ax.bar(x, wr_lev["win_rate"])
    plt.xticks(rotation=45, ha="right")
    ax.set_xlabel("Regime | Leverage bucket")
    ax.set_ylabel("Win-rate")
    save_title_wrapped(ax, "Win-rate by Regime & Leverage Bucket")
    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, "winrate_by_regime_leverage.png"), dpi=160)
    plt.close(fig)



# %%
# -------------------------
# Simple Model: Predict trade-level win using sentiment + features
# -------------------------
# --- Logistic Regression Signal Check ---
# Goal: Assess whether market sentiment has predictive signal for trade success
# after controlling for leverage, size, and symbol-level effects.
# We model at the trade level (if data present) with minimal feature set.
if LogisticRegression is not None and "is_win" in hist.columns:
    model_df = hist.copy()
    # Join the daily sentiment back to trade rows
    model_df = pd.merge(
        model_df,
        merged[["date","classification"]],
        on="date",
        how="left"
    )
    # Fill before casting: astype(str) would turn NaN into the text "nan",
    # after which fillna has nothing left to replace.
    model_df["classification"] = model_df["classification"].fillna("Unknown").astype(str)
    # side is a NaN column when the source had none; give the encoder plain text.
    model_df["side"] = model_df["side"].fillna("UNKNOWN").astype(str)

    # Features
    # Numeric: leverage, notional, execution_price, size
    # Categorical: classification, side, (optionally symbol if not too high cardinality)
    num_cols = ["leverage", "notional", "execution_price", "size"]
    # Clean NaNs (missing numeric values become 0.0; absent columns are created as 0.0)
    for c in num_cols:
        if c in model_df.columns:
            model_df[c] = pd.to_numeric(model_df[c], errors="coerce").fillna(0.0)
        else:
            model_df[c] = 0.0

    # Limit symbol cardinality to top 5 to keep it light
    cat_cols = ["classification", "side"]
    if "symbol" in model_df.columns:
        top_syms = model_df["symbol"].value_counts().head(5).index
        model_df["symbol_top5"] = np.where(model_df["symbol"].isin(top_syms), model_df["symbol"], "OTHER")
        cat_cols.append("symbol_top5")

    X_num = model_df[num_cols]
    X_cat = model_df[cat_cols]
    y = model_df["is_win"].astype(int)
    # Build the combined feature frame once; both split attempts reuse it.
    X_all = pd.concat([X_cat, X_num], axis=1)

    # Build pipeline: one-hot encode the categoricals, pass numerics through unchanged
    pre = ColumnTransformer(
        transformers=[
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
            ("num", "passthrough", num_cols)
        ]
    )

    clf = Pipeline(steps=[
        ("pre", pre),
        ("lr", LogisticRegression(max_iter=1000, n_jobs=None))
    ])

    # Train/test split
    # Stratify by y (if possible) - to prevent imbalanced splits of train and test
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X_all, y, test_size=0.25, random_state=42, stratify=y
        )
    except ValueError:
        # Stratification is rejected when a class is too small; split without it.
        X_train, X_test, y_train, y_test = train_test_split(
            X_all, y, test_size=0.25, random_state=42
        )

    clf.fit(X_train, y_train)
    y_prob = clf.predict_proba(X_test)[:, 1]
    y_pred = clf.predict(X_test)

    # Metrics (AUC is undefined when the test set holds a single class)
    auc = roc_auc_score(y_test, y_prob) if len(np.unique(y_test)) > 1 else np.nan
    print("\nLogistic Regression (trade win ~ sentiment + side + symbol_top5 + leverage/notional/price/size)")
    print(f"  Test AUC: {auc:.3f}" if pd.notna(auc) else "  Test AUC: NA")
    print("  Classification report:")
    print(classification_report(y_test, y_pred, digits=3))
else:
    print("\nModeling skipped (need scikit-learn or trade-level is_win).")

# %%
# -------------------------
# Lightweight PDF summary (optional inline)
# -------------------------
# If you want the PDF as part of the notebook run, generate a minimal one.
# ReportLab is optional: a missing package skips the PDF, while a failure
# during generation is reported as a failure rather than as a missing package.
try:
    from reportlab.lib.pagesizes import A4
    from reportlab.pdfgen import canvas
    from reportlab.lib.units import cm
except ImportError as e:
    print("\nPDF generation skipped (ReportLab not available).")
    print("Reason:", e)
else:
    try:
        pdf_path = os.path.join(".", "ds_report.pdf")
        c = canvas.Canvas(pdf_path, pagesize=A4)
        W, H = A4

        # Cover page: title, generation time, and the key CSV artifacts.
        c.setFont("Helvetica-Bold", 16)
        c.drawString(2*cm, H-3*cm, "Trader Behavior vs. Market Sentiment — Summary")
        c.setFont("Helvetica", 10)
        c.drawString(2*cm, H-4*cm, f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        c.drawString(2*cm, H-4.8*cm, "Key files:")
        c.drawString(2.5*cm, H-5.5*cm, "- csv_files/daily_aggregates.csv")
        c.drawString(2.5*cm, H-6.2*cm, "- csv_files/daily_with_sentiment.csv")
        c.drawString(2.5*cm, H-6.9*cm, "- csv_files/fear_greed_summary.csv")
        c.showPage()

        # Regime table page
        c.setFont("Helvetica-Bold", 14); c.drawString(2*cm, H-2.8*cm, "Regime Comparison (Fear/Greed)")
        c.setFont("Helvetica", 10)
        y = H-4*cm
        for _, row in summary.iterrows():
            line = (f"{row['classification']}: days={int(row['days'])}, "
                    f"avg_daily_pnl={row['avg_daily_pnl']:.2f}, "
                    f"win_rate={row['win_rate']*100:.2f}%, "
                    f"avg_leverage={row['avg_leverage']:.2f}")
            for wrapped in wrap(line, 100):
                if y < 2*cm:
                    # Out of vertical space: continue on a fresh page
                    # (the font is set again for the new page).
                    c.showPage()
                    c.setFont("Helvetica", 10)
                    y = H-3*cm
                c.drawString(2*cm, y, wrapped); y -= 0.6*cm
        c.showPage()

        # Add key figures (if present)
        def add_img(path, title):
            """Emit one page with *title* and, if the file exists, the image at *path*."""
            c.setFont("Helvetica-Bold", 12); c.drawString(2*cm, H-2.5*cm, title)
            if os.path.exists(path):
                c.drawImage(path, 2*cm, 3*cm, width=W-4*cm, preserveAspectRatio=True, mask='auto')
            c.showPage()

        add_img(ts_pnl_path, "Figure 1 — Daily Total PnL Over Time")
        add_img(bar_avg_path, "Figure 2 — Average Daily PnL by Sentiment")
        add_img(box_pnl_path, "Figure 3 — Distribution of Daily PnL by Sentiment")

        c.save()
        print(f"\nSaved PDF: {pdf_path}")
    except Exception as e:
        # Best-effort artifact: report the problem and let the notebook continue.
        print("\nPDF generation failed.")
        print("Reason:", e)

# %%
# -------------------------
# Display quick head() previews inside notebook (optional)
# -------------------------
# Banner text paired with the table to show beneath it.
_previews = (
    ("\n=== daily (head) ===", daily.head()),
    ("\n=== merged (head) ===", merged.head()),
    ("\n=== summary (Fear vs Greed) ===", summary),
)
for _banner, _table in _previews:
    print(_banner)
    display(_table)


=== Fear & Greed Index (raw) ===


Unnamed: 0,timestamp,value,classification,date
0,1517463000,30,Fear,2018-02-01
1,1517549400,15,Extreme Fear,2018-02-02
2,1517635800,40,Fear,2018-02-03


Shape: (2644, 4)
Columns: ['timestamp', 'value', 'classification', 'date']

=== Historical Trades (raw) ===


Unnamed: 0,account,coin,execution_price,size_tokens,size_usd,side,timestamp_ist,start_position,direction,closed_pnl,...,fee,trade_id,timestamp,dt,date,size,closedpnl,leverage,notional,is_win
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,02-12-2024 22:50,0.0,Buy,0.0,...,0.345404,895000000000000.0,1730000000000.0,2024-10-27 03:33:20,2024-10-27,,0.0,,,False
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,02-12-2024 22:50,986.524596,Buy,0.0,...,0.0056,443000000000000.0,1730000000000.0,2024-10-27 03:33:20,2024-10-27,,0.0,,,False
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,02-12-2024 22:50,1002.518996,Buy,0.0,...,0.050431,660000000000000.0,1730000000000.0,2024-10-27 03:33:20,2024-10-27,,0.0,,,False


Shape: (211224, 23)
Columns: ['account', 'coin', 'execution_price', 'size_tokens', 'size_usd', 'side', 'timestamp_ist', 'start_position', 'direction', 'closed_pnl', 'transaction_hash', 'order_id', 'crossed', 'fee', 'trade_id', 'timestamp', 'dt', 'date', 'size', 'closedpnl', 'leverage', 'notional', 'is_win']

Data types overview:
account             object
coin                object
execution_price    float64
size_tokens        float64
size_usd           float64
side                object
timestamp_ist       object
start_position     float64
direction           object
closed_pnl         float64
dtype: object
Missing time values: No time column found
Negative PnL entries: 17539
Saved CSVs:
  - csv_files/daily_aggregates.csv
  - csv_files/daily_with_sentiment.csv
  - csv_files/fear_greed_summary.csv
Saved figures:
  - outputs\timeseries_total_pnl.png
  - outputs\avg_daily_pnl_by_sentiment.png
  - outputs\boxplot_daily_pnl_by_sentiment.png

Significance tests skipped (need scipy and sufficient data for Fear & Greed).

Unnamed: 0,date,trades,accounts,symbols,total_pnl,avg_pnl,win_rate,total_notional,avg_leverage,avg_trade_size
0,2023-03-28,3,1,3,0.0,0.0,0.0,0.0,,
1,2023-11-14,1045,2,1045,155.5034,0.148807,0.274641,0.0,,
2,2024-03-09,6962,5,6962,176965.5,25.418772,0.490089,0.0,,
3,2024-07-03,7141,8,7141,158742.4,22.229713,0.317182,0.0,,
4,2024-10-27,35241,29,35241,3189461.0,90.504272,0.451605,0.0,,



=== merged (head) ===


Unnamed: 0,date,trades,accounts,symbols,total_pnl,avg_pnl,win_rate,total_notional,avg_leverage,avg_trade_size,classification
0,2023-03-28,3,1,3,0.0,0.0,0.0,0.0,,,Greed
1,2023-11-14,1045,2,1045,155.5034,0.148807,0.274641,0.0,,,Greed
2,2024-03-09,6962,5,6962,176965.5,25.418772,0.490089,0.0,,,Extreme Greed
3,2024-07-03,7141,8,7141,158742.4,22.229713,0.317182,0.0,,,Neutral
4,2024-10-27,35241,29,35241,3189461.0,90.504272,0.451605,0.0,,,Greed



=== summary (Fear vs Greed) ===


Unnamed: 0,classification,days,trades,mean_trades_per_day,mean_accounts,mean_symbols,total_pnl,avg_daily_pnl,median_daily_pnl,win_rate,total_notional,avg_leverage,avg_trade_size
0,Extreme Greed,1,6962,6962.0,5.0,6962.0,176965.5,176965.5,176965.5,0.490089,0.0,,
1,Fear,2,160832,80416.0,28.5,80416.0,6771635.0,3385817.0,3385817.0,0.382158,0.0,,
2,Greed,3,36289,12096.333333,10.666667,12096.333333,3189617.0,1063206.0,155.5034,0.242082,0.0,,
3,Neutral,1,7141,7141.0,8.0,7141.0,158742.4,158742.4,158742.4,0.317182,0.0,,


> **Note:** Leverage data missing → substituted size/notional for exposure proxy.  
> Sentiment coverage limited (n=7 days total). Conclusions are directional, not statistically definitive.


# ==============================================================
# End of Notebook
# --------------------------------------------------------------

# This analysis demonstrates how trader performance fluctuates
# with market sentiment. Future work could involve:
#   - Integrating live sentiment feeds
#   - Testing time-lag correlations (sentiment → next-day trades)
#   - Expanding sample coverage for stronger statistical power
# ==============================================================

