In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [2]:
# Project folder layout: everything this notebook writes lives under `root`.
root = Path("ds_KanishaSharma")
csv_dir = root / "csv_files"   # tabular exports (merged data, summaries)
out_dir = root / "outputs"     # rendered figures

# Create both folders up front; exist_ok keeps the cell safe to re-run.
for folder in (csv_dir, out_dir):
    folder.mkdir(parents=True, exist_ok=True)

In [4]:
# Load the two raw inputs (sentiment index, then trade history) from the working directory.
fg, trades = (
    pd.read_csv(csv_name)
    for csv_name in ("fear_greed_index.csv", "historical_data.csv")
)

In [5]:
# Normalise the headers of both frames: trim, lower-case, and collapse every run of
# non-word characters into a single underscore (e.g. "Closed PnL" -> "closed_pnl").
# NOTE(review): later cells look up names such as "closedpnl" and "size"; confirm these
# match the normalised headers of the actual CSVs.
for frame in (fg, trades):
    frame.columns = (
        frame.columns
        .str.strip()
        .str.lower()
        .str.replace(r"[^\w]+","_",regex=True)
    )

Parse Dates Correctly

In [6]:
# Fear/Greed: parse the date column (the original author notes it is YYYY-MM-DD) and
# reduce it to plain `date` objects so it can be joined against per-trade dates.
fg["date"] = pd.to_datetime(fg["date"], errors="coerce").dt.date

# Tidy the labels ("extreme greed " -> "Extreme Greed"). astype(str) would turn a missing
# value into the literal text "Nan", creating a bogus sentiment class, so missing entries
# are put back as NaN and left for the merge step to label.
label_text = fg["classification"].astype(str).str.strip().str.title()
fg["classification"] = label_text.where(fg["classification"].notna())

# Rows whose date could not be parsed cannot be matched to any trade.
fg = fg.dropna(subset=["date"])

In [8]:
# Trades: convert the UNIX-epoch "timestamp" column to datetimes.
# Trying unit="ms" and falling back to unit="s" only on an exception does not work:
# second-resolution values parse without error as milliseconds (landing in 1970), so the
# fallback never fires when it is needed. The unit is chosen from the magnitude instead.
if trades["timestamp"].dtype.kind != "M":  # "M" = already datetime64; keeps the cell re-runnable
    ts_numeric = pd.to_numeric(trades["timestamp"], errors="coerce")
    # Present-day epochs are ~1e9 in seconds and ~1e12 in milliseconds; 1e11 separates the
    # two for any date after early 1973.
    ts_unit = "ms" if ts_numeric.abs().median() >= 1e11 else "s"
    trades["timestamp"] = pd.to_datetime(ts_numeric, unit=ts_unit, errors="coerce")

# Calendar date of each trade; this is the join key for the daily sentiment index.
trades["trade_date"] = trades["timestamp"].dt.date

# Convert numeric fields (only those present); unparseable values become NaN.
for c in ["execution_price","size","leverage","closedpnl","start_position"]:
    if c in trades.columns:
        trades[c] = pd.to_numeric(trades[c], errors="coerce")

# Normalise the side labels. astype(str) would turn a missing value into the text "nan",
# so missing entries are restored to NaN afterwards.
if "side" in trades.columns:
    side_text = trades["side"].astype(str).str.strip().str.lower()
    trades["side"] = side_text.where(trades["side"].notna())

Merge sentiment onto trades

In [9]:
# One sentiment label per calendar day. If the index ever contains the same date twice,
# a plain left join would duplicate every trade on that day and inflate all counts, so
# keep a single row per date and share the key name with the trades frame.
sentiment_by_date = (
    fg[["date","classification"]]
    .drop_duplicates(subset="date")
    .rename(columns={"date":"trade_date", "classification":"daily_sentiment"})
)

# Left join: every trade is kept; trades on days without an index entry get NaN.
# validate="many_to_one" makes pandas raise if the right side is not unique per date.
merged = trades.merge(
    sentiment_by_date,
    on="trade_date",
    how="left",
    validate="many_to_one",
)

# Fill missing sentiment
merged["daily_sentiment"] = merged["daily_sentiment"].fillna("Unknown")

# Add win/loss flag if possible (a trade counts as a win when its closed PnL is positive)
if "closedpnl" in merged.columns:
    merged["is_win"] = merged["closedpnl"] > 0

# Save merged dataset
merged.to_csv(csv_dir/"merged_trades_with_sentiment.csv", index=False)


Summaries

In [13]:
# Named aggregations, written as output column -> (source column, reducer).
# The per-sentiment row count is always computed.
# NOTE(review): that count column is labelled "trade_date" in the saved summary.
agg_dict = {"trade_date": ("trade_date","count")}

# Optional metrics are added only when their source column exists in `merged`.
optional_metrics = (
    ("avg_size", "size", "mean"),
    ("avg_leverage", "leverage", "mean"),
    ("avg_pnl", "closedpnl", "mean"),
    ("win_rate", "is_win", "mean"),
)
for out_name, src_col, reducer in optional_metrics:
    if src_col in merged.columns:
        agg_dict[out_name] = (src_col, reducer)

# One row per sentiment class, with the group label restored as a regular column.
by_sentiment = merged.groupby("daily_sentiment")
sentiment_summary = by_sentiment.agg(**agg_dict).reset_index()

# Persist the table and preview it as the cell output.
sentiment_summary.to_csv(csv_dir/"sentiment_summary.csv", index=False)
sentiment_summary.head()


Unnamed: 0,daily_sentiment,trade_date
0,Extreme Greed,6962
1,Fear,133871
2,Greed,36289
3,Neutral,7141
4,Unknown,26961


Static Visualizations

In [18]:
# (a) Bar chart: number of trades recorded under each sentiment label
sentiment_counts = merged["daily_sentiment"].value_counts()

fig, ax = plt.subplots()
sentiment_counts.plot(kind="bar", ax=ax)
ax.set_title("Number of Trades by Sentiment")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Trade Count")
fig.tight_layout()
fig.savefig(out_dir/"trades_by_sentiment.png")
plt.close(fig)  # figure is only written to disk, not shown inline


In [19]:
# (b) Boxplot of leverage per sentiment class
if "leverage" in merged.columns:
    # DataFrame.boxplot(by=...) opens a figure of its own unless it is handed an Axes, so a
    # preceding plt.figure() would be left behind empty and never closed. Create the
    # figure explicitly and pass its Axes in.
    fig, ax = plt.subplots()
    merged.boxplot(column="leverage", by="daily_sentiment", grid=False, ax=ax)
    ax.set_title("Leverage Distribution by Sentiment")
    fig.suptitle("")  # drop the automatic "Boxplot grouped by ..." super-title
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Leverage")
    fig.tight_layout()
    fig.savefig(out_dir/"leverage_boxplot.png")
    plt.close(fig)
else:
    print("⚠️ No leverage column found. Skipping leverage boxplot.")

# (c) Win rate: share of trades flagged is_win within each sentiment class
if "is_win" in merged.columns:
    win_rate = merged.groupby("daily_sentiment")["is_win"].mean()
    fig, ax = plt.subplots()
    win_rate.plot(kind="bar", ax=ax)
    ax.set_title("Win Rate by Sentiment")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Win Rate")
    fig.tight_layout()
    fig.savefig(out_dir/"winrate_by_sentiment.png")
    plt.close(fig)
else:
    print("⚠️ No is_win column (likely no closedpnl). Skipping winrate plot.")


⚠️ No leverage column found. Skipping leverage boxplot.
⚠️ No is_win column (likely no closedpnl). Skipping winrate plot.


Time-Series Visualizations

In [20]:
def _plot_lines_by_sentiment(frame, x_col, y_col, title, xlabel, ylabel, filename, marker=None):
    """Draw one line per sentiment class and save the figure under `out_dir`.

    Parameters
    ----------
    frame : DataFrame with a "daily_sentiment" column plus `x_col` and `y_col`.
    x_col, y_col : names of the columns plotted on the x and y axes.
    title, xlabel, ylabel : figure title and axis labels.
    filename : name of the image file written inside `out_dir`.
    marker : optional matplotlib marker style; None draws plain lines.
    """
    fig, ax = plt.subplots(figsize=(14,6))
    for s in frame["daily_sentiment"].unique():
        subset = frame[frame["daily_sentiment"]==s]
        ax.plot(subset[x_col], subset[y_col], marker=marker, label=s)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()
    fig.tight_layout()
    fig.savefig(out_dir/filename)
    plt.close(fig)  # written to disk only; closing avoids accumulating open figures


# (a) Daily trade count
daily_trades = merged.groupby(["trade_date","daily_sentiment"]).size().reset_index(name="trade_count")
_plot_lines_by_sentiment(
    daily_trades, "trade_date", "trade_count",
    "Daily Trade Volume Over Time (by Sentiment)", "Date", "Number of Trades",
    "daily_trades_over_time.png", marker=".",
)

# (b) Daily avg leverage
if "leverage" in merged.columns:
    daily_lev = merged.groupby(["trade_date","daily_sentiment"])["leverage"].mean().reset_index()
    _plot_lines_by_sentiment(
        daily_lev, "trade_date", "leverage",
        "Daily Avg Leverage Over Time (by Sentiment)", "Date", "Average Leverage",
        "daily_leverage_over_time.png", marker=".",
    )

if "closedpnl" in merged.columns:
    # (c) Daily avg PnL
    daily_pnl = merged.groupby(["trade_date","daily_sentiment"])["closedpnl"].mean().reset_index()
    _plot_lines_by_sentiment(
        daily_pnl, "trade_date", "closedpnl",
        "Daily Avg PnL Over Time (by Sentiment)", "Date", "Avg PnL",
        "daily_pnl_over_time.png", marker=".",
    )

    # (d) Cumulative PnL
    # The notebook only ever builds a "timestamp" column, so sorting on "time" raises
    # KeyError unless the raw CSV happens to ship one. Use "time" if present, otherwise
    # fall back to the parsed "timestamp".
    time_col = "time" if "time" in merged.columns else "timestamp"
    merged_sorted = merged.sort_values(time_col)
    # Running total within each sentiment class, in chronological order.
    merged_sorted["cum_pnl"] = merged_sorted.groupby("daily_sentiment")["closedpnl"].cumsum()
    _plot_lines_by_sentiment(
        merged_sorted, time_col, "cum_pnl",
        "Cumulative PnL Over Time (by Sentiment)", "Time", "Cumulative PnL",
        "cumulative_pnl_over_time.png",
    )