In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Notebook-wide pandas display settings: no limit on the number of columns
# shown, and a display width of 200 characters.
for option_name, option_value in (
    ("display.max_columns", None),
    ("display.width", 200),
):
    pd.set_option(option_name, option_value)


In [None]:
# Read the four processed CSV exports, in this order: the sentiment-labelled
# rows, then the regime, strategy and trader-cluster summary tables.
df, regime_perf, strategy_perf, clusters = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/sentiment_labeled_data.csv",
        "../data/processed/regime_performance.csv",
        "../data/processed/strategy_performance.csv",
        "../data/processed/trader_cluster_profiles.csv",
    )
)

# read_csv leaves "time" unparsed; convert it to datetimes.
df = df.assign(time=pd.to_datetime(df["time"]))

# Quick sanity check of the loaded size: (rows, columns).
print(df.shape)


In [None]:
# Mean of "sentiment_binary" per date.
# "date" comes from the CSV and is never parsed (only "time" is converted at
# load), so without conversion matplotlib treats the values as categories and
# draws one tick label per date. Parsing first gives a real, chronologically
# ordered time axis.
# NOTE(review): assumes the "date" strings are in a format pd.to_datetime
# parses unambiguously (e.g. ISO YYYY-MM-DD) — confirm against the CSV.
sentiment_time = (
    df.assign(date=pd.to_datetime(df["date"]))
    .groupby("date")["sentiment_binary"]
    .mean()
    .reset_index()
)

plt.figure(figsize=(14, 4))
plt.plot(sentiment_time["date"], sentiment_time["sentiment_binary"])
plt.title("Market Sentiment Timeline (Fear → Greed)")
plt.xlabel("Date")
plt.ylabel("Sentiment Index")
plt.show()


In [None]:
# Distribution of normalized PnL for each value of "Classification".
plt.figure(figsize=(8, 5))
pnl_box_ax = sns.boxplot(x="Classification", y="normalized_pnl", data=df)
pnl_box_ax.set_title("Trader Performance by Market Sentiment")
plt.show()


In [None]:
# Scatter of risk score against normalized PnL, coloured by "Classification".
# The data is down-sampled to at most 5000 rows. The sample is seeded so that
# a Restart & Run All reproduces the same figure instead of a different
# random subset on every execution.
risk_return_sample = df.sample(n=min(5000, len(df)), random_state=42)

plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=risk_return_sample,
    x="risk_score",
    y="normalized_pnl",
    hue="Classification",
    alpha=0.4,
)
plt.title("Risk vs Return by Sentiment Regime")
plt.show()


In [None]:
# Show the regime summary table, then chart "avg_pnl" for each "regime".
display(regime_perf)

regime_rows = regime_perf.reset_index()

plt.figure(figsize=(8, 5))
regime_ax = sns.barplot(x="regime", y="avg_pnl", data=regime_rows)
regime_ax.set_title("Average PnL by Market Regime")
plt.show()


In [None]:
# Show the strategy summary table, then chart "total_pnl" per strategy.
display(strategy_perf)

# strategy_perf is read from CSV without index_col, so its index is just row
# numbers. Blindly doing reset_index() + rename("index" -> "strategy") would
# therefore label the bars 0..n-1 (or create a duplicate "strategy" column if
# one already exists). Prefer a real label column when the CSV provides one.
# NOTE(review): "index" / "Unnamed: 0" are the usual names a saved DataFrame
# index ends up with — confirm which one the CSV actually contains.
if "strategy" in strategy_perf.columns:
    strategy_perf_plot = strategy_perf
else:
    label_source = next(
        (col for col in ("index", "Unnamed: 0") if col in strategy_perf.columns),
        None,
    )
    if label_source is None:
        # No label column available: fall back to the row index.
        strategy_perf_plot = strategy_perf.reset_index().rename(columns={"index": "strategy"})
    else:
        strategy_perf_plot = strategy_perf.rename(columns={label_source: "strategy"})

plt.figure(figsize=(10, 5))
sns.barplot(data=strategy_perf_plot, x="strategy", y="total_pnl")
plt.title("Total PnL by Strategy")
plt.xticks(rotation=45)
plt.show()


In [None]:
# Show the trader-cluster profile table, then chart "avg_pnl" per "cluster".
display(clusters)

cluster_rows = clusters.reset_index()

plt.figure(figsize=(8, 5))
cluster_ax = sns.barplot(x="cluster", y="avg_pnl", data=cluster_rows)
cluster_ax.set_title("Average PnL by Trader Cluster")
plt.show()


In [None]:
# Interactive (plotly) version of the risk vs. normalized PnL scatter,
# coloured by "Classification". Down-sampled to at most 5000 rows, with a
# fixed seed so the plotted subset is the same on every run.
interactive_sample = df.sample(n=min(5000, len(df)), random_state=42)

fig = px.scatter(
    interactive_sample,
    x="risk_score",
    y="normalized_pnl",
    color="Classification",
    title="Interactive Risk vs Return by Sentiment",
    opacity=0.5,
)
fig.show()


In [None]:
# Build a short list of headline findings from the labelled data and the
# strategy summary, and display them as a one-column table.
insights = []

fear_rows = df[df["Classification"] == "Fear"]
greed_rows = df[df["Classification"] == "Greed"]

# --- Performance: mean normalized PnL under each label -----------------
fear_perf = fear_rows["normalized_pnl"].mean()
greed_perf = greed_rows["normalized_pnl"].mean()

# If either label has no rows (or only missing values) its mean is NaN, and
# any comparison with NaN is False — which would silently report the "Greed"
# conclusion. Report the missing data instead.
if pd.isna(fear_perf) or pd.isna(greed_perf):
    insights.append("Cannot compare Fear vs Greed performance: no PnL data for at least one regime.")
elif fear_perf > greed_perf:
    insights.append("Traders perform better during Fear regimes → evidence of contrarian alpha.")
else:
    insights.append("Traders perform better during Greed regimes → momentum-driven profitability.")

# --- Risk: mean risk score under each label ----------------------------
risk_fear = fear_rows["risk_score"].mean()
risk_greed = greed_rows["risk_score"].mean()

if pd.isna(risk_fear) or pd.isna(risk_greed):
    insights.append("Cannot compare Fear vs Greed risk exposure: no risk data for at least one regime.")
elif risk_fear > risk_greed:
    insights.append("Higher risk exposure during Fear → panic-driven trading behavior.")
else:
    insights.append("Higher risk exposure during Greed → FOMO-driven leverage usage.")

# --- Best strategy by total PnL -----------------------------------------
# idxmax() returns the row label, which for a CSV read without index_col is
# just a row number. Look the strategy name up from a label column when one
# exists; otherwise fall back to the row label as before.
# NOTE(review): candidate label column names are a guess at how the CSV was
# written — confirm against the file.
best_strategy_row = strategy_perf.loc[strategy_perf["total_pnl"].idxmax()]
strategy_label_col = next(
    (col for col in ("strategy", "index", "Unnamed: 0") if col in strategy_perf.columns),
    None,
)
if strategy_label_col is None:
    strategy_winner = best_strategy_row.name
else:
    strategy_winner = best_strategy_row[strategy_label_col]
insights.append(f"Best performing strategy: {strategy_winner}")

pd.DataFrame(insights, columns=["Key Insights"])
