In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Notebook-wide pandas display settings: no cap on the number of columns
# shown, and a 200-character display width.
for option_name, option_value in (
    ("display.max_columns", None),
    ("display.width", 200),
):
    pd.set_option(option_name, option_value)


In [None]:
# Read the two processed CSV inputs. The "time" column of the trade-level
# frame is converted to datetime as part of loading.
df = pd.read_csv("../data/processed/sentiment_labeled_data.csv").assign(
    time=lambda trades: pd.to_datetime(trades["time"])
)
profiles = pd.read_csv("../data/processed/trader_profiles.csv")

# Print both shapes as a quick check that the files loaded as expected.
print(df.shape, profiles.shape)


In [None]:
# Headline numbers over the whole dataset. The two counts are written out
# directly; the remaining entries are plain column means, added in display order.
global_metrics = {
    "Total Trades": len(df),
    "Total Traders": df["account"].nunique(),
}
global_metrics.update(
    (label, df[column].mean())
    for label, column in (
        ("Avg PnL", "normalized_pnl"),
        # Mean of is_profitable — presumably a 0/1 or boolean flag; verify upstream.
        ("Win Rate", "is_profitable"),
        ("Avg Leverage", "leverage"),
        ("Avg Trade Size", "size"),
    )
)

# Render as a one-column table (metric name as the index) for display.
pd.DataFrame.from_dict(global_metrics, orient="index", columns=["Value"])


In [None]:
# Per-account performance summary: one row per "account" with trade count,
# total/average PnL, share of profitable trades, and average leverage,
# trade size and risk score.
trader_perf = (
    df.groupby("account")
    .agg(
        # "count" tallies the non-null "size" values in each group.
        trades=pd.NamedAgg(column="size", aggfunc="count"),
        total_pnl=pd.NamedAgg(column="normalized_pnl", aggfunc="sum"),
        avg_pnl=pd.NamedAgg(column="normalized_pnl", aggfunc="mean"),
        win_rate=pd.NamedAgg(column="is_profitable", aggfunc="mean"),
        avg_leverage=pd.NamedAgg(column="leverage", aggfunc="mean"),
        avg_trade_size=pd.NamedAgg(column="size", aggfunc="mean"),
        avg_risk=pd.NamedAgg(column="risk_score", aggfunc="mean"),
    )
    # Turn the "account" group key back into an ordinary column.
    .reset_index()
)


In [None]:
# Scatter of each trader's average risk score against average PnL; marker
# size scales with the trader's number of trades (between 20 and 200).
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x="avg_risk",
    y="avg_pnl",
    size="trades",
    sizes=(20, 200),
    alpha=0.6,
    data=trader_perf,
    ax=ax,
)
ax.set_title("Risk vs Return Profile of Traders")
plt.show()


In [None]:
# PnL per unit of leverage. The +1 in the denominator keeps the ratio finite
# when leverage is 0.
df["leverage_efficiency"] = df["normalized_pnl"].div(df["leverage"].add(1))

# Average efficiency and number of trades for every distinct leverage value.
leverage_analysis = (
    df.groupby("leverage")
    .agg(
        avg_efficiency=pd.NamedAgg(column="leverage_efficiency", aggfunc="mean"),
        trade_count=pd.NamedAgg(column="leverage", aggfunc="count"),
    )
    .reset_index()
)

# Show only the first 20 leverage levels to keep the output short.
display(leverage_analysis.iloc[:20])


In [None]:
# Profit factor: sum of positive PnL divided by the absolute sum of negative PnL.
winning_mask = df["normalized_pnl"] > 0
losing_mask = df["normalized_pnl"] < 0

profits = df.loc[winning_mask, "normalized_pnl"].sum()
losses = abs(df.loc[losing_mask, "normalized_pnl"].sum())

# With no losing trades the ratio is undefined, so report NaN instead of
# dividing by zero.
if losses != 0:
    profit_factor = profits / losses
else:
    profit_factor = np.nan
profit_factor


In [None]:
# Segment traders with k-means on five per-account averages. Missing values
# are filled with 0 first because KMeans rejects NaN input.
features = trader_perf[["avg_pnl", "win_rate", "avg_risk", "avg_leverage", "avg_trade_size"]].fillna(0)

# Standardise each feature to zero mean / unit variance so that no single
# feature dominates the Euclidean distances used by k-means because of its scale.
scaler = StandardScaler()
X = scaler.fit_transform(features)

# n_init is pinned explicitly: the library default changed from 10 to "auto"
# in scikit-learn 1.4 (with a FutureWarning in 1.2/1.3), so relying on the
# default gives different cluster assignments across versions even with a
# fixed random_state. 10 restarts is the previous default.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)

# Store each trader's cluster label (0-3) alongside the performance metrics.
trader_perf["cluster"] = kmeans.fit_predict(X)


In [None]:
# Summarise every cluster: how many traders it holds, plus the mean of each
# per-trader metric. The mean aggregations keep their source column's name,
# so they are generated from the column list instead of being spelled out.
cluster_profile = trader_perf.groupby("cluster").agg(
    traders=("account", "count"),
    **{
        metric: (metric, "mean")
        for metric in (
            "avg_pnl",
            "win_rate",
            "avg_risk",
            "avg_leverage",
            "avg_trade_size",
        )
    },
)

display(cluster_profile)


In [None]:
# Same risk-vs-PnL scatter as the earlier trader plot, this time coloured by
# the assigned cluster label.
plt.figure(figsize=(8, 6))
cluster_axes = sns.scatterplot(
    x="avg_risk",
    y="avg_pnl",
    hue="cluster",
    palette="tab10",
    data=trader_perf,
)
cluster_axes.set_title("Trader Performance Clusters")
plt.show()


In [None]:
# Persist the results for downstream use. The per-trader table is written
# without its index; the cluster summary keeps its index (to_csv default).
trader_perf.to_csv("../data/processed/trader_performance_metrics.csv", index=False)
cluster_profile.to_csv("../data/processed/trader_cluster_profiles.csv")

# The status message previously contained mojibake ("âœ…"): the UTF-8 bytes of
# the check-mark emoji decoded as cp1252. Restored to the intended character.
print("✅ Performance analytics saved.")
