Step 1: Import Required Libraries

In [None]:
!pip install --upgrade scipy



In [None]:
!pip install quantstats

import pandas as pd
import numpy as np
from pathlib import Path
import quantstats as qs
from sklearn.cluster import KMeans



Step 2: Define Paths and Create Data Directory

In [None]:
# Directory that holds the input CSV and every artifact this notebook writes.
data_dir = Path("data")
# Create it up front so the save step cannot fail on a missing folder
# (a no-op when the directory already exists).
data_dir.mkdir(parents=True, exist_ok=True)
# Merged input table read in the next step.
merged_path = data_dir / "merged_data.csv"

Step 3: Load Data Files

In [None]:
# Read the merged table; the `time` column is parsed into datetimes on load.
df = pd.read_csv(
    merged_path,
    parse_dates=["time"],
)

Step 4: Compute Risk-Adjusted Metrics

In [None]:
def sharpe_ratio(returns):
    """Return the Sharpe ratio of ``returns``: mean divided by sample std.

    No risk-free rate is subtracted and no annualisation is applied.

    Parameters
    ----------
    returns : sequence of numbers
        Return / PnL observations (list, NumPy array or pandas Series).

    Returns
    -------
    float
        ``mean(returns) / std(returns, ddof=1)``, or ``np.nan`` when the ratio
        is undefined: fewer than two observations, or a zero or non-finite
        standard deviation.
    """
    # The sample standard deviation (ddof=1) needs at least two observations.
    if len(returns) < 2:
        return np.nan
    volatility = np.std(returns, ddof=1)
    # A constant series has zero volatility. Dividing by it would emit a
    # RuntimeWarning and return +/-inf or nan, and an inf would then float to
    # the top of any ranking built on this metric.
    if not np.isfinite(volatility) or volatility == 0:
        return np.nan
    return np.mean(returns) / volatility


def max_drawdown(series):
    """Return the deepest drop of the cumulative sum of ``series`` below its running peak.

    The running peak is taken over the cumulative values themselves, so the
    first cumulative value is its own peak; there is no implicit starting
    level of zero. The result is therefore zero or negative.

    Parameters
    ----------
    series : pandas.Series
        Per-period PnL values, in the order they should be accumulated.
    """
    equity_curve = series.cumsum()
    # Distance of every point from the highest cumulative value seen so far.
    underwater = equity_curve - equity_curve.cummax()
    return underwater.min()


def profit_factor(returns):
    """Return the sum of positive values divided by the absolute sum of negative values.

    Parameters
    ----------
    returns : pandas.Series
        Per-trade PnL values.

    Returns
    -------
    float
        Gross gains over gross losses, or ``np.nan`` when the loss total is
        not strictly positive (for example, when there are no losing values).
    """
    total_loss = -returns[returns < 0].sum()
    # Guard clause: without losses the ratio is undefined.
    if not total_loss > 0:
        return np.nan
    total_gain = returns[returns > 0].sum()
    return total_gain / total_loss

Step 5: Compute Per-Sentiment Metrics

In [None]:
def sortino_ratio(returns, risk_free_rate=0):
    """Return mean excess return divided by the spread of the negative returns.

    The denominator is ``.std()`` of the strictly negative values only (for a
    pandas Series that is the sample standard deviation).

    Parameters
    ----------
    returns : pandas.Series
        Return / PnL observations.
    risk_free_rate : float, default 0
        Amount subtracted from the mean return before dividing.

    Returns
    -------
    float
        The ratio, or ``np.nan`` when the downside deviation is exactly zero.
        A NaN deviation (too few negative values) propagates to a NaN result.
    """
    negative_std = returns[returns < 0].std()
    if negative_std == 0:
        return np.nan
    excess_mean = returns.mean() - risk_free_rate
    return excess_mean / negative_std

In [None]:
# Build one row of risk-adjusted metrics per sentiment value.
sent_metrics = []
for sentiment, group in df.groupby('Sentiment'):
    # Max drawdown is path-dependent, so put each group's rows in
    # chronological order first. The stable sort keeps the file order for rows
    # that share a timestamp. The other three metrics ignore row order.
    returns = group.sort_values('time', kind='stable')['Closed PnL']
    sent_metrics.append({
        'Sentiment': sentiment,
        'Sharpe': sharpe_ratio(returns),
        'Sortino': sortino_ratio(returns, 0),
        'MaxDrawdown': max_drawdown(returns),
        'ProfitFactor': profit_factor(returns)
    })
# Assemble the records into a single frame in one step.
df_sent_metrics = pd.DataFrame(sent_metrics)

Step 6: Trader Time-Series Clustering

Step 6.1: Build the Date × Account PnL Pivot

In [None]:
# Date-by-account matrix of `Closed PnL`: one row per `date`, one column per
# `Account`, each cell the mean of the matching rows; missing cells become 0.
pivot = pd.pivot_table(
    df,
    values='Closed PnL',
    index='date',
    columns='Account',
    aggfunc='mean',
    fill_value=0,
)

Step 6.2: Transpose the Pivot for **Clustering** (one row per account)

In [None]:
# Flip the pivot so each row is one account's PnL series across dates.
ts = pivot.transpose()

Step 6.3: KMeans

In [None]:
n_clusters = 4
# Fixed random_state so the cluster assignment is repeatable between runs.
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
# Fit on the PnL columns only. Dropping any existing `cluster` column keeps
# this cell idempotent: on a re-run the previous labels would otherwise be fed
# back into KMeans as an extra feature.
cluster_features = ts.drop(columns='cluster', errors='ignore')
ts['cluster'] = kmeans.fit_predict(cluster_features)

In [None]:
# Average per-account Sharpe ratio within each trader cluster.
#
# Cluster labels exist per account, so they have to be matched to
# account-level metrics through the `Account` key. Assigning them to the
# per-sentiment metrics table would pair rows purely by position. Working from
# the in-memory frames also avoids reading CSVs that are only written by the
# save step at the end of the notebook.
account_sharpe = (
    df.groupby('Account')['Closed PnL']
    .agg(sharpe_ratio)
    # Zero-variance accounts can yield +/-inf; treat those as undefined so
    # they do not dominate the cluster mean.
    .replace([np.inf, -np.inf], np.nan)
    .rename('Sharpe')
)
clusters = ts['cluster']
# Both sides are indexed by Account, so the join aligns on that key.
df_adv = account_sharpe.to_frame().join(clusters, how='inner').reset_index()

# Mean Sharpe per cluster (NaN Sharpe values are skipped by `mean`).
cluster_sharpe = df_adv.groupby('cluster')['Sharpe'].mean()
cluster_table = cluster_sharpe.reset_index().rename(columns={'Sharpe':'avg_sharpe'})
display(cluster_table)


Columns in df_adv: ['Sentiment', 'Sharpe', 'Sortino', 'MaxDrawdown', 'ProfitFactor', 'cluster']


Unnamed: 0,cluster,avg_sharpe
0,1,0.062806
1,2,0.030402


Step 7: Top Traders by Sentiment & Sharpe

In [None]:
# Attach each account's cluster label to the trade-level frame.
# Dropping a pre-existing `Cluster` column first keeps the cell idempotent;
# without it a re-run would produce `Cluster_x` / `Cluster_y` duplicates.
df = df.drop(columns='Cluster', errors='ignore').merge(
    ts['cluster'].rename('Cluster'), on='Account'
)

In [None]:
# Per (account, sentiment) summary of `Closed PnL`: number of rows, mean PnL
# and Sharpe ratio.
pnl_stats = {
    'trades': pd.NamedAgg(column='Closed PnL', aggfunc='count'),
    'avg_pnl': pd.NamedAgg(column='Closed PnL', aggfunc='mean'),
    'sharpe': pd.NamedAgg(column='Closed PnL', aggfunc=sharpe_ratio),
}
by_account_sentiment = df.groupby(['Account', 'Sentiment'])
# reset_index turns the two group keys back into ordinary columns.
acct_metrics = by_account_sentiment.agg(**pnl_stats).reset_index()


  return np.mean(returns) / np.std(returns, ddof=1)


Step 8: Get top 5 by Sharpe per sentiment

In [None]:
# Order rows by sentiment, then by Sharpe from highest to lowest, and keep the
# first five rows of every sentiment group.
sharpe_ranked = acct_metrics.sort_values(
    by=['Sentiment', 'sharpe'], ascending=[True, False]
)
top5_sharpe = sharpe_ranked.groupby('Sentiment').head(5)

Step 9: Save Outputs

In [None]:
# 9.1 Per-sentiment risk metrics (row index not written)
sentiment_out = data_dir / 'sentiment_advanced_metrics.csv'
df_sent_metrics.to_csv(sentiment_out, index=False)

# 9.2 Cluster label per row of `ts`; the index is written as the first column
clusters_out = data_dir / 'trader_clusters.csv'
ts['cluster'].to_frame().to_csv(clusters_out)

# 9.3 Top five rows per sentiment by Sharpe (row index not written)
top5_out = data_dir / 'top5_traders_sharpe.csv'
top5_sharpe.to_csv(top5_out, index=False)

print("Advanced metrics and top trader lists saved to data/ folder.")

Advanced metrics and top trader lists saved to data/ folder.
