In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Display configuration: never elide columns when a frame is rendered, and
# allow 200 characters per line before pandas wraps its text output.
pd.options.display.max_columns = None
pd.options.display.width = 200


In [None]:
# Read the processed, sentiment-labelled CSV and convert its "time" column
# to pandas datetimes in the same expression.
df = (
    pd.read_csv("../data/processed/sentiment_labeled_data.csv")
    .assign(time=lambda frame: pd.to_datetime(frame["time"]))
)

# Quick sanity check: (rows, columns) of what was loaded.
print(df.shape)


In [None]:
# Columns stored as float64 or int64. Other numeric widths (e.g. int32) and
# booleans are not picked up by this filter.
numeric_cols = df.select_dtypes(include=["float64", "int64"]).columns

# Pairwise correlation matrix of the selected columns (pandas default method).
corr = df[numeric_cols].corr()

# Diverging colour map centred on 0, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(corr, cmap="coolwarm", center=0, ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()


In [None]:
# Mean of every numeric column within each "Classification" group.
# Despite the variable name, this table holds group means, not correlations.
by_classification = df.groupby("Classification")
sentiment_corr = by_classification[numeric_cols].mean()

display(sentiment_corr)


In [None]:
# Build the PCA input from every numeric column. Infinite values are treated
# as missing (StandardScaler rejects them), then missing values become 0.
# NOTE(review): the zero-fill happens *before* standardisation, so a filled
# value is not the column mean — confirm 0 is a sensible default per feature.
features = df[numeric_cols].replace([np.inf, -np.inf], np.nan).fillna(0)

# Standardise each feature so that no single column's scale dominates the PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features)

# PCA cannot extract more components than min(n_samples, n_features); cap the
# requested 5 so a narrow or short frame does not raise.
n_components = min(5, *X_scaled.shape)

# random_state is only consulted by the randomized/arpack solvers; fixing it
# keeps the projection reproducible if scikit-learn selects one of them.
pca = PCA(n_components=n_components, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# Name the columns after the components actually produced, and keep the
# original row index so the projection can be joined back onto `df`.
pca_df = pd.DataFrame(
    X_pca,
    columns=[f"PC{i + 1}" for i in range(X_pca.shape[1])],
    index=features.index,
)


In [None]:
# Fraction of total variance captured by each fitted principal component.
variance_ratios = pca.explained_variance_ratio_

explained = pd.DataFrame({
    "Component": [f"PC{rank}" for rank in range(1, len(variance_ratios) + 1)],
    "Explained Variance": variance_ratios,
})

display(explained)


In [None]:
# Scatter of the first two principal components; the low alpha keeps dense
# regions readable when many points overlap.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(pca_df["PC1"], pca_df["PC2"], alpha=0.3)
ax.set(
    title="PCA Projection of Trading Behavior",
    xlabel="PC1",
    ylabel="PC2",
)
plt.show()


In [None]:
# Z-score of normalized PnL, stored on `df` as a new "z_pnl" column.
pnl = df["normalized_pnl"]
df["z_pnl"] = (pnl - pnl.mean()) / pnl.std()

# Rows lying more than 3 standard deviations from the mean, in either direction.
outliers = df[df["z_pnl"].abs() > 3]

display(outliers.head())


In [None]:
# Output column -> (source column, aggregation) for the per-regime summary.
regime_metric_spec = {
    "avg_pnl": ("normalized_pnl", "mean"),
    "pnl_std": ("normalized_pnl", "std"),
    "avg_risk": ("risk_score", "mean"),
    "avg_leverage": ("leverage", "mean"),
    "win_rate": ("is_profitable", "mean"),
    "avg_trade_size": ("size", "mean"),
}

# One row per value of "regime", one column per entry in the spec above.
regime_patterns = df.groupby("regime").agg(**regime_metric_spec)

display(regime_patterns)


In [None]:
# Output column -> (source column, aggregation) for the per-classification summary.
behavior_metric_spec = {
    "aggression": ("aggression_score", "mean"),
    "discipline": ("discipline_score", "mean"),
    "overtrading": ("overtrading_score", "mean"),
}

# Mean behaviour scores for each "Classification" group.
behavior_patterns = df.groupby("Classification").agg(**behavior_metric_spec)

display(behavior_patterns)


In [None]:
# Persist the derived tables to the processed-data directory. Each frame's
# index is written as the first CSV column (the to_csv default), which keeps
# the group labels of the two aggregated tables.
regime_patterns.to_csv("../data/processed/regime_pattern_metrics.csv")
behavior_patterns.to_csv("../data/processed/behavior_pattern_metrics.csv")
pca_df.to_csv("../data/processed/pca_projection.csv")

# The original literal was a mis-decoded UTF-8 check mark; print the intended
# character.
print("✅ Pattern intelligence datasets saved.")
