In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import sys

logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"

log_dir = "logs"
log_filepath = os.path.join(log_dir, "logging.log")
os.makedirs(log_dir, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format=logging_str,
    handlers=[logging.FileHandler(log_filepath), logging.StreamHandler(sys.stdout)],
)


In [2]:
hist_data = pd.read_csv('csv_files/historical_data_inter.csv', index_col=0)
fear_data = pd.read_csv('csv_files/fear_data_inter.csv', index_col=0)

In [3]:
merged_df = pd.merge(hist_data.copy(), fear_data.copy(), on='Timestamp')

In [4]:
# Define your aggregations
agg_dict = {
    # Profit & Loss
    'net_daily_pnl': ['sum', 'mean'],     # Total P&L and average daily P&L
    'pnl_volatility': 'mean',            # Average daily volatility

    # Risk & Volume
    'total_trade_volume_usd': ['sum', 'mean'],  # Total volume and average daily volume
    'avg_trade_value_usd': 'mean',        # Average trade size (bet)

    # Participation
    'active_traders': ['mean', 'max'],    # Average and peak daily traders
    'total_daily_fees': 'sum',            # Total fees generated by this group

    # Trader Sentiment
    'top_coin': lambda s: s.mode()[0],    # Most common "hot coin"
    'dominant_side': lambda s: s.mode()[0],  # Most common dominant action
    'buy_sell_ratio': 'mean',             # The group's overall sentiment ratio

    # Context / Timeframe
    'date': ['nunique', 'min', 'max']   # Number of days, and the date range
}

# Run the groupby aggregation
insight_summary = merged_df.groupby('classification').agg(agg_dict)
insight_summary.rename(columns={"<lambda>": "mode"}, level=1, inplace=True)

In [5]:
# Prepare data for Elastic Net
model_df = merged_df.copy()

# Drop non-numeric and less relevant columns
model_df = model_df.drop(
    columns=["Timestamp", "date", "top_coin", "dominant_side", "dominant_direction"]
)

# One-hot encode the 'classification' column
model_df = pd.get_dummies(model_df, columns=["classification"], drop_first=True)

# Define features (X) and target (y)
X = model_df.drop(columns=["net_daily_pnl"])
y = model_df["net_daily_pnl"]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [7]:
# Create and train the ElasticNet model
elastic_net = ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=42)
elastic_net.fit(X_train, y_train)

# Make predictions on the test set
y_pred = elastic_net.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

logging.info(f"Mean Squared Error: {mse}")
logging.info(f"R-squared: {r2}")

# Display model coefficients
coefficients = pd.DataFrame(
    elastic_net.coef_, index=X_train.columns, columns=["Coefficient"]
)
logging.info(coefficients)

[2025-11-13 12:51:52,827: INFO: 1066801492: Mean Squared Error: 42560571024580.086]
[2025-11-13 12:51:52,828: INFO: 1066801492: R-squared: -7040247052.317085]
[2025-11-13 12:51:52,828: INFO: 1066801492:                          Coefficient
avg_daily_pnl           18626.162165
pnl_volatility           3408.268968
avg_trade_value_usd      1211.441575
total_trade_volume_usd      0.005144
avg_trade_size_tokens     -36.862319
active_traders           -613.588979
total_daily_fees           -4.084892
buy_sell_ratio             -0.000000
avg_exec_price            -50.597163
timestamp                  -0.003811
value                   -3841.089045
classification_Fear        68.921390
classification_Greed      -28.347257
classification_Neutral     38.459330]


In [None]:
def create_dashboard(
    summary_df, top_coin_counts, output_file="outputs/classification_dashboard.png"
):
    """
    Creates a single 2x2 dashboard image comparing key metrics.
    """

    # --- 1. Create the Figure and Grid ---
    # (nrows, ncols), and the total image size
    fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(20, 14))

    # Add a main title for the whole dashboard
    fig.suptitle(
        "Trader Classification Dashboard (Fear vs. Greed)", fontsize=20, y=1.03
    )

    # --- 2. Plot 1: Average Daily P&L ---
    summary_df["total_trade_volume_usd"]["sum"].plot(
        kind="bar",
        ax=axes[0, 0],
        title="Total Trade Volume by Classification",
        xlabel="Classification",
    )
    axes[0, 0].set_ylabel("Total Volume (USD)")
    axes[0, 0].tick_params(axis="x", rotation=0)

    # --- 3. Plot 2: Average Trade Value (Risk) ---
    summary_df["net_daily_pnl"]["mean"].plot(
        kind="bar",
        ax=axes[0, 1],
        title="Average Daily Net P&L by Classification",
        xlabel="Classification",
    )
    axes[0, 1].set_ylabel("Average P&L (USD)")
    axes[0, 1].tick_params(axis="x", rotation=0)

    # --- 4. Plot 3: Average P&L Volatility (Risk) ---
    summary_df["pnl_volatility"]["mean"].plot(
        kind="bar",
        ax=axes[0, 2],
        title="Average Daily P&L Volatility (Risk)",
        xlabel="Classification",
    )
    axes[0, 2].set_ylabel("Average Volatility (std. dev.)")
    axes[0, 2].tick_params(axis="x", rotation=0)

    # --- 5. Plot 4: Average Active Traders ---
    summary_df["total_trade_volume_usd"]["sum"].plot(
        kind="pie",
        ax=axes[0, 3],
        title="Total Trade Volume by Classification",
        autopct="%1.1f%%",
        xlabel="Classification",
    )
    axes[0, 3].set_ylabel("Total Volume (USD)")
    axes[0, 3].tick_params(axis="x", rotation=0)

    # ---- 6. Plot 5: Average Risk Appetite
    summary_df["avg_trade_value_usd"]["mean"].plot(
        kind="bar",
        ax=axes[1, 0],
        title="Average Trade Value (Risk Appetite)",
        xlabel="Classification",
    )
    axes[1, 0].set_ylabel("Average Size (USD)")
    axes[1, 0].tick_params(axis="x", rotation=0)

    # --- 7. Plot 6: P&L Volatility
    sns.boxplot(data=merged_df, x="classification", y="pnl_volatility", ax=axes[1, 1])
    axes[1, 1].set_title("Distribution of Daily P&L Volatility")
    axes[1, 1].set_ylabel("Daily P&L Volatility")
    axes[1, 1].set_xlabel("Classification")

    summary_df["active_traders"]["mean"].plot(
        kind="bar",
        ax=axes[1, 2],
        title="Average Daily Active Traders",
        xlabel="Classification",
    )
    axes[1, 2].set_ylabel("Average Trader Count")
    axes[1, 2].tick_params(axis="x", rotation=0)

    summary_df["active_traders"]["max"].plot(
        kind="bar",
        ax=axes[1, 3],
        title="Peak Daily Active Traders",
        ylabel="Peak Trader Count",
        xlabel="Classification",
    )
    axes[1, 3].set_ylabel("Peak Trader Count")
    axes[1, 3].tick_params(axis="x", rotation=0)

    summary_df["total_daily_fees"]["sum"].plot(
        kind="bar",
        ax=axes[2, 0],
        title="Total Fees Paid by Classification",
        xlabel="Classification",
    )
    axes[2, 0].set_ylabel("Total Fees (USD)")
    axes[2, 0].tick_params(axis="x", rotation=0)

    # --- Plot 10: Top Coin by Classification (as a bar chart) ---
    top_coin_counts.plot(
        kind="bar",
        stacked=True,
        ax=axes[2, 1],
        title="Top Coin Counts by Classification",
    )
    axes[2, 1].set_ylabel("Number of Days")
    axes[2, 1].tick_params(axis="x", rotation=45)
    axes[2, 1].legend(title="Top Coin")

    summary_df["buy_sell_ratio"]["mean"].plot(
        kind="bar",
        ax=axes[2, 2],
        title="Average Buy/Sell Ratio by Classification",
        ylabel="Buy Ratio (e.g., 0.5 = 50% Buys)",
        xlabel="Classification",
    )
    plt.axhline(y=0.5, color="grey", linestyle="--", label="Neutral (0.5)")
    axes[2, 2].set_ylabel("Buy Ratio")
    axes[2, 2].tick_params(axis="x", rotation=0)
    # --- 6. Clean up and Save ---
    plt.tight_layout()  # Prevents labels from overlapping
    plt.savefig(output_file)
    logging.info(f"Dashboard saved to {output_file}")
    plt.close()


In [13]:
top_coin_counts = (
    merged_df.groupby(["classification", "top_coin"]).size().unstack(fill_value=0)
)
try:
    create_dashboard(insight_summary, top_coin_counts, output_file="my_dashboard.png")
except Exception as e:
    logging.error(f"An error occurred while creating the dashboard: {e}")
    raise

[2025-11-13 12:52:40,030: INFO: 2010101424: Dashboard saved to my_dashboard.png]
