In [1]:
import pandas as pd
import numpy as np
from plotnine import *
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Timestamp tag identifying which results file to analyse.
# Earlier run, kept for reference only: "2024-12-31_13:33"
time = "2025-01-02_08:33"

# Directory holding the `<timestamp>_results.csv` files.
RESULTS_DIR = "/projects/genomic-ml/da2343/ml_project_2/unsupervised/kmeans/results"

raw_df = pd.read_csv(f"{RESULTS_DIR}/{time}_results.csv")

# Preview the first ten rows as plain text.
print(raw_df.head(10))

   window  train_actual_return  train_num_trades train_direction  \
0     201             0.158699               131            long   
1     202             0.129365               110            long   
2     203             0.042934               124           short   
3     204             0.091009               102           short   
4     205             0.102404               118           short   
5     206             0.096709                94            long   
6     207             0.197344                86            long   
7     208             0.257997               107            long   
8     209             0.310905               146            long   
9     210             0.141542                98            long   

   test_actual_return  test_num_trades test_direction  train_average_return  \
0            0.007502               18           long              0.114631   
1           -0.139557               30           long              0.114631   
2           -0

In [3]:
def analyze_performance(raw_df):
    # Group by relevant parameters and calculate averages
    group_cols = ['instrument', 'num_clusters', 'clustering_algorithm', 'train_period', 
                 'reverse_test', 'random_seed']
    
    # Calculate mean performance metrics for each parameter combination
    results_df = raw_df.groupby(group_cols).agg({
        'test_profit_factor': 'mean',
        'test_sharpe_ratio': 'mean',
        'test_win_ratio': 'mean',
        'test_num_trades': 'mean'
    }).reset_index()
    
    # Calculate combined score
    results_df['combined_score'] = (
        results_df['test_profit_factor'] * 0.4 +
        results_df['test_sharpe_ratio'] * 0.4 +
        (results_df['test_num_trades'] / results_df['test_num_trades'].max()) * 0.2
    )
    
    # Create display DataFrame with proper formatting
    display_df = pd.DataFrame({
        'Instrument': results_df['instrument'],
        'Clusters': results_df['num_clusters'],
        'Algorithm': results_df['clustering_algorithm'],
        'Train Period': results_df['train_period'].astype(str) + 'w',
        'Rev.Test': results_df['reverse_test'],
        'Seed': results_df['random_seed'],
        'PF': results_df['test_profit_factor'].round(3),
        'Sharpe': results_df['test_sharpe_ratio'].round(3),
        'Win%': (results_df['test_win_ratio'] * 100).round(1),
        'Trades': results_df['test_num_trades'].round(1),
        'Score': results_df['combined_score'].round(3)
    })
    
    # Sort by instrument name
    display_df = display_df.sort_values('Instrument')
    
    # Apply styling with color gradients
    styled_df = display_df.style\
        .format({
            'PF': '{:.3f}',
            'Sharpe': '{:.3f}',
            'Win%': '{:.1f}%',
            'Trades': '{:.1f}',
            'Score': '{:.3f}'
        })\
        .background_gradient(subset=['Score'], cmap='YlOrRd')\
        .background_gradient(subset=['PF'], cmap='YlOrRd')\
        .background_gradient(subset=['Sharpe'], cmap='YlOrRd')\
        .background_gradient(subset=['Win%'], cmap='YlOrRd')\
        .background_gradient(subset=['Trades'], cmap='YlOrRd')\
        .set_properties(**{
            'text-align': 'right',
            'font-family': 'monospace',
            'padding': '5px'
        })\
        .hide(axis="index")
    
    return results_df, styled_df

# Example usage: aggregate the loaded results and build the styled summary table.
results_df, styled_df = analyze_performance(raw_df)
styled_df  # Last expression in the cell, so the notebook renders the styled table

Instrument,Clusters,Algorithm,Train Period,Rev.Test,Seed,PF,Sharpe,Win%,Trades,Score
AUD_CAD_M15,7,kmeans,6w,True,407,1.034,0.014,48.6%,24.7,0.547
AUD_JPY_M15,7,birch,12w,True,42,1.502,0.14,54.4%,22.3,0.772
AUD_NZD_M15,5,kmeans,11w,True,311,1.955,0.216,50.0%,36.1,1.055
AUD_USD_M15,8,birch,9w,True,42,1.2,0.064,47.1%,17.9,0.598
CAD_JPY_M15,5,kmeans,4w,False,311,1.273,0.071,55.3%,33.3,0.71
EUR_CAD_M15,6,kmeans,5w,True,755,0.962,-0.014,53.9%,12.6,0.444
EUR_CHF_M15,5,kmeans,4w,False,795,0.609,-0.147,47.3%,34.6,0.364
EUR_GBP_M15,5,birch,12w,True,42,1.748,0.199,53.8%,31.8,0.943
EUR_JPY_M15,8,kmeans,12w,False,780,1.39,0.127,48.5%,21.0,0.716
EUR_USD_M15,6,kmeans,8w,False,139,1.284,0.091,52.8%,12.3,0.613


In [4]:
# Inspect the unrounded aggregate table returned by analyze_performance.
results_df

Unnamed: 0,instrument,num_clusters,clustering_algorithm,train_period,reverse_test,random_seed,test_profit_factor,test_sharpe_ratio,test_win_ratio,test_num_trades,combined_score
0,AUD_CAD_M15,7,kmeans,6,True,407,1.03408,0.013608,0.486486,24.702703,0.546913
1,AUD_JPY_M15,7,birch,12,True,42,1.502459,0.139936,0.54386,22.315789,0.772443
2,AUD_NZD_M15,5,kmeans,11,True,311,1.955222,0.215545,0.5,36.053571,1.054885
3,AUD_USD_M15,8,birch,9,True,42,1.199871,0.063568,0.471429,17.928571,0.598157
4,CAD_JPY_M15,5,kmeans,4,False,311,1.273029,0.071035,0.552632,33.289474,0.7099
5,EUR_CAD_M15,6,kmeans,5,True,755,0.961605,-0.013985,0.539474,12.592105,0.444213
6,EUR_CHF_M15,5,kmeans,4,False,795,0.609061,-0.146784,0.472973,34.648649,0.364219
7,EUR_GBP_M15,5,birch,12,True,42,1.748233,0.199028,0.538462,31.788462,0.943411
8,EUR_JPY_M15,8,kmeans,12,False,780,1.390351,0.127006,0.484848,21.015152,0.715697
9,EUR_USD_M15,6,kmeans,8,False,139,1.283937,0.090716,0.528169,12.267606,0.613347
