In [1]:
import pandas as pd
import numpy as np
from plotnine import *
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Timestamp embedded in the results filename; it selects which results CSV is read.
# NOTE(review): the name `time` would shadow the stdlib `time` module if that were ever
# imported in this notebook; it is kept unchanged because other cells may reference it.
time = "2024-12-31_13:33"

# Load the results table for that timestamp.
raw_df = pd.read_csv(f"/projects/genomic-ml/da2343/ml_project_2/unsupervised/kmeans/results/{time}_results.csv")

# Preview the first 10 rows. A trailing expression gets the notebook's rich table
# rendering, whereas print() would fall back to plain text.
raw_df.head(10)

In [6]:
def analyze_performance(raw_df):
    # Group by relevant parameters and calculate averages
    group_cols = ['instrument', 'num_clusters', 'clustering_algorithm', 'train_period', 
                 'reverse_test', 'random_seed']
    
    # Calculate mean performance metrics for each parameter combination
    results_df = raw_df.groupby(group_cols).agg({
        'test_profit_factor': 'mean',
        'test_sharpe_ratio': 'mean',
        'test_win_ratio': 'mean',
        'test_num_trades': 'mean'
    }).reset_index()
    
    # Calculate combined score
    results_df['combined_score'] = (
        results_df['test_profit_factor'] * 0.4 +
        results_df['test_sharpe_ratio'] * 0.4 +
        (results_df['test_num_trades'] / results_df['test_num_trades'].max()) * 0.2
    )
    
    # Create display DataFrame with proper formatting
    display_df = pd.DataFrame({
        'Instrument': results_df['instrument'],
        'Clusters': results_df['num_clusters'],
        'Algorithm': results_df['clustering_algorithm'],
        'Train Period': results_df['train_period'].astype(str) + 'w',
        'Rev.Test': results_df['reverse_test'],
        'Seed': results_df['random_seed'],
        'PF': results_df['test_profit_factor'].round(3),
        'Sharpe': results_df['test_sharpe_ratio'].round(3),
        'Win%': (results_df['test_win_ratio'] * 100).round(1),
        'Trades': results_df['test_num_trades'].round(1),
        'Score': results_df['combined_score'].round(3)
    })
    
    # Sort by instrument name
    display_df = display_df.sort_values('Instrument')
    
    # Apply styling with color gradients
    styled_df = display_df.style\
        .format({
            'PF': '{:.3f}',
            'Sharpe': '{:.3f}',
            'Win%': '{:.1f}%',
            'Trades': '{:.1f}',
            'Score': '{:.3f}'
        })\
        .background_gradient(subset=['Score'], cmap='YlOrRd')\
        .background_gradient(subset=['PF'], cmap='YlOrRd')\
        .background_gradient(subset=['Sharpe'], cmap='YlOrRd')\
        .background_gradient(subset=['Win%'], cmap='YlOrRd')\
        .background_gradient(subset=['Trades'], cmap='YlOrRd')\
        .set_properties(**{
            'text-align': 'right',
            'font-family': 'monospace',
            'padding': '5px'
        })\
        .hide(axis="index")
    
    return results_df, styled_df

# Example usage:
results_df, styled_df = analyze_performance(raw_df)
styled_df  # Display the styled table

Instrument,Clusters,Algorithm,Train Period,Rev.Test,Seed,PF,Sharpe,Win%,Trades,Score
AUD_CAD_M15,15,kmeans,14w,True,971,0.894,-0.04,41.2%,11.6,0.404
AUD_JPY_M15,10,gaussian_mixture,4w,True,942,0.782,-0.076,50.6%,13.2,0.353
AUD_NZD_M15,8,kmeans,12w,True,716,1.881,0.158,48.5%,15.5,0.899
AUD_USD_M15,8,birch,9w,True,42,1.2,0.064,47.1%,17.9,0.602
CAD_JPY_M15,5,kmeans,4w,False,311,1.273,0.071,55.3%,33.3,0.717
EUR_CAD_M15,6,gaussian_mixture,7w,True,435,0.526,-0.233,41.3%,11.9,0.182
EUR_CHF_M15,6,kmeans,7w,False,435,0.781,-0.085,38.0%,30.0,0.44
EUR_GBP_M15,8,gaussian_mixture,12w,True,936,1.489,0.133,50.0%,19.5,0.754
EUR_JPY_M15,7,gaussian_mixture,10w,True,716,1.125,0.035,44.2%,18.1,0.562
EUR_USD_M15,5,kmeans,8w,False,936,1.109,0.036,54.3%,15.1,0.539


In [5]:
# Display the unstyled `results_df` frame (assigned from analyze_performance(raw_df)).
# NOTE(review): the saved output of this cell lists `test_period` and
# `test_avg_trades_per_window`, which the analyze_performance shown in this notebook
# does not return, and this cell's execution count (5) precedes the analysis cell's (6).
# The output looks stale — re-run after the analysis cell to confirm.
results_df

Unnamed: 0,instrument,num_clusters,clustering_algorithm,train_period,test_period,reverse_test,random_seed,test_profit_factor,test_sharpe_ratio,test_win_ratio,test_num_trades,test_avg_trades_per_window,combined_score
0,AUD_CAD_M15,15,kmeans,14,1,True,971,0.894123,-0.039519,0.411765,11.558824,11.558824,0.404212
1,AUD_JPY_M15,10,gaussian_mixture,4,1,True,942,0.782194,-0.076376,0.50641,13.153846,13.153846,0.353304
2,AUD_NZD_M15,8,kmeans,12,1,True,716,1.881125,0.158191,0.485075,15.492537,15.492537,0.899322
3,AUD_USD_M15,8,birch,9,1,True,42,1.199871,0.063568,0.471429,17.928571,17.928571,0.602116
4,CAD_JPY_M15,5,kmeans,4,1,False,311,1.273029,0.071035,0.552632,33.289474,33.289474,0.717252
5,EUR_CAD_M15,6,gaussian_mixture,7,1,True,435,0.526142,-0.232948,0.413333,11.946667,11.946667,0.181741
6,EUR_CHF_M15,6,kmeans,7,1,False,435,0.78063,-0.085198,0.380282,30.042254,30.042254,0.440278
7,EUR_GBP_M15,8,gaussian_mixture,12,1,True,936,1.488541,0.133354,0.5,19.451613,19.451613,0.753717
8,EUR_JPY_M15,7,gaussian_mixture,10,1,True,716,1.125143,0.035493,0.442029,18.101449,18.101449,0.561928
9,EUR_USD_M15,5,kmeans,8,1,False,936,1.108547,0.035526,0.543478,15.101449,15.101449,0.539115


In [None]:
# NOTE(review): `instrument`, `algo`, `df` and `p` are not defined in any cell visible
# here — confirm they are created earlier so this cell works after Restart & Run All.
# Build the shared path stem once so the CSV and PNG names are derived the same way;
# a str.replace(".png", ".csv") over the whole path would rewrite every ".png" it contains.
output_stem = f"/projects/genomic-ml/da2343/ml_project_2/unsupervised/kmeans/strategy_figures/{instrument}_{algo}"
output_path = f"{output_stem}.png"

# Save the data next to the figure, without the index column.
df.to_csv(f"{output_stem}.csv", index=False)
# Save the figure at 300 dpi (presumably `p` is a plotnine plot — TODO confirm).
p.save(output_path, dpi=300)