In [None]:
import pandas as pd
import numpy as np

def process_metrics(test_mse_for_output, test_mse_dummy_for_output, output_name, aggregated_results):
    mean_test_mse = np.mean(test_mse_for_output)
    std_test_mse = np.std(test_mse_for_output)
    mean_test_mse_dummy = np.mean(test_mse_dummy_for_output)
    std_test_mse_dummy = np.std(test_mse_dummy_for_output)
    improvement = (mean_test_mse_dummy - mean_test_mse) / mean_test_mse_dummy * 100  # Improvement percentage
    
    aggregated_results['output'].append(output_name)
    aggregated_results['mean_test_mse'].append(mean_test_mse)
    aggregated_results['std_test_mse'].append(std_test_mse)
    aggregated_results['mean_test_mse_dummy'].append(mean_test_mse_dummy)
    aggregated_results['std_test_mse_dummy'].append(std_test_mse_dummy)
    aggregated_results['improvement_over_dummy (%)'].append(improvement)

    return aggregated_results


def performance_summary_regression(seed, metric, task_output):
    output_names = np.load(f'./processed_data/{metric}/algo_portfolio.npy', allow_pickle=True)
    num_outputs = len(output_names)

    aggregated_results = {
        'output': [],
        'mean_test_mse': [],
        'std_test_mse': [],
        'mean_test_mse_dummy': [],
        'std_test_mse_dummy': [],
        'improvement_over_dummy (%)': []
    }

    if(task_output=='multi'):
        results_df = pd.read_csv(f'./results/seed_{seed}/{metric}/regression/multi/performance_results.csv')
        results_df['test_mse'] = results_df['test_mse'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
        results_df['test_mse_dummy'] = results_df['test_mse_dummy'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
        for i in range(num_outputs):
            test_mse_for_output = np.array([mse[i] for mse in results_df['test_mse']])
            test_mse_dummy_for_output = np.array([mse_dummy[i] for mse_dummy in results_df['test_mse_dummy']])
            aggregated_results = process_metrics(test_mse_for_output, test_mse_dummy_for_output, output_names[i], aggregated_results)
    elif(task_output=='single'):
        for i in range(num_outputs):
            results_df = pd.read_csv(f"./results/seed_{seed}/{metric}/regression/single/performance_results_{output_names[i]}.csv")
            test_mse_for_output = np.array(results_df['test_mse'])
            test_mse_dummy_for_output = np.array(results_df['test_mse_dummy'])
            aggregated_results = process_metrics(test_mse_for_output, test_mse_dummy_for_output, output_names[i], aggregated_results)

    return pd.DataFrame(aggregated_results)



def performance_summary_classification(metric, task_output):
    results_df = pd.read_csv(f'./results/seed_42/{metric}/classification/{task_output}/performance_results.csv', index_col=0)
    mean_test_acc = np.mean(results_df['test_score'])
    std_test_acc = np.std(results_df['test_score'])
    mean_test_acc_dummy = np.mean(results_df['test_score_dummy'])
    std_test_acc_dummy = np.std(results_df['test_score_dummy'])
    improvement = (mean_test_acc - mean_test_acc_dummy) / mean_test_acc_dummy * 100  # Improvement percentage
    aggregated_results = {
        'output': ['C'],
        'mean_test_acc': [mean_test_acc],
        'std_test_acc': [std_test_acc],
        'mean_test_acc_dummy': [mean_test_acc_dummy],
        'std_test_acc_dummy': [std_test_acc_dummy],
        'improvement_over_dummy (%)': [improvement]
    }
    return pd.DataFrame(aggregated_results)

def performance_summary_pairwise_regression(metric, task_output):
    aggregated_results = {
        'output': [],
        'mean_test_mse': [],
        'std_test_mse': [],
        'mean_test_mse_dummy': [],
        'std_test_mse_dummy': [],
        'improvement_over_dummy (%)': []
    }
    outputs = pd.read_csv(f'./processed_data/{metric}/pairwise_regression/performance.csv', index_col=0).columns
    if task_output == 'multi':
        results_df = pd.read_csv(f'./results/seed_42/{metric}/pairwise_regression/{task_output}/performance_results.csv', index_col=0)
        results_df['test_mse'] = results_df['test_mse'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
        results_df['test_mse_dummy'] = results_df['test_mse_dummy'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
        for i in range(len(outputs)):
            test_mse_for_output = np.array([mse[i] for mse in results_df['test_mse']])
            test_mse_dummy_for_output = np.array([mse_dummy[i] for mse_dummy in results_df['test_mse_dummy']])
            aggregated_results = process_metrics(test_mse_for_output, test_mse_dummy_for_output, outputs[i], aggregated_results)
    elif task_output == 'single':
        print('single')
        for i in range(len(outputs)):
            results_df = pd.read_csv(f"./results/seed_42/{metric}/pairwise_regression/{task_output}/performance_results_{outputs[i]}.csv")
            test_mse_for_output = np.array(results_df['test_mse'])
            test_mse_dummy_for_output = np.array(results_df['test_mse_dummy'])
            aggregated_results = process_metrics(test_mse_for_output, test_mse_dummy_for_output, outputs[i], aggregated_results)

    return pd.DataFrame(aggregated_results)

def process_metrics_classification(test_score_for_output, test_score_dummy_for_output, output_name, aggregated_results):
    mean_test_acc = np.mean(test_score_for_output)
    std_test_acc = np.std(test_score_for_output)
    mean_test_acc_dummy = np.mean(test_score_dummy_for_output)
    std_test_acc_dummy = np.std(test_score_dummy_for_output)
    improvement = (mean_test_acc - mean_test_acc_dummy) / mean_test_acc_dummy * 100  # Improvement percentage
    
    aggregated_results['output'].append(output_name)
    aggregated_results['mean_test_acc'].append(mean_test_acc)
    aggregated_results['std_test_acc'].append(std_test_acc)
    aggregated_results['mean_test_acc_dummy'].append(mean_test_acc_dummy)
    aggregated_results['std_test_acc_dummy'].append(std_test_acc_dummy)
    aggregated_results['improvement_over_dummy (%)'].append(improvement)

    return aggregated_results

def performance_summary_pairwise_classification(metric, task_output, cost_sensitive=False):
    aggregated_results = {
        'output': [],
        'mean_test_acc': [],
        'std_test_acc': [],
        'mean_test_acc_dummy': [],
        'std_test_acc_dummy': [],
        'improvement_over_dummy (%)': []
    }
    outputs = pd.read_csv(f'./processed_data/{metric}/pairwise_classification/performance.csv', index_col=0).columns
    if task_output == 'multi':
        results_df = pd.read_csv(f'./results/seed_42/{metric}/pairwise_classification/{task_output}/performance_results.csv', index_col=0)
        results_df['test_score'] = results_df['test_score'].apply(lambda x: np.fromstring(x[1:-1], sep=','))
        results_df['test_score_dummy'] = results_df['test_score_dummy'].apply(lambda x: np.fromstring(x[1:-1], sep=','))
        for i in range(len(outputs)):
            test_mse_for_output = np.array([mse[i] for mse in results_df['test_score']])
            test_mse_dummy_for_output = np.array([mse_dummy[i] for mse_dummy in results_df['test_score_dummy']])
            aggregated_results = process_metrics_classification(test_mse_for_output, test_mse_dummy_for_output, outputs[i], aggregated_results)
    elif task_output == 'single':
        for i in range(len(outputs)):
            results_df = pd.read_csv(f'./results/seed_42/{metric}/{"cost_sensitive_" if cost_sensitive else ""}pairwise_classification/{task_output}/performance_results_{outputs[i]}.csv')
            test_mse_for_output = np.array(results_df['test_score'])
            test_mse_dummy_for_output = np.array(results_df['test_score_dummy'])
            aggregated_results = process_metrics_classification(test_mse_for_output, test_mse_dummy_for_output, outputs[i], aggregated_results)
    return pd.DataFrame(aggregated_results)

In [5]:
aggregated_results_df_pairwise_classification_multi = performance_summary_pairwise_classification('HAMMING LOSS example based', 'multi')
print(aggregated_results_df_pairwise_classification_multi)

# aggregated_results_df_pairwise_classification_single = performance_summary_pairwise_classification('HAMMING LOSS example based', 'single')
# print(aggregated_results_df_pairwise_classification_single)

# aggregated_results_df_pairwise_classification_single_cost_sensitive = performance_summary_pairwise_classification('HAMMING LOSS example based', 'single', True)
# print(aggregated_results_df_pairwise_classification_single_cost_sensitive)

             output  mean_test_acc  std_test_acc  mean_test_acc_dummy  \
0    DEEP4_vs_RFPCT          0.725      0.446514                0.700   
1       DEEP4_vs_CC          0.825      0.379967                0.750   
2   DEEP4_vs_Ada300          0.575      0.494343                0.625   
3   DEEP4_vs_TREMLC          0.750      0.433013                0.750   
4       RFPCT_vs_CC          0.800      0.400000                0.675   
5   RFPCT_vs_Ada300          0.600      0.489898                0.000   
6   RFPCT_vs_TREMLC          0.575      0.494343                0.525   
7      CC_vs_Ada300          0.700      0.458258                0.525   
8      CC_vs_TREMLC          0.600      0.489898                0.725   
9  Ada300_vs_TREMLC          0.550      0.497494                0.000   

   std_test_acc_dummy  improvement_over_dummy (%)  
0            0.458258                    3.571429  
1            0.433013                   10.000000  
2            0.484123                   

  improvement = (mean_test_acc - mean_test_acc_dummy) / mean_test_acc_dummy * 100  # Improvement percentage


In [6]:
# aggregated_results_df_classification_single = performance_summary_classification('HAMMING LOSS example based', 'single')
# print(aggregated_results_df_classification_single)

# aggregated_results_df_multi = performance_summary_regression('HAMMING LOSS example based', 'multi')
# print(aggregated_results_df_multi)

# for seed in range(0, 10):
#     print("seed: ", seed)
#     aggregated_results_df_single = performance_summary_regression(seed, 'HAMMING LOSS example based', 'single')
#     print(aggregated_results_df_single)

# aggregated_results_df_pairwise_regression_single = performance_summary_pairwise_regression('HAMMING LOSS example based', 'single')
# print(aggregated_results_df_pairwise_regression_single)

# aggregated_results_df_pairwise_regression_multi = performance_summary_pairwise_regression('HAMMING LOSS example based', 'multi')
# print(aggregated_results_df_pairwise_regression_multi)