Import packages and load the processed run metrics

In [161]:
import re
import numpy as np
import pandas as pd
# metric column whose maximum selects the best run within each
# (dataset, method, finetune) group in process_run_metrics
validation_metric = 'Results/val_acc'

# processed run metrics, one row per run
df = pd.read_csv('final_run_metrics.csv')

Write a function that returns summaries of each run's metric performance

In [162]:
def process_run_metrics(df, validation_col=None):
    """Summarise how often each run group and each metric ranks in the top three.

    For every (dataset, method, finetune) group the row with the highest
    validation metric is kept.  Every column whose name contains 'Results' is
    then ranked across those best rows, and the ranks 1-3 are tallied.

    Parameters
    ----------
    df : pandas.DataFrame
        Run metrics.  Must contain the columns 'dataset', 'method' and
        'finetune', the validation column, and the metric columns (any column
        whose name contains 'Results').
    validation_col : str, optional
        Column used to pick the best run of each group.  Defaults to the
        notebook-level ``validation_metric`` setting.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(metric_summary, run_summary, filtered_runs)`` where

        * ``metric_summary`` is indexed by metric name and counts, for rows
          with ``method == 'exact'`` only, how often that metric was ranked
          1st / 2nd / 3rd ('rank_one_ind', 'rank_two_ind', 'rank_three_ind')
          and in the top three overall ('value');
        * ``run_summary`` holds the same four counts indexed by
          (dataset, method, finetune);
        * ``filtered_runs`` contains the non-metric columns of the best row
          of each group.
    """
    if validation_col is None:
        # fall back to the notebook-level setting so existing calls keep working
        validation_col = validation_metric

    id_columns = ['dataset', 'method', 'finetune']
    rank_indicators = {'rank_one_ind': 1, 'rank_two_ind': 2, 'rank_three_ind': 3}
    rank_summary_columns = list(rank_indicators) + ['value']

    ## Get the best run for each group
    # rows without a validation score cannot be the best run; dropping them
    # first also keeps idxmax from yielding NaN for an all-NaN group
    scored_runs = df.dropna(subset=[validation_col])
    best_idx = scored_runs.groupby(id_columns)[validation_col].idxmax()
    filtered_df = df.loc[best_idx]

    ## Process metrics and get top runs for each
    # are large or small metric values desirable?
    # The two lists are built independently: removing items from a list while
    # iterating over it skips the element that follows each removal.
    # NOTE(review): only names containing 'std' are treated as smaller-is-better;
    # loss-like metrics are still ranked as larger-is-better -- confirm intent.
    metric_columns = [name for name in filtered_df.columns if re.search('Results', name)]
    ascending_metrics = [name for name in metric_columns if re.search('std', name)]
    descending_metrics = [name for name in metric_columns if name not in ascending_metrics]

    # all non-metric columns are used to identify the experimental run
    filtered_runs = filtered_df[[
        name for name in filtered_df.columns
        if name not in metric_columns
    ]]

    # rank the metrics (rank 1 is best; ties are broken by row order)
    ranked_descending = filtered_df[descending_metrics].rank(
        method='first',
        ascending=False
    )
    ranked_ascending = filtered_df[ascending_metrics].rank(
        method='first',
        ascending=True
    )

    # combine and sort the ranked metrics
    ranked_metrics = pd.concat([ranked_descending, ranked_ascending], axis=1)
    ranked_metrics = ranked_metrics[sorted(ranked_metrics.columns)]
    filtered_ranks = filtered_runs.join(ranked_metrics)

    ## Manipulate rank data to be summarised by runs and metrics
    # convert to long format (one row per run/metric pair), keep the top 3 ranks
    long_filtered_ranks = pd.melt(
        filtered_ranks,
        id_vars=list(filtered_runs.columns),
        var_name='metric'
    )
    top_filtered_metrics = long_filtered_ranks.loc[long_filtered_ranks['value'] <= 3].copy()

    # 0/1 indicators that can be summed directly; this avoids rewriting every
    # False in the frame to NA, which would also hit boolean identifier columns
    for indicator, rank in rank_indicators.items():
        top_filtered_metrics[indicator] = (top_filtered_metrics['value'] == rank).astype(int)

    aggregations = {indicator: 'sum' for indicator in rank_indicators}
    aggregations['value'] = 'count'  # number of top-three placements

    def summarise(frame, keys):
        # tally the rank indicators per group, in a fixed column order
        summary = frame.groupby(keys)[rank_summary_columns].agg(aggregations)
        return summary[rank_summary_columns]

    # summarise metric ranks per run type
    run_summary = summarise(top_filtered_metrics, id_columns)

    # summarise metric ranks per metric, for the 'exact' method only
    # (groupby already returns the metrics in sorted order)
    exact_only = top_filtered_metrics.loc[top_filtered_metrics['method'] == 'exact']
    metric_summary = summarise(exact_only, 'metric')

    return (
        metric_summary,
        run_summary,
        filtered_runs
    )


Filter data to runs of interest
Apply summary function to filtered dataset

In [163]:
# keep only the rows whose n_epochs is below 6, then summarise them;
# the third return value is discarded here
subset_df = df[df['n_epochs'] < 6]
my_metric_summary, my_run_summary, _ = process_run_metrics(subset_df)

View processed summaries

In [164]:
# (rows, columns) of the per-metric summary, followed by the table itself
# ordered by number of top-three placements, highest first
print(my_metric_summary.shape)
my_metric_summary.sort_values('value', ascending=False)

(21, 4)


Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results/test_acc_std,1,0,1,2
Results/test_acc_bottom_decile,0,1,1,2
Results/test_acc,1,0,0,1
Results_unseen/test_f1,1,0,0,1
Results_weighted_unseen/test_f1,0,1,0,1
Results_weighted_unseen/test_acc,1,0,0,1
Results_weighted/test_f1,0,1,0,1
Results_weighted/test_acc,0,1,0,1
Results_unseen/test_loss_std,0,1,0,1
Results_unseen/test_loss_bottom_decile,0,1,0,1


In [165]:
# (rows, columns) of the per-run summary, followed by the table itself
# ordered by number of top-three placements, highest first
print(my_run_summary.shape)
my_run_summary.sort_values('value', ascending=False)

(12, 4)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
dataset,method,finetune,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
sst2,exact,0,4,9,5,18
sst2,pfedme,0,5,4,2,11
sst2,pfedme,1,5,3,1,9
sst2,fedem,0,0,2,5,7
sst2,ditto,0,4,0,1,5
sst2,exact,1,0,1,4,5
sst2,ditto,1,2,2,0,4
sst2,fedavg,0,1,0,3,4
sst2,fedbn,0,0,2,2,4
sst2,fedem,1,2,1,1,4
