Import packages and processed run metrics

In [82]:
# packages
import re
import numpy as np
import pandas as pd

# control
project_names = [
    'decay--sst2',
    'decay--pubmed',
    'decay--cifar--alpha5.0',
    'decay--cifar--alpha0.5',
    'decay--cifar--alpha0.1'
]
validation_metric = 'Results/val_acc'

# read each data set
df = []
print('input datasets:')
for project_name in project_names:
    temp = pd.read_csv(f'{project_name}.csv')
    print('\t', temp.shape)
    df.append(temp)

df = pd.concat(df, axis=0, ignore_index=True)
print('all runs:', df.shape)

input datasets:
	 (1200, 35)
	 (780, 28)
	 (520, 27)
	 (520, 27)
	 (520, 27)
all runs: (3540, 36)


Write function to return summaries of run's metric performance

In [83]:
def process_run_metrics(filtered_df, method='exact'):

    ## Process metrics and get top runs for each
    # are large or small metric values are desirable?
    descending_metrics = [name for name in filtered_df.columns if re.search('Results.*test', name)]
    ascending_metrics = [
        descending_metrics.pop(descending_metrics.index(name))
        for name in descending_metrics
        if re.search('std', name)
    ]

    # convert metrics to numeric
    #filtered_df[descending_metrics + ascending_metrics] = filtered_df[descending_metrics + ascending_metrics].apply(pd.to_numeric)

    # all non-metrics columns are used to identify the experimental run
    filtered_runs = filtered_df[[
        name for name in filtered_df.columns
        if name not in descending_metrics + ascending_metrics
    ]]

    # rank the metrics
    ranked_descending = filtered_df.groupby(['dataset'])[descending_metrics].rank(
        method='min',
        ascending=False
    )
    ranked_ascending = filtered_df.groupby(['dataset'])[ascending_metrics].rank(
        method='min',
        ascending=True
    )

    # combine and sort the ranked_metrics
    ranked_metrics = pd.concat([ranked_descending, ranked_ascending], axis=1)
    ranked_metrics = ranked_metrics[sorted(ranked_metrics.columns)]
    filtered_ranks = filtered_runs.join(ranked_metrics)


    ## Manipulate rank data to be summarized by runs and metrics
    # convert to long format
    # filter to top ranks
    long_filtered_ranks = pd.melt(filtered_ranks, id_vars=filtered_runs.columns, var_name='metric')
    top_filtered_metrics = long_filtered_ranks.loc[long_filtered_ranks.value <= 3].copy()  # top 3 runs

    # compute rank summaries to understand what runs are top overall
    top_filtered_metrics['rank_one_ind'] = (top_filtered_metrics.value == 1)
    top_filtered_metrics['rank_two_ind'] = (top_filtered_metrics.value == 2)
    top_filtered_metrics['rank_three_ind'] = (top_filtered_metrics.value == 3)
    top_filtered_metrics.replace(False, pd.NA, inplace=True)

    # summarized metric ranks for run type
    rank_summary_columns = ['rank_one_ind', 'rank_two_ind', 'rank_three_ind', 'value']
    #id_columns = ['dataset', 'method', 'finetune']
    id_columns = ['method', 'finetune']
    run_summary = top_filtered_metrics.groupby(id_columns)[rank_summary_columns].count()

    # summarize metric ranks for metric choice
    metric_summary = top_filtered_metrics.loc[top_filtered_metrics.method == method]
    metric_summary = metric_summary.sort_values(by='metric').groupby('metric')[rank_summary_columns].count()

    return (
        run_summary,
        metric_summary,
        filtered_runs
    )


Filter data to runs of interest
Apply summary function to filtered dataset

In [84]:
subset_df = df
print('input dataset:', df.shape)

## row (run) filtering
# remove extra hyper-parameter searches
subset_df = subset_df.loc[(subset_df.n_epochs < 6)]
print('\t reduce to 3 or fewer local update steps:', subset_df.shape)
subset_df = subset_df.loc[(subset_df.K < 6) | (subset_df.method != 'pfedme')]
print('\t reduce to 3 or fewer local meta-learning steps for pfedme:', subset_df.shape)
subset_df = subset_df.loc[((10 * subset_df.beta).astype('Int64') % 2 == 0) | (subset_df.method != 'exact')]
print('\t reduce beta grid for decay:', subset_df.shape)

## column (metric) filtering
subset_df = subset_df[[
    name for name in subset_df.columns
    if not re.search('f1', name)
       and not re.search('loss', name)
]]

input dataset: (3540, 36)
	 reduce to 3 or fewer local update steps: (2770, 36)
	 reduce to 3 or fewer local meta-learning steps for pfedme: (2690, 36)
	 reduce beta grid for decay: (2490, 36)


In [85]:
## get best runs for each group
# regardless of finetuning
best_runs = subset_df.loc[
    subset_df.groupby(['dataset', 'method', 'finetune']) \
    [validation_metric].idxmax()  # return index of max validation metric
]

# treat finetuning groups as seperate
ft_yes = best_runs.loc[best_runs.finetune == 1]
ft_no = best_runs.loc[best_runs.finetune == 0]

For all best runs, regardless of finetuning, produce summaries

In [86]:
(all_run_summary, all_metric_summary, _) = process_run_metrics(best_runs)
print('all runs:', all_run_summary.shape)
print('all metrics:', all_metric_summary.shape)

# run summary
all_run_summary.sort_values(by='value', ascending=False)

all runs: (12, 4)
all metrics: (7, 4)


  ranked_descending = filtered_df.groupby(['dataset'])[descending_metrics].rank(


Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,0,9,6,4,19
exact,1,5,4,5,14
fedavg,0,4,4,5,13
fedem,0,2,4,6,12
fedavg,1,3,2,6,11
pfedme,0,5,5,1,11
pfedme,1,7,2,2,11
fedem,1,3,1,2,6
fedbn,1,1,1,3,5
fedbn,0,0,2,0,2


View processed summaries

In [87]:
# metrics summary
sort_by = ['value', 'rank_one_ind', 'rank_two_ind', 'rank_three_ind']
all_metric_summary.sort_values(by=sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_weighted_unseen/test_acc,4,1,1,6
Results/test_acc_bottom_decile,2,3,1,6
Results_unseen/test_acc_bottom_decile,2,1,3,6
Results/test_acc_std,4,0,1,5
Results/test_acc,1,2,1,4
Results_unseen/test_acc_std,1,1,1,3
Results_weighted/test_acc,0,2,1,3


Now seperately, based on finetuning status, repeat the above summary

In [88]:
(yes_run_summary, yes_metric_summary, _) = process_run_metrics(ft_yes)
print('ft_yes runs:', yes_run_summary.shape)
print('ft_yes metrics:', yes_metric_summary.shape)
(no_run_summary, no_metric_summary, _) = process_run_metrics(ft_no)
print('ft_no runs:', all_run_summary.shape)
print('ft_no metrics:', all_metric_summary.shape)

ft_yes runs: (6, 4)
ft_yes metrics: (7, 4)
ft_no runs: (12, 4)
ft_no metrics: (7, 4)


  ranked_descending = filtered_df.groupby(['dataset'])[descending_metrics].rank(
  ranked_descending = filtered_df.groupby(['dataset'])[descending_metrics].rank(


In [89]:
# run summary
yes_run_summary.sort_values(by='value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,1,15,12,4,31
fedavg,1,11,8,5,24
pfedme,1,12,1,7,20
fedem,1,3,5,7,15
fedbn,1,1,4,1,6
ditto,1,0,1,2,3


In [90]:
# metrics summary
yes_metric_summary.sort_values(by=sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_weighted_unseen/test_acc,5,0,0,5
Results/test_acc_std,3,1,1,5
Results/test_acc_bottom_decile,2,3,0,5
Results_unseen/test_acc_bottom_decile,3,0,1,4
Results_unseen/test_acc_std,1,3,0,4
Results/test_acc,1,2,1,4
Results_weighted/test_acc,0,3,1,4


In [91]:
# run summary
no_run_summary.sort_values(by='value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,0,15,7,6,28
fedem,0,4,8,16,28
fedavg,0,8,7,7,22
pfedme,0,9,4,1,14
ditto,0,1,0,3,4
fedbn,0,3,0,1,4


In [92]:
# metrics summary
no_metric_summary.sort_values(by=sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_weighted_unseen/test_acc,4,1,0,5
Results/test_acc_std,2,1,2,5
Results/test_acc,2,1,1,4
Results_weighted/test_acc,2,1,1,4
Results_unseen/test_acc_bottom_decile,2,0,2,4
Results/test_acc_bottom_decile,2,1,0,3
Results_unseen/test_acc_std,1,2,0,3
