Import packages and processed run metrics

In [12]:
# packages
import os
import re
import numpy as np
import pandas as pd

# source
helper = __import__('02_helper')

# control
project_names = [
    'femnist--s02', 'sst2', 'pubmed',
    #'cifar--alpha5.0', 'cifar--alpha0.5', 'cifar--alpha0.1'
]
validation_metric = 'Results/val_acc'

df = helper.load_data(project_names)


input datasets:
	 (413, 26)
	 (1200, 35)
	 (780, 28)
all runs: (2393, 35)


Filter data to runs of interest
Apply summary function to filtered dataset

In [13]:
subset_df = df
print('input dataset:', df.shape)

## row (run) filtering
# remove extra hyper-parameter searches

subset_df = subset_df.loc[(subset_df.n_epochs < 6)]
print('\t reduce to 3 or fewer local update steps:', subset_df.shape)

subset_df = subset_df.loc[(subset_df.K < 6) | (subset_df.method != 'pfedme')]
print('\t reduce to 3 or fewer local meta-learning steps for pfedme:', subset_df.shape)

subset_df = subset_df.loc[((10 * subset_df.beta).astype('Int64') % 2 == 0) | (subset_df.method != 'exact')]
print('\t reduce beta grid for decay:', subset_df.shape)

## column (metric) filtering
subset_df = subset_df[[
    name for name in subset_df.columns
    if not re.search('f1', name)
       and not re.search('loss', name)
]]

input dataset: (2393, 35)
	 reduce to 3 or fewer local update steps: (1787, 35)
	 reduce to 3 or fewer local meta-learning steps for pfedme: (1707, 35)
	 reduce beta grid for decay: (1610, 35)


In [14]:
## get best runs for each group
# regardless of finetuning
best_runs = subset_df.loc[
    subset_df.groupby(['dataset', 'method', 'finetune']) \
    [validation_metric].idxmax()  # return index of max validation metric
]

# treat finetuning groups as seperate
ft_yes = best_runs.loc[best_runs.finetune == 1]
ft_no = best_runs.loc[best_runs.finetune == 0]

# show best runs
best_runs.head()

Unnamed: 0,method,finetune,dataset,s,n_epochs,lr,regular_weight,beta,Results/test_acc,Results/test_acc_bottom_decile,Results/test_acc_std,Results/val_acc,Results_unseen/test_acc,Results_unseen/test_acc_bottom_decile,Results_unseen/test_acc_std,Results_weighted/test_acc,Results_weighted_unseen/test_acc,batch_size,K
280,ditto,0,femnist--s02,2.0,3,0.05,0.5,,0.895517,0.815789,0.060415,0.899532,0.013385,0.0,0.023664,0.904729,0.012756,,
11,ditto,1,femnist--s02,2.0,3,0.1,0.5,,0.90315,0.833333,0.056343,0.904993,0.567176,0.444444,0.092124,0.910967,0.575854,,
308,exact,0,femnist--s02,2.0,3,0.1,,0.8,0.893264,0.813559,0.080925,0.895771,0.913772,0.851852,0.050396,0.90507,0.919818,,
381,exact,1,femnist--s02,2.0,3,0.05,,0.2,0.898591,0.821429,0.071101,0.901799,0.915247,0.842105,0.048456,0.908699,0.919362,,
149,fedavg,0,femnist--s02,2.0,3,0.01,,,0.873541,0.785714,0.085197,0.876718,0.897397,0.833333,0.051539,0.885902,0.904328,,


For all best runs, regardless of finetuning, produce summaries

In [15]:
(all_run_summary, all_metric_summary, all_rank_summary) = helper.process_run_metrics(best_runs)
print('all runs:', all_run_summary.shape)
print('all metrics:', all_metric_summary.shape)

# run summary
all_run_summary.sort_values(by='value', ascending=False)

all runs: (11, 4)
all metrics: (8, 4)


Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
fedbn,1,5,3,3,11
exact,0,4,3,3,10
exact,1,4,3,1,8
fedbn,0,1,5,1,7
fedem,0,0,4,3,7
fedem,1,4,1,2,7
ditto,1,0,2,3,5
pfedme,0,2,2,0,4
pfedme,1,2,0,2,4
fedavg,1,0,2,1,3


View processed summaries

In [16]:
# metrics summary
sort_by = ['value', 'rank_one_ind', 'rank_two_ind', 'rank_three_ind']
all_metric_summary.sort_values(by=sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_unseen/test_acc,2,0,2,4
Results_weighted_unseen/test_acc,2,0,1,3
Results/test_acc_bottom_decile,0,3,0,3
Results/test_acc_std,2,0,0,2
Results/test_acc,1,1,0,2
Results_unseen/test_acc_bottom_decile,1,0,1,2
Results_unseen/test_acc_std,0,1,0,1
Results_weighted/test_acc,0,1,0,1


Now seperately, based on finetuning status, repeat the above summary

In [17]:
(yes_run_summary, yes_metric_summary, _) = helper.process_run_metrics(ft_yes)
print('ft_yes runs:', yes_run_summary.shape)
print('ft_yes metrics:', yes_metric_summary.shape)
(no_run_summary, no_metric_summary, _) = helper.process_run_metrics(ft_no)
print('ft_no runs:', all_run_summary.shape)
print('ft_no metrics:', all_metric_summary.shape)

ft_yes runs: (6, 4)
ft_yes metrics: (8, 4)
ft_no runs: (11, 4)
ft_no metrics: (8, 4)


In [18]:
# run summary
yes_run_summary.sort_values(by='value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,1,8,6,6,20
fedbn,1,5,6,3,14
fedem,1,4,4,2,10
pfedme,1,2,1,5,8
ditto,1,0,5,2,7
fedavg,1,3,2,2,7


In [19]:
# metrics summary
yes_metric_summary.sort_values(by=sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_unseen/test_acc,2,1,0,3
Results_weighted_unseen/test_acc,2,1,0,3
Results/test_acc_std,1,0,2,3
Results/test_acc_bottom_decile,0,2,1,3
Results_unseen/test_acc_bottom_decile,1,1,0,2
Results_unseen/test_acc_std,1,1,0,2
Results/test_acc,1,0,1,2
Results_weighted/test_acc,0,0,2,2


In [20]:
# run summary
no_run_summary.sort_values(by='value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,0,9,7,4,20
fedem,0,3,5,9,17
fedbn,0,8,0,1,9
pfedme,0,2,4,2,8
ditto,0,0,4,3,7
fedavg,0,0,2,4,6


In [21]:
# metrics summary
no_metric_summary.sort_values(by=sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_unseen/test_acc,2,1,0,3
Results_weighted_unseen/test_acc,2,1,0,3
Results/test_acc_bottom_decile,1,1,1,3
Results/test_acc_std,1,1,1,3
Results_weighted/test_acc,1,1,1,3
Results/test_acc,1,1,0,2
Results_unseen/test_acc_bottom_decile,1,0,1,2
Results_unseen/test_acc_std,0,1,0,1


In [22]:
helper.runs_to_latex(best_runs, 'all')
helper.runs_to_latex(best_runs.loc[best_runs.finetune == 1], 'ft_yes')
helper.runs_to_latex(best_runs.loc[best_runs.finetune == 0], 'ft_no')