Import packages and processed run metrics

In [63]:
# packages
import os
import re
import numpy as np
import pandas as pd

# source
# The helper module's file name starts with a digit, so it cannot be named in
# a plain `import` statement and has to be loaded by string.
# importlib.import_module is the documented replacement for calling
# __import__ directly.
import importlib

helper = importlib.import_module('00_helper')

# control
write_files = 0  # falsy: the `if write_files:` guards below skip helper.runs_to_latex
validation_metric = 'Results/val_acc'  # column maximised when picking the best run per group
project_names = [
    'femnist--s02', 'sst2', 'pubmed'#,
    #'cifar--alpha5.0', 'cifar--alpha0.5', 'cifar--alpha0.1'
]
# one frame holding the runs of every listed project
df = helper.load_data(project_names)

## For CIFAR (disabled; only relevant when the cifar projects above are enabled)
#remove_methods = [
#    method for method in df.method.unique()
#    if method not in ['fedavg', 'exact']
#]


input datasets:
	 (520, 36)
	 (520, 36)
	 (520, 36)
all runs: (1560, 35)


Filter data to runs of interest
Apply summary function to filtered dataset

In [64]:
# Start from the full set of runs; every step below rebinds `subset_df` to a
# smaller frame and reports the resulting shape.
subset_df = df
print('input dataset:', df.shape)

## row (run) filtering
# remove extra hyper-parameter searches

# NOTE(review): the messages say "3 or fewer" while the thresholds are `< 6`;
# presumably the searched grids have no value between 3 and 6 -- confirm.
subset_df = subset_df.loc[(subset_df.n_epochs < 6)]
print('\t reduce to 3 or fewer local update steps:', subset_df.shape)

# the K restriction only applies to pfedme runs; all other methods pass through
subset_df = subset_df.loc[(subset_df.K < 6) | (subset_df.method != 'pfedme')]
print('\t reduce to 3 or fewer local meta-learning steps for pfedme:', subset_df.shape)

# Keep only every other beta value (even tenths) for the 'exact' method.
# Round before the nullable-integer cast: pandas refuses to cast floats that
# are not exactly integral, so any representation error in `10 * beta`
# would otherwise raise a TypeError. Missing betas stay <NA>.
beta_tenths = (10 * subset_df.beta).round().astype('Int64')
subset_df = subset_df.loc[(beta_tenths % 2 == 0) | (subset_df.method != 'exact')]
print('\t reduce beta grid for decay:', subset_df.shape)

## column (metric) filtering
# drop columns whose name starts with 'sys' or mentions 'f1' or 'loss'
subset_df = subset_df[[
    name for name in subset_df.columns
    if re.search(r'^sys|f1|loss', name) is None
]]

input dataset: (1560, 35)
	 reduce to 3 or fewer local meta-learning steps for pfedme: (1560, 35)
	 reduce beta grid for decay: (1260, 35)


In [65]:
## pick the winning run inside every (dataset, method, finetune) group
group_keys = ['dataset', 'method', 'finetune']

# index label of the row with the highest validation metric in each group
best_idx = subset_df.groupby(group_keys)[validation_metric].idxmax()
best_runs = subset_df.loc[best_idx]

# treat finetuning groups as separate
is_finetuned = best_runs.finetune == 1
is_not_finetuned = best_runs.finetune == 0
ft_yes = best_runs.loc[is_finetuned]
ft_no = best_runs.loc[is_not_finetuned]

#best_runs.loc[[method not in remove_methods for method in best_runs.method]].head(12)

Unnamed: 0,method,finetune,dataset,alpha,n_epochs,lr,beta,regular_weight,K,Results/test_acc,Results/test_acc_bottom_decile,Results/test_acc_std,Results/val_acc,Results_unseen/test_acc,Results_unseen/test_acc_bottom_decile,Results_unseen/test_acc_std,Results_weighted/test_acc,Results_weighted_unseen/test_acc
1530,exact,0,cifar--alpha0.1,0.1,6,0.1,1.0,,,0.594851,0.423423,0.146593,0.570317,0.641625,0.515406,0.117322,0.564013,0.615199
1450,exact,1,cifar--alpha0.1,0.1,6,0.1,0.8,,,0.589347,0.369565,0.14446,0.568931,0.633189,0.515152,0.119887,0.565931,0.606755
1040,fedavg,0,cifar--alpha0.1,0.1,6,0.1,,,,0.594851,0.423423,0.146593,0.570317,0.641625,0.515406,0.117322,0.564013,0.615199
1041,fedavg,1,cifar--alpha0.1,0.1,6,0.1,,,,0.590661,0.358696,0.150555,0.566829,0.625818,0.515152,0.109783,0.569408,0.60193
989,exact,0,cifar--alpha0.5,0.5,6,0.05,0.2,,,0.712724,0.615385,0.069776,0.714404,0.711504,0.608696,0.072264,0.711944,0.720452
1038,exact,1,cifar--alpha0.5,0.5,1,0.05,0.0,,,0.718009,0.639344,0.063321,0.712604,0.700924,0.633803,0.070719,0.713807,0.709147
523,fedavg,0,cifar--alpha0.5,0.5,6,0.05,,,,0.703486,0.6,0.071834,0.699693,0.694635,0.619718,0.052156,0.703005,0.700411
848,fedavg,1,cifar--alpha0.5,0.5,1,0.05,,,,0.718009,0.639344,0.063321,0.712604,0.700924,0.633803,0.070719,0.713807,0.709147
508,exact,0,cifar--alpha5.0,5.0,1,0.05,0.0,,,0.744859,0.686047,0.044244,0.745662,0.74095,0.696629,0.034485,0.744619,0.741606
519,exact,1,cifar--alpha5.0,5.0,1,0.05,0.0,,,0.73949,0.681319,0.04387,0.744677,0.734124,0.677083,0.045635,0.739333,0.734793


For seen (existing) clients, produce summaries of the best fine-tuned runs

In [66]:
# Seen (existing) clients: summarise the fine-tuned best runs
# (original rationale: FT since sufficient data).
def _keep_seen_column(name):
    """Decide whether a column belongs in the seen-client summary.

    A column is kept when its name mentions neither 'weighted' nor 'unseen',
    or when it is not a 'Results' column, or when it mentions 'val'.
    """
    if not re.search('weighted', name) and not re.search('unseen', name):
        return True
    if not re.search('Results', name):
        return True
    return bool(re.search('val', name))


seen_columns = [name for name in ft_yes.columns if _keep_seen_column(name)]
seen_best_runs = ft_yes[seen_columns]

# optional export, controlled by the `write_files` switch
if write_files:
    helper.runs_to_latex(seen_best_runs, 'seen--ft_yes')

seen_summaries = helper.process_run_metrics(seen_best_runs)
seen_run_summary, seen_metric_summary, seen_rank_summary = seen_summaries
print('all runs:', seen_run_summary.shape)
print('all metrics:', seen_metric_summary.shape)

all runs: (4, 4)
all metrics: (3, 4)


In [67]:
# seen clients, fine-tuned runs: per-method summary, highest 'value' first
seen_run_summary.sort_values('value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,1,4,5,0,9
fedavg,1,5,2,2,9
pfedme,1,4,0,2,6
fedem,1,0,0,3,3


View processed summaries

In [68]:
# seen clients, fine-tuned runs: per-metric summary.
# `sort_by` is reused by the later metric-summary cells; rows are ordered by
# 'value' first, with the rank indicator columns as tie-breakers.
sort_by = [
    'value',
    'rank_one_ind',
    'rank_two_ind',
    'rank_three_ind',
]
seen_metric_summary.sort_values(sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results/test_acc_bottom_decile,2,1,0,3
Results/test_acc_std,2,1,0,3
Results/test_acc,0,3,0,3


Now separately, based on finetuning status, repeat the above summary for the unseen-client metrics

In [69]:
# Column subset for the unseen-client analysis. A column is kept when it
# mentions 'unseen' but not 'weighted', or when it is not a 'Results' column,
# or when it mentions 'val'.
# The columns are read from `best_runs` itself (the frame being indexed)
# rather than from `ft_yes`, which is rebound in the next cell; this keeps the
# cell independent of execution order.
unseen_best_runs = best_runs[[
    name for name in best_runs.columns
    if (
        not re.search('weighted', name)
        and re.search('unseen', name)
    )
    or (
        not re.search('Results', name)
        or re.search('val', name)
    )
]]

In [70]:
# Split the unseen-client best runs by finetuning flag.
# NOTE(review): this rebinds `ft_yes` / `ft_no`, which an earlier cell built
# from the full `best_runs` frame; re-running the seen-client cell after this
# one would see the unseen column subset instead.
finetuned_mask = unseen_best_runs.finetune == 1
not_finetuned_mask = unseen_best_runs.finetune == 0
ft_yes = unseen_best_runs.loc[finetuned_mask]
ft_no = unseen_best_runs.loc[not_finetuned_mask]

# fine-tuned runs: summarise, then optionally export
yes_run_summary, yes_metric_summary, _ = helper.process_run_metrics(ft_yes)
if write_files:
    helper.runs_to_latex(ft_yes, 'unseen--ft_yes')

# runs without finetuning: summarise, then optionally export
no_run_summary, no_metric_summary, _ = helper.process_run_metrics(ft_no)
if write_files:
    helper.runs_to_latex(ft_no, 'unseen--ft_no')

In [71]:
# unseen clients, fine-tuned runs: per-method summary, highest 'value' first
yes_run_summary.sort_values('value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,1,7,1,1,9
fedavg,1,6,3,0,9
pfedme,1,1,0,4,5
fedem,1,1,0,3,4


In [72]:
# unseen clients, fine-tuned runs: per-metric summary ordered by the `sort_by` columns
yes_metric_summary.sort_values(sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_unseen/test_acc,3,0,0,3
Results_unseen/test_acc_bottom_decile,3,0,0,3
Results_unseen/test_acc_std,1,1,1,3


In [73]:
# unseen clients, runs without finetuning: per-method summary, highest 'value' first
no_run_summary.sort_values('value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,0,5,3,1,9
fedavg,0,6,2,1,9
fedem,0,1,2,5,8
ditto,0,1,0,0,1


In [74]:
# unseen clients, runs without finetuning: per-metric summary ordered by the `sort_by` columns
no_metric_summary.sort_values(sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_unseen/test_acc,2,1,0,3
Results_unseen/test_acc_bottom_decile,2,1,0,3
Results_unseen/test_acc_std,1,1,1,3
