Import packages and processed run metrics

In [15]:
# packages
import os
import re
import numpy as np
import pandas as pd

# local helper module: its file name starts with a digit, so a plain
# `import` statement cannot name it and `__import__` is used instead
helper = __import__('00_helper')

# notebook controls
validation_metric = 'Results/val_acc'  # column used to pick the best run per group
write_files = 1  # truthy -> the summary cells also call helper.runs_to_latex
project_names = ['femnist--s02', 'sst2', 'pubmed']
# CIFAR projects are currently excluded from the list above:
#   'cifar--alpha5.0', 'cifar--alpha0.5', 'cifar--alpha0.1'

# load the runs of every selected project
df = helper.load_data(project_names)

## For CIFAR
# remove_methods = [
#     method for method in df.method.unique()
#     if method not in ['fedavg', 'exact']
# ]


input datasets:
	 (651, 36)
	 (1200, 44)
	 (780, 37)
all runs: (2631, 44)


In [16]:
def df_to_latex(
    df,
    id_columns,
    file_name=None,
    remove_columns=None,
    file_path='output'
):
    """Render a DataFrame as LaTeX table rows (header row included).

    The frame is sorted by ``id_columns``, printed with ``DataFrame.to_string``
    and then rewritten so that cells are joined by `` & `` and every row ends
    with `` \\\\``. No ``tabular`` environment is emitted, only the rows.

    Caveats of the text-based conversion:
      * every run of spaces/tabs becomes a column separator, so a cell that
        itself contains whitespace is split into several columns;
      * every underscore (in headers and in values) is replaced by a space.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to convert.
    id_columns : str or list of str
        Column(s) the rows are sorted by.
    file_name : str, optional
        When given, the rows are written to ``<file_path>/<file_name>.txt``
        and nothing is returned.
    remove_columns : list of str, optional
        Columns to leave out of the table.
    file_path : str
        Directory of the output file; created if it does not exist.

    Returns
    -------
    str or None
        The LaTeX rows when ``file_name`` is None, otherwise None.
    """

    # remove columns not of interest
    if remove_columns is None:
        remove_columns = []
    kept_columns = [name for name in df.columns if name not in remove_columns]

    # sort by id columns
    temp_df = df[kept_columns].sort_values(by=id_columns)

    # plain-text rendering that is rewritten into LaTeX below
    string_df = temp_df.to_string(
        header=True,
        index=False,
        index_names=False
    )

    # raw strings keep `\s` a regex class instead of an invalid string escape
    # remove any leading space
    string_df = re.sub(r'^\s+', '', string_df)
    # remove leading space after newlines
    string_df = re.sub(r'\n\s+', '\n', string_df)
    # replace white space between words with table column skip
    string_df = re.sub(r'[ \t]+', ' & ', string_df)

    # underscores are replaced by spaces in the output
    string_df = string_df.replace('_', ' ')
    # add latex newline to end of each line (the last line has no '\n')
    string_df = string_df.replace('\n', ' \\\\\n')
    string_df = string_df + ' \\\\'

    if file_name is None:
        return string_df

    # write to file, creating the target directory on first use
    if file_path:
        os.makedirs(file_path, exist_ok=True)
    with open(os.path.join(file_path, f'{file_name}.txt'), 'w') as f:
        f.write(string_df)


Filter data to runs of interest
Apply summary function to filtered dataset

In [17]:
print('input dataset:', df.shape)

## row (run) filtering: drop the extra hyper-parameter search runs
# NOTE(review): the printed messages say "3 or fewer" while the thresholds
# below are `< 6` -- confirm which of the two is intended

few_local_epochs = df.n_epochs < 6
subset_df = df.loc[few_local_epochs]
print('\t reduce to 3 or fewer local update steps:', subset_df.shape)

# the K threshold only applies to pfedme runs; all other methods pass through
keep_for_pfedme = (subset_df.K < 6) | (subset_df.method != 'pfedme')
subset_df = subset_df.loc[keep_for_pfedme]
print('\t reduce to 3 or fewer local meta-learning steps for pfedme:', subset_df.shape)

# for 'exact' runs keep only betas whose tenfold value is an even integer
tenfold_beta = (10 * subset_df.beta).astype('Int64')
keep_for_exact = (tenfold_beta % 2 == 0) | (subset_df.method != 'exact')
subset_df = subset_df.loc[keep_for_exact]
print('\t reduce beta grid for decay:', subset_df.shape)

## column (metric) filtering: drop columns matching any of these patterns
excluded_patterns = ('^sys', 'f1', 'loss')
kept_columns = [
    name for name in subset_df.columns
    if not any(re.search(pattern, name) for pattern in excluded_patterns)
]
subset_df = subset_df[kept_columns]

input dataset: (2631, 44)
	 reduce to 3 or fewer local update steps: (1945, 44)
	 reduce to 3 or fewer local meta-learning steps for pfedme: (1865, 44)
	 reduce beta grid for decay: (1768, 44)


In [18]:
## best run of every (dataset, method, finetune) group
# i.e. the row holding the highest validation metric inside each group
group_keys = ['dataset', 'method', 'finetune']
best_index = subset_df.groupby(group_keys)[validation_metric].idxmax()
best_runs = subset_df.loc[best_index]

# treat finetuning groups as separate
ft_yes = best_runs.loc[best_runs.finetune == 1]
ft_no = best_runs.loc[best_runs.finetune == 0]

# best_runs.loc[[method not in remove_methods for method in best_runs.method]].head(12)
best_runs.columns

Index(['method', 'finetune', 'dataset', 's', 'n_epochs', 'lr',
       'regular_weight', 'K', 'beta', 'Results/test_acc',
       'Results/test_acc_bottom_decile', 'Results/test_acc_std',
       'Results/val_acc', 'Results_unseen/test_acc',
       'Results_unseen/test_acc_bottom_decile', 'Results_unseen/test_acc_std',
       'Results_weighted/test_acc', 'Results_weighted_unseen/test_acc',
       'batch_size'],
      dtype='object')

For all best runs, regardless of finetuning, produce summaries

In [19]:
# For existing users, FT since sufficient data
# keep the columns whose name contains neither 'weighted' nor 'unseen',
# plus every non-'Results' column and every 'val' column
seen_columns = [
    name for name in ft_yes.columns
    if not (re.search('weighted', name) or re.search('unseen', name))
    or not re.search('Results', name)
    or re.search('val', name)
]
seen_best_runs = ft_yes[seen_columns]

if write_files:
    helper.runs_to_latex(seen_best_runs, 'seen--ft_yes')

seen_run_summary, seen_metric_summary, seen_rank_summary = (
    helper.process_run_metrics(seen_best_runs)
)
print('all runs:', seen_run_summary.shape)
print('all metrics:', seen_metric_summary.shape)

all runs: (6, 4)
all metrics: (3, 4)


In [20]:
# per-run summary for the seen selection, largest `value` first
seen_run_summary.sort_values('value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
fedbn,1,2,3,1,6
pfedme,1,3,1,2,6
ditto,1,0,2,3,5
exact,1,2,2,1,5
fedem,1,1,1,1,3
fedavg,1,1,1,0,2


View processed summaries

In [21]:
# per-metric summary for the seen selection; ties on `value` are broken by
# the rank-one, rank-two and rank-three indicator columns, in that order
sort_by = [
    'value',
    'rank_one_ind',
    'rank_two_ind',
    'rank_three_ind',
]
seen_metric_summary.sort_values(sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results/test_acc_std,1,0,1,2
Results/test_acc_bottom_decile,0,2,0,2
Results/test_acc,1,0,0,1


Now, separately by finetuning status, repeat the above summary

In [22]:
# keep the non-'weighted' columns whose name contains 'unseen',
# plus every non-'Results' column and every 'val' column
unseen_columns = [
    name for name in ft_yes.columns
    if (not re.search('weighted', name) and re.search('unseen', name))
    or not (re.search('Results', name) and not re.search('val', name))
]
unseen_best_runs = best_runs[unseen_columns]

In [23]:
# split the unseen selection by finetuning status
# NOTE(review): this rebinds `ft_yes` / `ft_no`, which earlier cells also
# define from `best_runs`; re-running those cells after this one would read
# the reduced column set -- consider distinct names
finetuned = unseen_best_runs.finetune == 1
not_finetuned = unseen_best_runs.finetune == 0
ft_yes = unseen_best_runs.loc[finetuned]
ft_no = unseen_best_runs.loc[not_finetuned]

# summarise (and optionally export) the finetuned runs
yes_run_summary, yes_metric_summary, _ = helper.process_run_metrics(ft_yes)
if write_files:
    helper.runs_to_latex(ft_yes, 'unseen--ft_yes')

# summarise (and optionally export) the runs without finetuning
no_run_summary, no_metric_summary, _ = helper.process_run_metrics(ft_no)
if write_files:
    helper.runs_to_latex(ft_no, 'unseen--ft_no')

In [24]:
# per-run summary, unseen selection with finetune == 1, largest `value` first
yes_run_summary.sort_values('value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
exact,1,4,2,1,7
fedavg,1,2,1,1,4
fedbn,1,0,3,1,4
pfedme,1,0,1,3,4
fedem,1,1,1,0,2


In [25]:
# per-metric summary, unseen selection with finetune == 1, ordered by `sort_by`
yes_metric_summary.sort_values(sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_unseen/test_acc,2,1,0,3
Results_unseen/test_acc_std,1,1,0,2
Results_unseen/test_acc_bottom_decile,1,0,1,2


In [26]:
# per-run summary, unseen selection with finetune == 0, largest `value` first
no_run_summary.sort_values('value', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
method,finetune,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
fedem,0,1,3,3,7
exact,0,3,2,1,6
pfedme,0,2,1,2,5
ditto,0,0,1,0,1
fedavg,0,0,0,1,1
fedbn,0,1,0,0,1


In [27]:
# per-metric summary, unseen selection with finetune == 0, ordered by `sort_by`
no_metric_summary.sort_values(sort_by, ascending=False)

Unnamed: 0_level_0,rank_one_ind,rank_two_ind,rank_three_ind,value
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Results_unseen/test_acc,2,1,0,3
Results_unseen/test_acc_bottom_decile,1,0,1,2
Results_unseen/test_acc_std,0,1,0,1


Temporary scratch work below

In [28]:
# Compare accuracy on the 'Results' and 'Results_unseen' test metrics for
# every best run and print the comparison as LaTeX table rows.
id_columns = ['method', 'finetune', 'dataset']
metrics = ['Results/test_acc', 'Results_unseen/test_acc']

# `.assign` builds a new frame, so the derived column is never written into a
# slice of `best_runs` (the in-place assignment raised SettingWithCopyWarning)
temp = best_runs[id_columns + metrics].assign(
    generalization_gap=lambda runs: (
        runs['Results_unseen/test_acc'] - runs['Results/test_acc']
    )
)

s = df_to_latex(temp, id_columns)
print(s)

method & finetune & dataset & Results/test acc & Results unseen/test acc & generalization gap \\
ditto & 0 & femnist--s02 & 0.895517 & 0.013385 & -0.882132 \\
ditto & 0 & pubmed & 0.871126 & 0.001783 & -0.869343 \\
ditto & 0 & sst2 & 0.513384 & 0.539752 & 0.026368 \\
ditto & 1 & femnist--s02 & 0.903150 & 0.567176 & -0.335973 \\
ditto & 1 & pubmed & 0.875375 & 0.244207 & -0.631169 \\
ditto & 1 & sst2 & 0.594936 & 0.474551 & -0.120386 \\
exact & 0 & femnist--s02 & 0.893264 & 0.913772 & 0.020508 \\
exact & 0 & pubmed & 0.874337 & 0.704100 & -0.170237 \\
exact & 0 & sst2 & 0.783775 & 0.794374 & 0.010599 \\
exact & 1 & femnist--s02 & 0.898591 & 0.915247 & 0.016656 \\
exact & 1 & pubmed & 0.872161 & 0.803922 & -0.068239 \\
exact & 1 & sst2 & 0.781485 & 0.810150 & 0.028664 \\
fedavg & 0 & femnist--s02 & 0.873541 & 0.897397 & 0.023856 \\
fedavg & 0 & pubmed & 0.855285 & 0.666667 & -0.188618 \\
fedavg & 0 & sst2 & 0.774538 & 0.752555 & -0.021983 \\
fedavg & 1 & femnist--s02 & 0.885054 & 0.90550

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['generalization_gap'] = temp['Results_unseen/test_acc'] - temp['Results/test_acc']
