Import packages and processed run metrics

In [None]:
# packages
import os
import re
import numpy as np
import pandas as pd

# source
# The helper module's file name starts with a digit, so it cannot be loaded
# with a plain `import` statement; __import__ takes the name as a string.
helper = __import__('00_helper')

# control
write_files = True  # when truthy, later cells call helper.runs_to_latex
validation_metric = 'Results/val_acc'  # column used to pick the best run per group
project_names = [
    'femnist--s02', 'sst2', 'pubmed'#,
    #'cifar--alpha5.0', 'cifar--alpha0.5', 'cifar--alpha0.1'
]
df = helper.load_data(project_names)

## For CIFAR
# remove_methods = [
#     method for method in df.method.unique()
#     if method not in ['fedavg', 'exact']
# ]

# Custom sorts
# Ordered categoricals so that sorting follows the order listed here.
from pandas.api.types import CategoricalDtype
cat_method_type = CategoricalDtype(
    [
        'ditto', 'fedbn', 'fedem', 'pfedme',  # pfl-methods
        'fomaml', 'fedavg', 'exact'  # ml-methods
    ],
    ordered=True
)
# Dataset order follows project_names instead of a second hard-coded copy of
# the same list: values missing from the categories become NaN when a column
# is cast to this dtype, so the two lists must not drift apart.
# NOTE(review): assumes the `dataset` column holds the project names, as the
# previous hard-coded list did -- confirm against helper.load_data.
cat_dataset_type = CategoricalDtype(
    list(project_names),
    ordered=True
)


In [None]:
def df_to_latex(
    df,
    id_columns,
    file_name=None,
    remove_columns=None,
    file_path='output'
):
    r"""Render a data frame as the rows of a LaTeX table.

    The frame is sorted by ``id_columns`` and printed with
    ``DataFrame.to_string`` (header row included, index omitted). Runs of
    spaces/tabs between fields are then replaced by `` & ``, underscores are
    replaced by spaces, and every row is terminated with ``\\``.

    NOTE: fields are separated on whitespace, so a cell value or column name
    that itself contains a space ends up spread over several table columns.

    Args:
        df: pandas DataFrame to render.
        id_columns: column name, or list of names, to sort the rows by.
        file_name: base name (without extension) of the output file. When
            None, nothing is written and the string is returned instead.
        remove_columns: column names to leave out of the table.
        file_path: directory of the output file; created when missing.

    Returns:
        The table rows as a string when ``file_name`` is None; otherwise
        None, after writing the string to ``<file_path>/<file_name>.txt``.
    """
    # remove columns not of interest
    if remove_columns is None:
        remove_columns = []
    kept_columns = [name for name in df.columns if name not in remove_columns]

    # sort by id columns
    temp_df = df[kept_columns].sort_values(by=id_columns)

    # plain-text rendering, one line per row
    string_df = temp_df.to_string(
        header=True,
        index=False,
        index_names=False
    )

    # remove any leading space
    string_df = re.sub(r'^\s+', '', string_df)
    # remove leading space after newlines
    string_df = re.sub(r'\n\s+', '\n', string_df)
    # replace white space between words with table column skip
    string_df = re.sub(r'[ \t]+', ' & ', string_df)

    # underscores would need escaping in LaTeX; swap them for spaces
    string_df = string_df.replace('_', ' ')
    # add latex newline to end of each line (including the last one)
    string_df = string_df.replace('\n', ' \\\\\n')
    string_df = string_df + ' \\\\'

    if file_name is None:
        return string_df

    # write to file, creating the target directory on first use
    if file_path:
        os.makedirs(file_path, exist_ok=True)
    with open(
        os.path.join(file_path, f'{file_name}.txt'), 'w', encoding='utf-8'
    ) as f:
        # a single write; writelines() on a str would emit it char by char
        f.write(string_df)
    return None


Filter data to runs of interest
Apply summary function to filtered dataset

In [None]:
subset_df = df
print('input dataset:', df.shape)

## row (run) filtering
# remove extra hyper-parameter searches

# keep runs whose n_epochs is below 6
subset_df = subset_df.loc[(subset_df.n_epochs < 6)]
print('\t reduce to 3 or fewer local update steps:', subset_df.shape)

# the K < 6 cut applies to 'pfedme' rows only; every other method passes
subset_df = subset_df.loc[(subset_df.K < 6) | (subset_df.method != 'pfedme')]
print('\t reduce to 3 or fewer local meta-learning steps for pfedme:', subset_df.shape)

# For 'exact' rows keep only those where 10 * beta is an even integer; every
# other method passes. The product is rounded to 6 decimals before the cast
# because astype('Int64') refuses floats that are not exactly integral, and
# floating-point noise (e.g. 10 * (0.1 + 0.2) == 3.0000000000000004) would
# otherwise abort the cell. Values genuinely off the tenths grid still raise.
subset_df = subset_df.loc[
    ((10 * subset_df.beta).round(6).astype('Int64') % 2 == 0)
    | (subset_df.method != 'exact')
]
print('\t reduce beta grid for decay:', subset_df.shape)

## column (metric) filtering
# drop every column whose name starts with 'sys' or contains 'f1' or 'loss'
subset_df = subset_df[[
    name for name in subset_df.columns
    if not re.search(r'^sys|f1|loss', name)
]]

In [None]:
## best run for every (dataset, method, finetune) combination
# groupby(...).idxmax() gives, per group, the index label of the row with the
# largest validation metric; .loc then pulls exactly those rows
best_runs = subset_df.loc[
    subset_df.groupby(['dataset', 'method', 'finetune'])[validation_metric].idxmax()
]
# cast both id columns to their ordered categorical dtypes in one call
best_runs = best_runs.astype({
    'method': cat_method_type,
    'dataset': cat_dataset_type,
})

# split the best runs by finetuning flag
ft_yes = best_runs[best_runs['finetune'] == 1]
ft_no = best_runs[best_runs['finetune'] == 0]

# best_runs.loc[[method not in remove_methods for method in best_runs.method]].head(12)
best_runs.columns

For all best runs, regardless of finetuning, produce summaries

In [None]:
# Existing users: take the finetuned runs (FT since sufficient data).
# A column is kept when its name does not contain 'Results', or contains
# 'val', or contains neither 'weighted' nor 'unseen'.
seen_best_runs = ft_yes[[
    name for name in ft_yes.columns
    if 'Results' not in name
    or 'val' in name
    or not ('weighted' in name or 'unseen' in name)
]]

if write_files:
    helper.runs_to_latex(seen_best_runs, 'seen--ft_yes')

seen_run_summary, seen_metric_summary, seen_rank_summary = \
    helper.process_run_metrics(seen_best_runs)
print('all runs:', seen_run_summary.shape)
print('all metrics:', seen_metric_summary.shape)

In [None]:
# seen-client run summary, largest 'value' first
seen_run_summary.sort_values('value', ascending=False)

View processed summaries

In [None]:
# seen-client metrics summary, sorted descending on the keys below
# (sort_by is reused by the later metric-summary cells)
sort_by = [
    'value',
    'rank_one_ind',
    'rank_two_ind',
    'rank_three_ind',
]
seen_metric_summary.sort_values(sort_by, ascending=False)

Now separately, based on finetuning status, repeat the above summary

In [None]:
# Column selection for the 'unseen' results. A column is kept when it
# contains 'unseen' but not 'weighted', or does not contain 'Results',
# or contains 'val'.
# The names are taken from best_runs itself (the frame being indexed) rather
# than from ft_yes: ft_yes is rebound further down in this notebook, so
# relying on it would make this cell depend on execution order.
unseen_best_runs = best_runs[[
    name for name in best_runs.columns
    if (
        not re.search('weighted', name)
        and re.search('unseen', name)
    )
    or (
        not re.search('Results', name)
        or re.search('val', name)
    )
]]

In [None]:
# split the unseen-column frame by finetuning flag
# (this rebinds ft_yes / ft_no, which an earlier cell derived from best_runs)
ft_yes = unseen_best_runs[unseen_best_runs['finetune'] == 1]
ft_no = unseen_best_runs[unseen_best_runs['finetune'] == 0]

# finetuned runs: summarise, then export when write_files is set
yes_run_summary, yes_metric_summary, _ = helper.process_run_metrics(ft_yes)
if write_files:
    helper.runs_to_latex(ft_yes, 'unseen--ft_yes')

# non-finetuned runs: same two steps
no_run_summary, no_metric_summary, _ = helper.process_run_metrics(ft_no)
if write_files:
    helper.runs_to_latex(ft_no, 'unseen--ft_no')

In [None]:
# unseen / finetuned: run summary, largest 'value' first
yes_run_summary.sort_values('value', ascending=False)

In [None]:
# unseen / finetuned: metrics summary, descending on the sort_by keys
yes_metric_summary.sort_values(sort_by, ascending=False)

In [None]:
# unseen / not finetuned: run summary, largest 'value' first
no_run_summary.sort_values('value', ascending=False)

In [None]:
# unseen / not finetuned: metrics summary, descending on the sort_by keys
no_metric_summary.sort_values(sort_by, ascending=False)

Temporary scratch work below

In [None]:
# Table of test accuracy on the regular vs. the 'unseen' results for each
# best run, plus the difference between the two.
id_columns = ['method', 'finetune', 'dataset']
metrics = ['Results/test_acc', 'Results_unseen/test_acc']
# .copy() makes temp an independent frame, so adding a column below is not a
# chained assignment on a selection of best_runs (SettingWithCopyWarning)
temp = best_runs[id_columns + metrics].copy()
# unseen accuracy minus regular accuracy
temp['generalization_gap'] = temp['Results_unseen/test_acc'] - temp['Results/test_acc']

# no file_name given, so df_to_latex returns the table text
s = df_to_latex(temp, id_columns)
print(s)