Import packages and load the processed run metrics

In [1]:
import re
import numpy as np
import pandas as pd
# Load the processed run metrics from a CSV referenced by relative path.
df = pd.read_csv('final_run_metrics.csv')

# Name of the column used below to pick the best run within each group
# (it is the column passed to idxmax in the ranking cell).
validation_metric = 'Results/val_acc'

Filter data to runs of interest

In [2]:
# keep only the runs whose n_epochs value is strictly below 6
under_six_epochs = df['n_epochs'] < 6
filtered_df = df[under_six_epochs]

Write function to return summaries of each run's metric performance

In [3]:
# Columns that identify an experimental configuration.
id_columns = ['dataset', 'method', 'finetune']

# get best runs for each group: for every (dataset, method, finetune)
# combination keep the single run with the highest validation metric
idx = filtered_df.groupby(id_columns)[validation_metric].idxmax()  # index label of each group's best run
filtered_df = df.loc[idx]

## Process metrics and get top runs for each
# every column whose name starts with 'Results' is treated as a metric
metric_columns = [name for name in filtered_df.columns if re.match('Results', name)]

# Are large or small metric values desirable? Metrics with 'std' in their
# name are ranked ascending (smaller ranks first); all others descending.
# The two groups are built with independent comprehensions on purpose:
# popping from a list while iterating over it skips the element that follows
# each removal, so back-to-back 'std' columns would be left in the
# descending group and ranked in the wrong direction.
ascending_metrics = [name for name in metric_columns if re.search('std', name)]
descending_metrics = [name for name in metric_columns if name not in ascending_metrics]

# all non-metric columns are used to identify the experimental run
filtered_runs = filtered_df.drop(columns=metric_columns)

# rank the metrics; method='first' breaks ties by order of appearance so
# every rank value is assigned to exactly one run per metric
ranked_descending = filtered_df[descending_metrics].rank(
    method='first',
    ascending=False
)
ranked_ascending = filtered_df[ascending_metrics].rank(
    method='first',
    ascending=True
)

# combine and sort the ranked metrics by column name
ranked_metrics = pd.concat([ranked_descending, ranked_ascending], axis=1)
ranked_metrics = ranked_metrics[sorted(ranked_metrics.columns)]

# attach the ranks to the run id columns (joined on the shared row index)
filtered_ranks = filtered_runs[id_columns].join(ranked_metrics)

In [4]:
## Compute rank summaries to understand what runs are top overall
# for every run, count the metrics on which it placed first, second and third
rank_ones = ranked_metrics.eq(1).sum(axis=1)
rank_twos = ranked_metrics.eq(2).sum(axis=1)
rank_threes = ranked_metrics.eq(3).sum(axis=1)
rank_summaries = pd.DataFrame({
    'rank_one_count': rank_ones,
    'rank_two_count': rank_twos,
    'rank_three_count': rank_threes,
})

# attach the counts to the run id columns and order the rows by run identity
filtered_summary = (
    filtered_runs[id_columns]
    .join(rank_summaries)
    .sort_values(by=id_columns)
)

# total number of top-three placements: sum of every non-id column
filtered_summary['top_rank_count'] = (
    filtered_summary.drop(columns=id_columns).sum(axis=1)
)
filtered_summary.head()

Unnamed: 0,dataset,method,finetune,rank_one_count,rank_two_count,rank_three_count,top_rank_count
28,sst2,ditto,False,4,0,1,5
48,sst2,ditto,True,2,2,0,4
3,sst2,exact,False,4,9,5,18
79,sst2,exact,True,0,1,4,5
12,sst2,fedavg,False,1,0,3,4


In [5]:
## Find metrics where decay performs well for each dataset
# long format: one row per (run, metric) pair, with the rank in 'value'
long_filtered_ranks = filtered_ranks.melt(id_vars=id_columns, var_name='metric')

# keep only the top 3 placements and flag the rows whose method is 'exact'
# (the flag is stored in a column named 'decay')
is_top_three = long_filtered_ranks['value'] <= 3
top_filtered_metrics = long_filtered_ranks[is_top_three].assign(
    decay=lambda frame: frame['method'] == 'exact'
)

# count top placements per (metric, decay) pair, then go back to wide
# format with one row per metric and one column per flag value
filter_columns = ['metric', 'decay']
top_filtered_metrics = (
    top_filtered_metrics
    .groupby(filter_columns)
    .count()
    .reset_index()
    [filter_columns + ['value']]
    .pivot(index='metric', columns='decay')
)
print(top_filtered_metrics.shape)

# flatten the ('value', flag) column labels to the flag's first letter
top_filtered_metrics.columns = [str(col[-1])[0] for col in top_filtered_metrics.columns]
top_filtered_metrics.head()

(25, 2)


Unnamed: 0_level_0,F,T
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
Results/test_acc,2.0,1.0
Results/test_acc_bottom_decile,1.0,2.0
Results/test_acc_std,1.0,2.0
Results/test_f1,2.0,1.0
Results/test_f1_bottom_decile,2.0,1.0


In [6]:
def process_run_metrics(df):
    """Placeholder for a function that summarizes run metrics.

    The body is currently a no-op, so the function ignores ``df`` and
    returns ``None``. A draft implementation is kept in the string literal
    that follows this definition; it is not executed.
    """
    pass

# The triple-quoted string below is a disabled draft of the function body.
# It is a bare expression, so as the last statement of the cell it is only
# echoed back as the cell output.
# NOTE(review): the draft builds ascending_metrics by popping from
# descending_metrics while iterating over that same list, which skips the
# element following each removal -- revisit before enabling it.
"""
    # get best runs for each group
    idx = df.groupby(['dataset', 'method', 'finetune']) \
        [validation_metric].idxmax()  # return index of max validation metric
    filtered_df = df.loc[idx]


    ## Process metrics and get top runs for each
    # are large or small metric values are desirable?
    descending_metrics = [name for name in filtered_df.columns if re.match('Results', name)]
    ascending_metrics = [
        descending_metrics.pop(descending_metrics.index(name))
        for name in descending_metrics
        if re.search('std', name)
    ]

    # all non-metrics columns are used to identify the experimental run
    filtered_runs = filtered_df[[
        name for name in filtered_df.columns
        if name not in descending_metrics + ascending_metrics
    ]]

    # rank the metrics
    ranked_descending = filtered_df[descending_metrics].rank(
        method='first',
        ascending=False
    )
    ranked_ascending = filtered_df[ascending_metrics].rank(
        method='first',
        ascending=True
    )

    # combine and sort the ranked_metrics
    ranked_metrics = pd.concat([ranked_descending, ranked_ascending], axis=1)
    ranked_metrics = ranked_metrics[sorted(ranked_metrics.columns)]
    filtered_ranks = filtered_runs.join(ranked_metrics)


    ## Compute rank summaries to understand what runs are top overall
    rank_ones = (ranked_metrics == 1).sum(axis = 1)
    rank_twos = (ranked_metrics == 2).sum(axis = 1)
    rank_threes = (ranked_metrics == 3).sum(axis = 1)
    rank_summaries = pd.concat({
        'rank_one_count':rank_ones,
        'rank_two_count':rank_twos,
        'rank_three_count':rank_threes
    }, axis=1)
    print(rank_summaries)

    # append summary to run id columns
    id_columns = ['dataset', 'method', 'finetune']
    filtered_summary = filtered_runs[id_columns].join(rank_summaries)

    # sort based on runs and view
    filtered_summary = filtered_summary.sort_values(by=id_columns)
    filtered_summary.head()

    return (
        filtered_ranks,
        filtered_df
    )
"""



"\n    # get best runs for each group\n    idx = df.groupby(['dataset', 'method', 'finetune'])         [validation_metric].idxmax()  # return index of max validation metric\n    filtered_df = df.loc[idx]\n\n\n    ## Process metrics and get top runs for each\n    # are large or small metric values are desirable?\n    descending_metrics = [name for name in filtered_df.columns if re.match('Results', name)]\n    ascending_metrics = [\n        descending_metrics.pop(descending_metrics.index(name))\n        for name in descending_metrics\n        if re.search('std', name)\n    ]\n\n    # all non-metrics columns are used to identify the experimental run\n    filtered_runs = filtered_df[[\n        name for name in filtered_df.columns\n        if name not in descending_metrics + ascending_metrics\n    ]]\n\n    # rank the metrics\n    ranked_descending = filtered_df[descending_metrics].rank(\n        method='first',\n        ascending=False\n    )\n    ranked_ascending = filtered_df[ascending_m