This notebook contains plots of various statistical and systems metrics gathered from the last runs of Federated Dropout, FedAvg, and FedProx.

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import visualization_utils

from baseline_constants import (
    ACCURACY_KEY,
    BYTES_READ_KEY,
    BYTES_WRITTEN_KEY,
    CLIENT_ID_KEY,
    LOCAL_COMPUTATIONS_KEY,
    NUM_ROUND_KEY,
    NUM_SAMPLES_KEY)

In [4]:
def get_accuracy_vs_round_number(stat_metrics, weighted=False):
    """Summarise accuracy per round: a central value plus 10th/90th percentiles.

    Args:
        stat_metrics: DataFrame with a NUM_ROUND_KEY column. When ``weighted``
            is True it must also provide ACCURACY_KEY and NUM_SAMPLES_KEY.
        weighted: If True, each round's accuracy is the mean of ACCURACY_KEY
            weighted by NUM_SAMPLES_KEY (see ``_weighted_mean``). If False,
            the plain per-round mean of the columns is returned instead.

    Returns:
        A tuple ``(accuracies, percentile_10, percentile_90)``.
        ``accuracies`` is a frame with the round column and ACCURACY_KEY in
        the weighted case, or the per-round column means otherwise. The two
        percentile frames hold the per-round 0.10 and 0.90 quantiles of the
        columns of ``stat_metrics``.
    """
    if weighted:
        accuracies = (
            stat_metrics
            .groupby(NUM_ROUND_KEY)
            .apply(_weighted_mean, ACCURACY_KEY, NUM_SAMPLES_KEY)
            .reset_index(name=ACCURACY_KEY)
        )
    else:
        # A per-round standard deviation used to be computed here too, but it
        # was never returned or read, so the extra pass has been dropped.
        accuracies = stat_metrics.groupby(NUM_ROUND_KEY, as_index=False).mean()

    # Percentiles are taken over the raw (unweighted) rows in both modes.
    by_round = stat_metrics.groupby(NUM_ROUND_KEY, as_index=False)
    percentile_10 = by_round.apply(lambda group: group.quantile(0.10))
    percentile_90 = by_round.apply(lambda group: group.quantile(0.90))

    return accuracies, percentile_10, percentile_90


def _weighted_mean(df, metric_name, weight_name):
    d = df[metric_name]
    w = df[weight_name]
    try:
        return (w * d).sum() / w.sum()
    except ZeroDivisionError:
        return np.nan


In [5]:
# Result directories of the three runs compared in this notebook. Each one is
# read for 'metrics_stat.csv' and 'metrics_sys.csv' by the load cells below.
# Federated Dropout run
dpath = "data/dropout50/"
# FedAvg run
fpath = "data/fedavg50/"
# FedProx run
fppath = "data/fedprox50/"

# Dropout

In [6]:
# Load the statistical and systems metrics of the Federated Dropout run.
SHOW_WEIGHTED = True # show weighted accuracy instead of unweighted accuracy
# Not read by any cell visible here -- presumably toggles per-client plots; TODO confirm.
PLOT_CLIENTS = True
stat_file = dpath + 'metrics_stat.csv' # change to None if desired
sys_file = dpath + 'metrics_sys.csv' # change to None if desired

# NOTE(review): the recorded run of this cell ended in KeyError: 'round_number'.
# Presumably the CSVs lack that column or load_data expects a different schema -- verify.
dstat_metrics, dsys_metrics = visualization_utils.load_data(stat_file, sys_file)

KeyError: 'round_number'

# FedAvg

In [7]:
# Load the statistical and systems metrics of the FedAvg run.
SHOW_WEIGHTED = True # show weighted accuracy instead of unweighted accuracy
# Not read by any cell visible here -- presumably toggles per-client plots; TODO confirm.
PLOT_CLIENTS = True
stat_file = fpath + 'metrics_stat.csv' # change to None if desired
sys_file = fpath + 'metrics_sys.csv' # change to None if desired

# NOTE(review): the recorded run of this cell ended in KeyError: 'round_number'.
# Presumably the CSVs lack that column or load_data expects a different schema -- verify.
fstat_metrics, fsys_metrics = visualization_utils.load_data(stat_file, sys_file)

KeyError: 'round_number'

# FedProx

In [8]:
# Load the statistical and systems metrics of the FedProx run.
SHOW_WEIGHTED = True # show weighted accuracy instead of unweighted accuracy
# Not read by any cell visible here -- presumably toggles per-client plots; TODO confirm.
PLOT_CLIENTS = True
stat_file = fppath + 'metrics_stat.csv' # change to None if desired
sys_file = fppath + 'metrics_sys.csv' # change to None if desired

# NOTE(review): the recorded run of this cell ended in FileNotFoundError for
# 'data/fedprox50/metrics_stat.csv'; the FedProx results must exist before this cell runs.
fpstat_metrics, fpsys_metrics = visualization_utils.load_data(stat_file, sys_file)

FileNotFoundError: [Errno 2] File b'data/fedprox50/metrics_stat.csv' does not exist: b'data/fedprox50/metrics_stat.csv'

# Plots

In [9]:
# Weighted per-round accuracy and the 10th/90th percentile frames for each run
# (d = Federated Dropout, f = FedAvg, fp = FedProx).
daccuracies, dpercentile_10, dpercentile_90 = get_accuracy_vs_round_number(
    dstat_metrics, weighted=True)
faccuracies, fpercentile_10, fpercentile_90 = get_accuracy_vs_round_number(
    fstat_metrics, weighted=True)
fpaccuracies, fppercentile_10, fppercentile_90 = get_accuracy_vs_round_number(
    fpstat_metrics, weighted=True)


NameError: name 'dstat_metrics' is not defined

In [10]:
def plot_accuracy_vs_round_number(daccuracies, faccuracies, fpaccuracies, atype, weighted=False, figsize=(10, 8), title_fontsize=16, **kwargs):
    """Plot accuracy against round number for the three runs on one figure.

    Args:
        daccuracies: Frame for the Federated Dropout run; read for its
            NUM_ROUND_KEY and ACCURACY_KEY columns.
        faccuracies: Same, for the FedAvg run.
        fpaccuracies: Same, for the FedProx run.
        atype: Text prefixed to the figure title (e.g. 'Mean').
        weighted: Only selects 'Weighted' or 'Unweighted' in the title; the
            data passed in is plotted as-is.
        figsize: Figure size forwarded to ``plt.figure``.
        title_fontsize: Font size of the title.
        **kwargs: Accepted for call compatibility; currently unused.
    """
    plt.figure(figsize=figsize)
    title_weighted = 'Weighted' if weighted else 'Unweighted'
    plt.title(atype + ' Accuracy vs Round Number (%s)' % title_weighted, fontsize=title_fontsize)

    plt.plot(daccuracies[NUM_ROUND_KEY], daccuracies[ACCURACY_KEY], label='FedDropout')
    # This call used to be `plt.plotz`, which pyplot does not define, so the
    # cell raised AttributeError before the FedAvg curve could be drawn.
    plt.plot(faccuracies[NUM_ROUND_KEY], faccuracies[ACCURACY_KEY], label='FedAvg')
    plt.plot(fpaccuracies[NUM_ROUND_KEY], fpaccuracies[ACCURACY_KEY], label='FedProx')

    # Location code 2 is 'upper left'; labels come from the plot() calls above.
    plt.legend(loc=2)

    plt.ylabel('Accuracy')
    plt.xlabel('Round Number')
    plt.show()
    #plt.savefig('fig50.png')

# Draw the mean-accuracy curves of the three runs, titled as weighted.
plot_accuracy_vs_round_number(
    daccuracies, faccuracies, fpaccuracies, atype='Mean', weighted=True)


NameError: name 'daccuracies' is not defined

In [9]:
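# Percentile plots (disabled). plot_accuracy_vs_round_number takes three series
# (Dropout, FedAvg, FedProx) before the label, so all three must be passed.
# plot_accuracy_vs_round_number(dpercentile_10, fpercentile_10, fppercentile_10, '10th percentile', True)
# plot_accuracy_vs_round_number(dpercentile_90, fpercentile_90, fppercentile_90, '90th percentile', True)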

