## plot

In [None]:
# Display the installed TensorFlow version as this cell's output.
import tensorflow as tf
tf.__version__

### Convert TensorBoard event files to CSV

In [None]:
import os
import glob
import numpy as np
import pandas as pd
from functools import reduce

from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

In [None]:
def merge(dfs):
    """Outer-join a sequence of DataFrames on their index, left to right.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Frames to combine. Rows are aligned on the index; an index value
        missing from a frame yields NaN in that frame's columns.

    Returns
    -------
    pandas.DataFrame
        The joined frame. A single input frame is returned as-is, and an
        empty input yields an empty DataFrame instead of the ``TypeError``
        that ``reduce`` raises on an empty sequence.
    """
    frames = list(dfs)
    if not frames:
        return pd.DataFrame()

    def _outer_join(left, right):
        return pd.merge(left, right, how='outer',
                        left_index=True, right_index=True)

    return reduce(_outer_join, frames)

In [None]:
def events2csv(glob_path, save=True):
    """Load every scalar series from a TensorBoard event file into one DataFrame.

    Parameters
    ----------
    glob_path : str
        Path to a single event file (callers pass one match of a glob).
    save : bool, default True
        When true, also write the result to ``<parent-dir>-results.csv``,
        i.e. next to the directory that contains the event file, with the
        index written under the column name ``steps``.

    Returns
    -------
    pandas.DataFrame
        One column per scalar tag (``/`` in tag names replaced by ``_``),
        indexed by step and outer-joined across tags. Empty when the event
        file contains no scalar tags.
    """
    # NOTE(review): EventAccumulator is built with its default size guidance,
    # which may subsample long scalar series -- confirm the runs are short
    # enough, or pass an explicit size_guidance.
    summary_iterator = EventAccumulator(glob_path).Reload()
    tags = summary_iterator.Tags()['scalars']

    dfs = []
    for tag in tags:
        # Fetch the events once and reuse them for both steps and values.
        events = list(summary_iterator.Scalars(tag))
        steps = [e.step for e in events]
        values = [e.value for e in events]
        dfs.append(pd.DataFrame(values, index=steps, columns=[tag.replace('/', '_')]))

    # An event file without scalar tags yields an empty frame rather than
    # crashing inside the join.
    df_joined = merge(dfs) if dfs else pd.DataFrame()
    if save:
        head = os.path.dirname(glob_path)
        df_joined.to_csv(f'{head}-results.csv', index=True, index_label='steps')
    return df_joined

In [None]:
# Metrics to plot, in plotting order. The final loop of the notebook renders
# and saves one figure per entry, and each entry is used as a key into
# ylabel_map for the y-axis label.
columns = [
    'train_cuml_reward_nce',
    'test_acc',
    'test_neighbourhood_0.01',
    'test_neighbourhood_0.02',
    'test_neighbourhood_0.05',
    'test_neighbourhood_0.1',
    'test_neighbourhood_0.5',
    'test_gradient_norm_true',
    'util_gradient_norm_true',
    'util_gradient_norm_approx',
    'util_lipschitz_t-1_t',
    'util_lipschitz_t-1_t_numerator',
    'util_lipschitz_t-1_t_denominator',
    'util_lipschitz',
    'util_lipschitz_numerator',
    'util_lipschitz_denominator',
    'util_lipschitz_f-mu_f',
    'util_lipschitz_f-mu_f_numerator'
]

In [None]:
# Translates a scalar column name found in an event file into the canonical
# metric name used for plotting. Insertion order matters to the loading loop:
# when two source names map to the same canonical name, the later entry wins.
col_map = {
    # Canonical names map to themselves.
    **{name: name for name in (
        'train_cuml_reward_nce',
        'test_acc',
        'test_neighbourhood_0.01',
        'test_neighbourhood_0.02',
        'test_neighbourhood_0.05',
        'test_neighbourhood_0.1',
        'test_neighbourhood_0.5',
        'test_gradient_norm_true',
        'util_gradient_norm_true',
        'util_gradient_norm_approx',
        'util_lipschitz_t-1_t',
        'util_lipschitz_t-1_t_numerator',
        'util_lipschitz_t-1_t_denominator',
        'util_lipschitz',
        'util_lipschitz_numerator',
        'util_lipschitz_denominator',
        'util_lipschitz_f-mu_f',
        'util_lipschitz_f-mu_f_numerator',
    )},
    # Alternative source names (presumably from a different logging scheme --
    # verify against the training code) mapped onto the canonical names.
    'lipschitz_neighbourhood_0.01_test': 'test_neighbourhood_0.01',
    'lipschitz_neighbourhood_0.02_test': 'test_neighbourhood_0.02',
    'lipschitz_neighbourhood_0.05_test': 'test_neighbourhood_0.05',
    'lipschitz_neighbourhood_0.1_test': 'test_neighbourhood_0.1',
    'lipschitz_neighbourhood_0.5_test': 'test_neighbourhood_0.5',
    'gradients_norm_true_test': 'test_gradient_norm_true',
    'gradients_norm_true_train': 'util_gradient_norm_true',
    'gradients_norm_approx_train': 'util_gradient_norm_approx',
    'lipschitz_t-1_t': 'util_lipschitz_t-1_t',
    'lipschitz_t-1_t_numerator': 'util_lipschitz_t-1_t_numerator',
    'lipschitz_t-1_t_denominator': 'util_lipschitz_t-1_t_denominator',
    'lipschitz_global': 'util_lipschitz',
    'lipschitz_global_numerator': 'util_lipschitz_numerator',
    'lipschitz_global_denominator': 'util_lipschitz_denominator',
    'lipschitz_f-mu_f': 'util_lipschitz_f-mu_f',
    'lipschitz_f-mu_f_numerator': 'util_lipschitz_f-mu_f_numerator',
}

In [None]:
# Experiment variants to load and plot. Each label is substituted into the
# run directory name (`agarwal-nce-{label}-{batch_size}`) when loading, and
# the list order is the order in which curves are drawn in each panel.
labels = [
    'prune-last-L1',
    'prune-last-random',
    'freeze-last-L1',
    'freeze-last-random',
    'none-last-none'
]

In [None]:
# Dataset name -> (seeds whose runs are aggregated, batch size used in the
# run directory name).
runs = {
    'MNIST': ([31, 32, 33], 64),
    'CIFAR-10': ([404, 414, 424], 64)
}
# results[dataset][seed][canonical column] -> DataFrame indexed by step with
# one column per label.
results = {
    'MNIST': {},
    'CIFAR-10': {}
}

for key, (seeds, batch_size) in runs.items():
    data = {}
    for seed in seeds:
        # Load exactly one event file per (seed, label) run directory.
        dfs = {}
        for label in labels:
            glob_path = glob.glob(f"../runs-{seed}/agarwal-nce-{label}-{batch_size}/events.out*")
            assert len(glob_path) == 1, (seed, label, glob_path)
            dfs[label] = events2csv(glob_path[0], save=False)

        cols = {}
        for col_key, col in col_map.items():
            # Require the column in every label's frame. The previous check
            # only looked at the frame of whichever label the loop above
            # finished on, so a column missing from any other label raised
            # KeyError in the join below.
            if all(col_key in dfs[label].columns for label in labels):
                cols[col] = merge([dfs[label][col_key].to_frame(label) for label in labels])
                assert len(cols[col].columns) == len(labels)
        data[seed] = cols

    results[key] = data

### plot

In [None]:
def interpolate(scalars, factor=0.8):
    """Exponentially smooth a series while leaving NaN gaps in place.

    Each non-NaN value is replaced by
    ``previous_smoothed * factor + value * (1 - factor)``; the first non-NaN
    value seeds the running average, so it is returned (numerically)
    unchanged. NaN entries are emitted as NaN and do not update the running
    average.

    Parameters
    ----------
    scalars : iterable of float
        Raw values, possibly containing NaN. May be empty.
    factor : float, default 0.8
        Weight of the previous smoothed value; intended range is
        ``0 < factor < 1`` (larger means smoother).

    Returns
    -------
    numpy.ndarray
        Smoothed values, same length as ``scalars``. Empty input returns an
        empty array (previously this raised ``IndexError``).
    """
    prev = None  # running smoothed value; seeded by the first non-NaN entry
    smoothed = []
    for current in scalars:
        if np.isnan(current):
            # Keep the gap so positions still line up with the input.
            smoothed.append(float('nan'))
            continue
        if prev is None:
            prev = current
        smoothed_val = prev * factor + current * (1 - factor)
        smoothed.append(smoothed_val)
        prev = smoothed_val
    return np.array(smoothed)

In [None]:
# Y-axis label (matplotlib mathtext) for each plotted metric, keyed by the
# canonical column name. All values are raw strings so that LaTeX backslashes
# are never parsed as Python escape sequences.
ylabel_map = {
    'train_cuml_reward_nce': r'Cross Entropy $f(\mathbf{w}^{(t)})$',
    'test_acc': 'Test Accuracy',
    'test_neighbourhood_0.01': r'Lipschitz smoothness $L_\mathrm{neighbor}(0.01)$',
    'test_neighbourhood_0.02': r'Lipschitz smoothness $L_\mathrm{neighbor}(0.02)$',
    'test_neighbourhood_0.05': r'Lipschitz smoothness $L_\mathrm{neighbor}(0.05)$',
    'test_neighbourhood_0.1': r'Lipschitz smoothness $L_\mathrm{neighbor}(0.1)$',
    'test_neighbourhood_0.5': r'Lipschitz smoothness $L_\mathrm{neighbor}(0.5)$',
    'test_gradient_norm_true': r'True Gradient Norm $ || \nabla f(\mathbf{w}^{(t)}) ||^2 $',
    'util_gradient_norm_true': r'True Gradient Norm $ || \nabla f(\mathbf{w}^{(t)}) ||^2 $',
    'util_gradient_norm_approx': r'Approx Gradient Norm $ || g_\mu(\mathbf{w}^{(t)}) ||^2 $',
    'util_lipschitz_t-1_t': r'Lipschitz smoothness $L_\mathrm{local}$',
    'util_lipschitz_t-1_t_numerator': r'Numerator $ || \nabla f(\mathbf{w}^{(t-1)}) - \nabla f(\mathbf{w}^{(t)}) || $',
    'util_lipschitz_t-1_t_denominator': r'Denominator $ ||\mathbf{w}^{(t-1)} - \mathbf{w}^{(t)}|| $',
    'util_lipschitz': r'Lipschitz smoothness $L_\mathrm{global}$',
    'util_lipschitz_numerator': r'Numerator $ || \nabla f(\mathbf{w}^{(i)}) - \nabla f(\mathbf{w}^{(j)}) ||^2 $',
    'util_lipschitz_denominator': r'Denominator $ ||\mathbf{w}^{(i)} - \mathbf{w}^{(j)}|| $',
    'util_lipschitz_f-mu_f': r'Lipschitz smoothness $L_{\mathrm{true}-\mathrm{approx}}$',
    'util_lipschitz_f-mu_f_numerator': r'$|| \nabla f_{\mu}(\mathbf{w}^{(t)}) - \nabla f(\mathbf{w}^{(t)}) ||^2$'
}

In [None]:
# Human-readable legend text for each experiment label; looked up per curve
# when plotting.
legend_map = {
    'freeze-last-L1': 'freezing magnitude-masking',
    'freeze-last-random': 'freezing random-masking',
    'prune-last-L1': 'pruning magnitude-masking',
    'prune-last-random': 'pruning random-masking',
    'none-last-none': 'dense'
}

In [None]:
# Line and shaded-band colour for each experiment label (matplotlib named
# "tab:" colours; the trailing comments give a hex code for each).
tag_color_map = {
    'none-last-none': 'tab:green', #'#2ca02c',
    'prune-last-L1': 'tab:blue', #'#1f77b4',
    'freeze-last-L1': 'tab:red', #'#d62728',
    'prune-last-random': 'tab:purple', #'#9467bd',
    'freeze-last-random': 'tab:orange' #'#ff7f0e'
}

In [None]:
# Global matplotlib configuration: use a sans-serif family, with the
# candidate fonts listed in order of preference.
# NOTE(review): font_manager is imported but not used in the visible cells.
from matplotlib import rcParams, font_manager
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['DejaVu Sans', 'Helvetica', 'Tahoma', 
                               'Lucida Grande', 'Verdana']
import matplotlib.pyplot as plt
# Render figures inline in the notebook output.
%matplotlib inline

In [None]:
def subplot(df, plt, fig, ax, col, data_name, data_size, model_size):
    """Draw one dataset panel: a mean curve with a +/- std band per label.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by step, with ``{label}_mean`` and ``{label}_std`` columns.
        Labels whose mean column is absent are skipped.
    plt : module
        The pyplot module; used for the final tick-font call and returned.
    fig : matplotlib figure
        Accepted for interface compatibility; not used here.
    ax : matplotlib axes
        Axes to draw on. A twin x-axis is added on top for sparsity.
    col : str
        Metric name; selects the y-label from ``ylabel_map`` and, by
        substring match, an optional fixed y-range.
    data_name : str
        Panel title. ``'MNIST'`` and ``'CIFAR-10'`` have y-range presets.
    data_size : int
        ``int(data_size / 1000) * 5`` is the spacing between x ticks.
    model_size : number
        Starting size for the top-axis sparsity schedule.

    Returns
    -------
    module
        The ``plt`` argument.
    """
    steps = pd.to_numeric(df.index.values, downcast='integer')
    x_scaled = steps / 1000

    for tag in labels:
        mean_col = f'{tag}_mean'
        if mean_col in df.columns:
            mean = pd.to_numeric(df[mean_col].values)
            std = pd.to_numeric(df[f'{tag}_std'].values)

            # Dashed for pruning runs, dotted for the dense baseline, solid
            # for everything else.
            if 'prune' in tag:
                line_style = {'linestyle': '--', 'linewidth': 3.0, 'dashes': (3, 3)}
            elif 'none' in tag:
                line_style = {'linestyle': ':', 'linewidth': 3.0}
            else:
                line_style = {'linestyle': '-', 'linewidth': 3.0}

            colour = tag_color_map[tag]
            ax.plot(x_scaled, mean, c=colour, label=legend_map[tag], **line_style)
            ax.fill_between(x_scaled, mean - std, mean + std, alpha=0.2, color=colour)

    ax.set_title(data_name, fontsize='20', fontweight="bold")
    top_axis = ax.twiny()

    n_rounds = 20  # number of rounds
    n_epochs = 5   # number of epochs
    tick_step = int((data_size / 1000)) * n_epochs  # 50*5 for mnist, 40*5 for cifar
    x_max = tick_step * n_rounds
    tick_positions = np.arange(0, x_max + 1, tick_step)

    # Bottom axis: one tick per round, labelling every other one.
    ax.tick_params(direction='out')
    ax.set_xticks(tick_positions)
    ax.set_xlabel('Number of Examples', fontsize=20)
    ax.set_xlim((-100, x_max + 100))
    bottom_labels = []
    for position, value in enumerate(tick_positions):
        bottom_labels.append('{:3d}k'.format(value) if position % 2 == 0 else '')
    ax.set_xticklabels(bottom_labels, fontsize=16)
    ax.grid(True)

    ax.set_ylabel(ylabel_map[col], fontsize=20)
    ax.tick_params(axis='both', which='major', labelsize=16)

    # Fixed y-ranges per dataset. Rules are tried in order and the first
    # substring found in `col` wins; with no match the range is left alone.
    ylim_rules = {
        'MNIST': (
            ('test_acc', (0.45, 0.95)),
            ('util_gradient_norm_approx', (-0.1, 5.1)),
            ('neighbourhood_0.01', (-0.1, 2.1)),
            ('neighbourhood_0.02', (-0.1, 2.1)),
            ('neighbourhood_0.05', (-1, 11)),
            ('neighbourhood_0.1', (-1, 11)),
            ('neighbourhood_0.5', (-1, 41)),
        ),
        'CIFAR-10': (
            ('neighbourhood_0.01', (-0.1, 2.1)),
            ('neighbourhood_0.02', (-0.1, 2.1)),
            ('neighbourhood_0.05', (-1, 11)),
            ('neighbourhood_0.1', (-1, 11)),
            ('neighbourhood_0.5', (-10, 1010)),
        ),
    }
    for needle, limits in ylim_rules.get(data_name, ()):
        if needle in col:
            ax.set_ylim(limits)
            break

    # Top axis: sparsity after each round, where every round removes 20% of
    # what remained (starting from model_size: 266610 for mnist, 4301642 for
    # cifar).
    top_axis.tick_params(direction='in')
    remaining = model_size
    sparsity = [0.0]
    for _ in range(n_rounds):
        remaining = remaining - (remaining * 0.2)
        sparsity.append(1 - (remaining / model_size))
    top_axis.set_xticks(tick_positions)
    top_labels = []
    for position, value in enumerate(sparsity):
        top_labels.append('{0:.3f}'.format(value) if position % 2 == 0 else '')
    top_axis.set_xticklabels(top_labels, fontsize=16)
    top_axis.set_xlabel('Sparsity', fontsize=20)
    top_axis.set_xlim((-100, x_max + 100))

    # pyplot-level call: styles the x tick labels of pyplot's current axes.
    plt.xticks(fontname='DejaVu Sans', fontsize=16)
    return plt

In [None]:
# Smoothing factor passed to interpolate() for the 'norm' and 'lipschitz'
# metrics in prepare_dfs.
interpolation = 0.99

In [None]:
def plot(mnist, cifar, col):
    """Build the two-panel figure (MNIST left, CIFAR-10 right) for one metric.

    Parameters
    ----------
    mnist, cifar : pandas.DataFrame
        Per-dataset frames with ``{label}_mean`` / ``{label}_std`` columns.
    col : str
        Metric name forwarded to ``subplot``.

    Returns
    -------
    (module, legend)
        The pyplot module as returned by ``subplot`` and the shared figure
        legend, so the caller can save the figure with the legend included.
    """
    import matplotlib.pyplot as plt
    fig, (ax_mnist, ax_cifar) = plt.subplots(1, 2, figsize=(24, 6), dpi=200)

    # (frame, axes, title, dataset size, model size) for each panel.
    panels = (
        (mnist, ax_mnist, 'MNIST', 50000, 266610),
        (cifar, ax_cifar, 'CIFAR-10', 40000, 4301642),
    )
    for frame, axis, title, n_examples, n_params in panels:
        plt = subplot(frame, plt, fig, axis, col, title, n_examples, n_params)

    # One legend for the whole figure, built from the left panel's curves.
    handles, linelabels = ax_mnist.get_legend_handles_labels()
    lgd = fig.legend(
        handles,
        linelabels,
        loc='lower center',
        fontsize=18,
        bbox_to_anchor=(0.48, -0.005),
        ncol=5,
    )
    fig.subplots_adjust(bottom=0.2, left=-0.05)
    return plt, lgd

In [None]:
def prepare_dfs(data_name, col):
    """Aggregate one metric across seeds into per-label mean/std columns.

    For each label, the metric's series from every seed of ``data_name`` is
    transformed according to the metric name and then averaged across seeds:

    * name contains ``'nce'``: the sign is flipped;
    * name contains ``'norm'``: smoothed with ``interpolate``;
    * name contains ``'lipschitz'``: smoothed with ``interpolate``, after
      squaring when the name contains ``'f-mu_f_numerator'``;
    * anything else is used as-is.

    Parameters
    ----------
    data_name : str
        Key into ``results`` and ``runs`` (e.g. ``'MNIST'``).
    col : str
        Canonical metric name.

    Returns
    -------
    pandas.DataFrame
        Indexed by step with ``{label}_mean`` and ``{label}_std`` columns for
        every label that has data. Empty when no seed contains ``col``
        (previously this raised ``TypeError`` from the final join).
    """
    data = results[data_name]
    seeds, _ = runs[data_name]  # the batch size is not needed here

    dfs_tag = []
    for tag in labels:
        dfs_seeds = []
        for seed in seeds:
            # Seeds that never logged this metric are skipped.
            if col not in data[seed]:
                continue
            c = data[seed][col][tag]
            c_index = c.index
            if 'nce' in col:
                c = -c
            elif 'norm' in col:
                c = pd.Series(interpolate(c.values, factor=interpolation))
            elif 'lipschitz' in col:
                raw = np.power(c.values, 2) if 'f-mu_f_numerator' in col else c.values
                c = pd.Series(interpolate(raw, factor=interpolation))
            # Smoothing drops the step index, so restore it before joining.
            dfs_seeds.append(c.to_frame(f'runs-{seed}').set_index([c_index]))
        if dfs_seeds:
            df = merge(dfs_seeds)
            df_mean = df.mean(axis=1).to_frame(f'{tag}_mean')
            df_mean[f'{tag}_std'] = df.std(axis=1)
            dfs_tag.append(df_mean)

    if not dfs_tag:
        return pd.DataFrame()
    return merge(dfs_tag)

In [None]:
# Render and save one two-panel figure per metric.
for col in columns:
    mnist_df = prepare_dfs('MNIST', col)
    cifar_df = prepare_dfs('CIFAR-10', col)

    print(col)
    # The smoothing factor is reported for the metrics that are smoothed.
    if 'norm' in col or 'lipschitz' in col:
        print('interpolation', interpolation)

    plt, lgd = plot(mnist_df, cifar_df, col)
    # Strip dots from the metric name before using it in the file name.
    col = col.replace('.', '')
    plt.savefig(
        f'szo-results-new-{col}.png',
        bbox_extra_artists=(lgd,),
        bbox_inches='tight',
    )
    plt.show()