## Varying the number of bins for bKLD

Additional experiments with respect to hyperparameter b.

In [1]:
import mml.interactive
from pathlib import Path
mml.interactive.init()
import pandas as pd
from mml_tf.aggregate import AggregateStrategy
from mml_tf.distances import LoadCachedDistances
from mml_tf.evaluation import get_evaluations
from mml_tf.experiments import EXPERIMENTS, METRICS
from mml_tf.visualization import init_colors, get_exp_color
import plotly.express as px

  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


 _____ ______   _____ ______   ___
|\   _ \  _   \|\   _ \  _   \|\  \
\ \  \\\__\ \  \ \  \\\__\ \  \ \  \
 \ \  \\|__| \  \ \  \\|__| \  \ \  \
  \ \  \    \ \  \ \  \    \ \  \ \  \____
   \ \__\    \ \__\ \__\    \ \__\ \_______\
    \|__|     \|__|\|__|     \|__|\|_______|
         ____  _  _    __  _  _  ____  _  _
        (  _ \( \/ )  (  )( \/ )/ ___)( \/ )
         ) _ ( )  /    )( / \/ \\___ \ )  /
        (____/(__/    (__)\_)(_/(____/(__/
Interactive MML API initialized.


In [2]:
# Register the experiment colours first, then look up one colour per experiment
# so that every figure in this notebook shares the same palette.
init_colors(exp=EXPERIMENTS, distance_measures=[])
color_map = dict((experiment, get_exp_color(experiment)) for experiment in EXPERIMENTS)

In [3]:
# Bin counts to compare; each value is substituted into the cached distance
# identifiers ('...-{n_bins}-BINS') that are loaded in the next cell.
bin_range = [10, 25, 50, 75, 100, 250, 500, 750, 1000]

In [9]:
# Load three cached KLD variants per bin count, in the order
# '-W:TS-' variant, '-W:SN-' variant, variant without a '-W:' part.
all_distances = []
for n_bins in bin_range:
    variant_ids = (
        f'KLD-PP:NS-W:TS-{n_bins}-BINS',
        f'KLD-PP:NS-W:SN-{n_bins}-BINS',
        f'KLD-PP:NS-{n_bins}-BINS',
    )
    all_distances.extend(LoadCachedDistances(variant_id) for variant_id in variant_ids)

In [10]:
# Score every loaded distance variant on all experiments and metrics,
# using mean aggregation and averaged top-k meta metrics.
full_evaluations = get_evaluations(
    all_distances=all_distances,
    aggregates=[AggregateStrategy.MEAN],
    metrics=METRICS,
    experiments=EXPERIMENTS,
    top_meta_metrics=['regret', 'rank', 'gain', 'delta'],
    top_mode='avg',
)

Calculating...: 100%|██████████| 9288/9288 [01:02<00:00, 149.80it/s]


In [11]:
# Collapse the evaluation table to one row per (experiment, distance variant, meta metric),
# holding the mean and standard deviation of the scores in that group.
bin_plot_rows = []
for (exp, distances, meta), group_df in full_evaluations.groupby(['exp', 'distances', 'meta metric']):
    # Distance identifiers end in '-<n_bins>-BINS', so the bin count is the second-to-last token.
    n_bins = int(distances.split('-')[-2])
    # Recover the weighting scheme from the identifier; no '-W:' marker means unweighted.
    if '-W:SN-' in distances:
        weighting = 'source'
    elif '-W:TS-' in distances:
        weighting = 'target'
    else:
        weighting = 'unweighted'
    # Regret scores are flipped (1 - score) before they are summarised.
    scores = (1 - group_df['score']) if meta == 'regret' else group_df['score']
    bin_plot_rows.append({
        'exp': exp,
        'weighting': weighting,
        'bins': n_bins,
        'meta': meta,
        'mean': scores.mean(),
        'std': scores.std(),
    })
# Sort by bin count and attach the mean +/- one standard deviation band.
bin_plot_df = (
    pd.DataFrame(bin_plot_rows)
    .sort_values('bins')
    .assign(
        upper=lambda frame: frame['mean'] + frame['std'],
        lower=lambda frame: frame['mean'] - frame['std'],
    )
)

In [19]:
def get_bins_plot(meta_metric: str, add_std: bool = False):
    """Plot the mean of a meta metric against the number of bins.

    One line is drawn per experiment (scenario) and one facet column per
    weighting, with the bin count on a logarithmic x axis.

    :param meta_metric: value of the 'meta' column of ``bin_plot_df`` to plot; also used as y axis label
    :param add_std: if True, shade the band between the 'lower' and 'upper' columns
        (mean -/+ one standard deviation) around every line
    :return: the plotly figure
    """
    df = bin_plot_df[(bin_plot_df['meta'] == meta_metric) & (bin_plot_df['bins'] != 0)]
    # Pin the facet order to the order of first appearance (plotly's default), so that
    # the std bands below can be addressed to the matching facet column by index.
    weightings = df['weighting'].unique().tolist()
    fig = px.line(df, x='bins', y='mean', color='exp', template='plotly', color_discrete_map=color_map,
                  facet_col='weighting', category_orders={'exp': EXPERIMENTS, 'weighting': weightings},
                  markers=True, labels={'exp': 'Scenario', 'mean': meta_metric}, log_x=True)
    if not add_std:
        return fig
    # One band per (weighting, scenario). Mixing all weightings of a scenario would give
    # y values that no longer line up with x, and omitting row/col would put every band
    # into the first facet only.
    for col_idx, weighting in enumerate(weightings, start=1):
        facet_df = df[df['weighting'] == weighting]
        for exp in EXPERIMENTS:
            band = facet_df[facet_df['exp'] == exp].sort_values(by='bins')
            if band.empty:
                continue
            x = band['bins'].tolist()
            y_upper = band['upper'].tolist()
            y_lower = band['lower'].tolist()
            # Closed polygon: upper bound left-to-right, then lower bound right-to-left.
            fig.add_scatter(x=x + x[::-1], y=y_upper + y_lower[::-1], fill='toself', showlegend=False,
                            name=exp + 'std', line_color='rgba(255,255,255,0)',
                            fillcolor=get_exp_color(exp=exp, opacity=0.15), row=1, col=col_idx)
    return fig

In [20]:
# Mean 'weightedtau' per bin count: one line per scenario, one facet per weighting (no std bands).
get_bins_plot('weightedtau')

With respect to weightedtau, the pretraining data and augmentation policy scenarios would have profited from a lower number of bins.

In [24]:
# Mean 'rank' per bin count: one line per scenario, one facet per weighting (no std bands).
get_bins_plot('rank')

With respect to percentile (the `rank` meta metric plotted above), there seems to be a trend towards more bins, except for the source-weighted variant in combination with the pretraining data and augmentation policy scenarios.

In [25]:
# Mean 'regret' per bin count: one line per scenario, one facet per weighting (no std bands).
get_bins_plot('regret')

Recall that lower is better for regret!