## Varying the number of samples for bKLD

Additional experiments on the hyperparameter n, i.e. the number of samples used to generate the features.

In [1]:
import mml.interactive
from pathlib import Path

# Initialise the interactive MML API from the user-level env file (`~` is expanded first).
# NOTE(review): this call sits before the mml_tf imports below - presumably they rely on
# the initialised environment; confirm before reordering the imports.
mml.interactive.init(Path('~/.config/mml.env').expanduser())
import pandas as pd
from mml_tf.aggregate import AggregateStrategy
from mml_tf.distances import LoadCachedDistances, map_dist2printable
from mml_tf.evaluation import get_evaluations
from mml_tf.experiments import EXPERIMENTS, METRICS
from mml_tf.paths import FIG_PATH
from mml_tf.visualization import init_colors, get_exp_color
import plotly.express as px

 _____ ______   _____ ______   ___
|\   _ \  _   \|\   _ \  _   \|\  \
\ \  \\\__\ \  \ \  \\\__\ \  \ \  \
 \ \  \\|__| \  \ \  \\|__| \  \ \  \
  \ \  \    \ \  \ \  \    \ \  \ \  \____
   \ \__\    \ \__\ \__\    \ \__\ \_______\
    \|__|     \|__|\|__|     \|__|\|_______|
         ____  _  _    __  _  _  ____  _  _
        (  _ \( \/ )  (  )( \/ )/ ___)( \/ )
         ) _ ( )  /    )( / \/ \\___ \ )  /
        (____/(__/    (__)\_)(_/(____/(__/
Interactive MML API initialized.


In [2]:
# Set up the colour helpers for the experiments; no distance measures are passed here.
init_colors(distance_measures=[], exp=EXPERIMENTS)
# Scenario -> colour lookup, handed to plotly express as `color_discrete_map` by the plot helpers.
exp_color_map = dict(zip(EXPERIMENTS, map(get_exp_color, EXPERIMENTS)))

In [3]:
# Identifiers of the base distances; the per-sample-size variants are derived from these names.
bases = [
    'KLD-PP:NS-W:TS-100-BINS',
    # 'KLD-PP:NS-W:SN-1000-BINS',  # variant currently switched off
    'KLD-PP:NS-1000-BINS',
]

In [4]:
# Collect every cached distance to evaluate. First the sub-sampled variants: for each
# (repetitions, sample size) pair, every seed index, and within a seed every base distance ...
samples_distances = [
    LoadCachedDistances(f'{base_dist}-{n_samples}-SAMPLES-{rep_idx}-SEED')
    for n_reps, n_samples in ((10, 10), (10, 100), (10, 1000))
    for rep_idx in range(n_reps)
    for base_dist in bases
]
# ... then the un-suffixed base distances themselves.
samples_distances += [LoadCachedDistances(base_dist) for base_dist in bases]

In [5]:
# Evaluate all loaded distances for every experiment and metric, using the MEAN aggregate
# and the four listed meta metrics. The recorded run of this cell took roughly 1.5 minutes.
full_evaluations = get_evaluations(
    all_distances=samples_distances,
    aggregates=[AggregateStrategy.MEAN],
    metrics=METRICS,
    experiments=EXPERIMENTS,
    top_meta_metrics=['regret', 'rank', 'gain', 'delta'],
    top_mode='best',
)

Calculating...: 100%|██████████| 21328/21328 [01:31<00:00, 232.41it/s]


In [6]:
# x-axis position given to the un-suffixed base distances (names without a SAMPLES part).
# NOTE(review): presumably the sample count behind the default fingerprints - confirm.
FULL_SAMPLES = 10_000
BINS_MARKER = '-BINS'


def _parse_distance_name(name):
    """Split a distance identifier into ``(base, samples, rep)``.

    Identifiers look like ``<base ending in -BINS>[-<n>-SAMPLES-<rep>-SEED]``. The base is
    everything up to and including the first ``-BINS``. Identifiers without a ``SAMPLES``
    part are mapped to ``FULL_SAMPLES`` and repetition 0.

    Raises:
        ValueError: if ``name`` has no ``-BINS`` marker. ``str.find`` returns -1 in that
            case, which would otherwise silently produce a four-character prefix as base.
    """
    marker_at = name.find(BINS_MARKER)
    if marker_at < 0:
        raise ValueError(f'Cannot derive the base distance from {name!r}: no {BINS_MARKER!r} marker found.')
    base = name[:marker_at + len(BINS_MARKER)]
    if 'SAMPLES' not in name:
        return base, FULL_SAMPLES, 0
    parts = name.split('-')
    # ... -<n>-SAMPLES-<rep>-SEED  ->  n is 4th from the end, rep is 2nd from the end
    return base, int(parts[-4]), int(parts[-2])


# Stage 1: one row per (experiment, distance, meta metric, metric) with the mean / std of the scores.
per_distance_rows = []
for (exp, distances, meta, metric), group_df in full_evaluations.groupby(
        ['exp', 'distances', 'meta metric', 'metric']):
    dist, samples, rep = _parse_distance_name(distances)
    scores = group_df['score']
    per_distance_rows.append(
        {'exp': exp, 'samples': samples, 'meta': meta, 'mean': scores.mean(), 'std': scores.std(), 'rep': rep,
         'metric': metric, 'dist': dist})
intermediate_df = pd.DataFrame(per_distance_rows)

# Stage 2: average over repetitions. 'std' is now the spread of the per-repetition means;
# groups with a single repetition (the base distances) therefore get NaN.
samples_plot_rows = []
for (exp, samples, meta, metric, dist), group_df in intermediate_df.groupby(
        ['exp', 'samples', 'meta', 'metric', 'dist']):
    rep_means = group_df['mean']
    samples_plot_rows.append(
        {'exp': exp, 'samples': samples, 'meta': meta, 'mean': rep_means.mean(), 'std': rep_means.std(),
         'metric': metric, 'dist': dist})
samples_plot_df = pd.DataFrame(samples_plot_rows).sort_values(by=['samples'])

In [7]:
def trace_updater(trace):
    """Collapse the legend of a colour + dash line plot to one entry per scenario.

    The trace name is expected to be ``'<exp>, <dist>'``. The trace is renamed to ``<exp>``,
    placed in the legend group ``<exp>`` (group title 'Scenario'), and keeps its legend
    entry only if it is drawn with a solid line.

    Args:
        trace: object exposing ``name``, ``line.dash`` and ``update(**kwargs)``
            (a plotly trace when used with ``fig.for_each_trace``).
    """
    # Everything before the first ', ' is the scenario. A two-way unpack of
    # `split(', ')` would raise as soon as the distance label itself contains ', '
    # (or the name has no separator at all); `partition` handles both.
    exp = trace.name.partition(', ')[0]
    trace.update(legendgroup=exp, legendgrouptitle_text='Scenario', name=exp,
                 showlegend=trace.line.dash == 'solid')

In [8]:
def get_samples_plot(meta_metric='rank', metric='BA'):
    """Plot mean score against number of samples for both fingerprints in one figure.

    Rows of ``samples_plot_df`` matching ``meta_metric`` and ``metric`` are drawn as lines
    on a log-scaled x-axis: colour encodes the scenario, dash style the distance, and
    ``std`` is used for the error bars. The legend is then rearranged: the traces are
    passed through ``trace_updater`` and two zero-length shapes are added whose only
    visible contribution is a legend entry per dash style.

    Args:
        meta_metric: value to select in the 'meta' column.
        metric: value to select in the 'metric' column.

    Returns:
        The assembled plotly figure.
    """
    solid_label = map_dist2printable['KLD-PP:NS-W:TS-100-BINS']
    dashdot_label = map_dist2printable['KLD-PP:NS-1000-BINS']

    selection = (samples_plot_df['meta'] == meta_metric) & (samples_plot_df['metric'] == metric)
    df = samples_plot_df[selection].replace(map_dist2printable)

    axis_labels = {'exp': 'Scenario', 'dist': 'Distances', 'mean': 'three shot percentile'}
    dash_styles = {
        solid_label: 'solid',
        # map_dist2printable['KLD-PP:NS-W:SN-1000-BINS']: 'dash',
        dashdot_label: 'dashdot',
    }
    fig = px.line(df, x='samples', y='mean', color='exp', template='plotly', color_discrete_map=exp_color_map,
                  category_orders={'exp': EXPERIMENTS}, markers=True, labels=axis_labels, log_x=True,
                  error_y='std', line_dash='dist', line_dash_map=dash_styles)
    fig.update_layout(font_size=20)

    # One legend-only shape per dash style; only the first carries the legend group title.
    legend_entries = [
        dict(line=dict(dash="solid"), name=solid_label, legendgrouptitle_text='Fingerprint'),
        # dict(line=dict(dash="dash"), name=map_dist2printable['KLD-PP:NS-W:SN-1000-BINS']),
        dict(line=dict(dash="dashdot"), name=dashdot_label),
    ]
    for entry in legend_entries:
        fig.add_shape(legendrank=1, showlegend=True, type="line", xref="paper",
                      x0=10., x1=10., y0=0.7, y1=0.7, **entry)

    fig.for_each_trace(trace_updater)
    fig.update_layout(legend_title_text='')
    return fig

In [9]:
# Combined view of both fingerprints, using the defaults meta_metric='rank' and metric='BA'.
get_samples_plot()

In [10]:
def get_single_plot(base: str, meta_metric='rank', metric='BA'):
    """Plot mean score against number of samples for a single base distance.

    Args:
        base: value to select in the 'dist' column (compared before the names are
            replaced by their printable form).
        meta_metric: value to select in the 'meta' column.
        metric: value to select in the 'metric' column.

    Returns:
        A plotly line figure with one line per scenario, a log-scaled x-axis and
        ``std`` as error bars.
    """
    wanted = (
        (samples_plot_df['meta'] == meta_metric)
        & (samples_plot_df['metric'] == metric)
        & (samples_plot_df['dist'] == base)
    )
    df = samples_plot_df[wanted].replace(map_dist2printable)
    axis_labels = {'exp': 'Scenario', 'dist': 'Distances', 'mean': 'three shot percentile'}
    fig = px.line(df, x='samples', y='mean', color='exp', template='plotly', color_discrete_map=exp_color_map,
                  category_orders={'exp': EXPERIMENTS}, markers=True, labels=axis_labels, log_x=True,
                  error_y='std')
    fig.update_layout(font_size=20)
    return fig

In [11]:
# Single-fingerprint view for the first entry of `bases`.
get_single_plot(base=bases[0])

In [12]:
def get_stacked_plot(meta_metric='rank', metric='BA'):
    """Plot mean score against number of samples with one facet column per distance.

    Args:
        meta_metric: value to select in the 'meta' column.
        metric: value to select in the 'metric' column.

    Returns:
        A 1200x500 plotly figure with one line per scenario in each facet, a log-scaled
        x-axis and ``std`` as error bars. Facet titles show only the distance name.
    """
    selection = (samples_plot_df['meta'] == meta_metric) & (samples_plot_df['metric'] == metric)
    df = samples_plot_df[selection].replace(map_dist2printable)
    fig = px.line(df, x='samples', y='mean', color='exp', template='plotly', color_discrete_map=exp_color_map,
                  category_orders={'exp': EXPERIMENTS}, markers=True,
                  labels={'exp': 'Scenario', 'dist': 'Distances', 'mean': 'Three shot percentile',
                          'samples': 'Samples'}, log_x=True, error_y='std', facet_col='dist')
    # Facet annotations read '<label>=<value>'. Split only at the first '=' so that a
    # distance name which itself contains '=' is not truncated.
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=", 1)[-1]))
    fig.update_layout(font_size=20, width=1200, height=500)
    return fig

In [13]:
# Build the faceted figure, export it as PNG and PDF into FIG_PATH, then display it inline.
fig = get_stacked_plot()
for extension in ('png', 'pdf'):
    fig.write_image(FIG_PATH / f'fig_6.{extension}', width=1200, height=500)
fig