# Import

In [1]:
%matplotlib widget

In [2]:
import os

import pandas as pd
import numpy as np
import json

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import ttest_ind

In [3]:
# Apply seaborn's 'whitegrid' theme to every figure created after this cell.
sns.set_theme(style='whitegrid')

# Define

In [4]:
def single_round_box(
    plot_arrays,
    plot_alltraining,
    ylim=[0,1],
    title=None,
    xlabel=None,
    ylabel=None,
    tabletitle=None,
    tableon=True,
):
    """Draw one accuracy box per treatment with the all-training scores overlaid.

    Args:
        plot_arrays: DataFrame with 'treat' and 'acc' columns; one box is
            drawn per 'treat' value.
        plot_alltraining: DataFrame with 'treat' and 'acc' columns, drawn as
            red markers on top of the boxes. When ``tableon`` is true it must
            contain a row for each of 'baseline', 'LotS' and 'LitL'.
        ylim: Two-element sequence unpacked into ``set_ylim``; never mutated.
        title: Title of the box-plot axes.
        xlabel: X-axis label of the box-plot axes.
        ylabel: Y-axis label of the box-plot axes.
        tabletitle: Title of the table axes (used only when ``tableon``).
        tableon: When true, a second axes holding a one-row table of the
            all-training accuracies (as percentages) is added below the plot.

    Returns:
        The matplotlib figure that was created.

    Raises:
        IndexError: If ``tableon`` is true and one of the three treatments has
            no row in ``plot_alltraining``.
    """
    if tableon:
        fig, (plot_ax, table_ax) = plt.subplots(2, 1)
    else:
        fig, plot_ax = plt.subplots()
        table_ax = None

    sns.boxplot(x='treat', y='acc', data=plot_arrays, ax=plot_ax)
    plot_ax.scatter(x=plot_alltraining['treat'], y=plot_alltraining['acc'], c='r', s=75)

    plot_ax.set_title(title)
    plot_ax.set_xlabel(xlabel)
    plot_ax.set_ylabel(ylabel)
    plot_ax.set_ylim(*ylim)

    if table_ax is not None:
        order = ['baseline', 'LotS', 'LitL']
        cell_text = [
            f'{plot_alltraining.loc[plot_alltraining["treat"] == treat,"acc"].values[0]*100:.2f}%'
            for treat in order
        ]

        # The header must follow the same fixed order as the cells; taking it
        # from the frame's row order could mislabel (or miscount) the columns.
        table = table_ax.table(cellText=[cell_text], colLabels=order, loc='upper center')
        table.scale(1, 2)
        table_ax.axis('off')

        table_ax.set_title(tabletitle)

    fig.tight_layout()

    return fig

In [5]:
def get_val_summary(modifier, iteration, eval_dir, ):
    """Load the validation summary table of one configuration.

    Reads ``<eval_dir>/r<iteration>/tables/configs.<modifier>.csv`` using the
    first CSV column as the index, and keeps only the columns named
    '1' .. str(iteration), in that order.
    """
    table_path = os.path.join(
        eval_dir, f'r{iteration}', 'tables', f'configs.{modifier}.csv'
    )
    wanted_columns = list(map(str, range(1, iteration + 1)))

    return pd.read_csv(table_path, index_col=0)[wanted_columns]


def get_itereval_summary(sub_keys, iteration, eval_dir, combined, ):
    """Load the iterative-evaluation summary table selected by ``sub_keys``.

    The values of ``sub_keys`` are joined with '.' to form the file-name key,
    with '/' replaced by '-' and ';' replaced by '--'. The table is read from
    ``<eval_dir>/r<iteration>/tables/<combined>/iterevals.<key>.csv`` (first
    CSV column as index) and reduced to the columns '1' .. str(iteration).
    """
    filename_safe = str.maketrans({'/': '-', ';': '--'})
    fname_key = '.'.join(sub_keys.values()).translate(filename_safe)

    table_path = os.path.join(
        eval_dir, f'r{iteration}', 'tables', combined, f'iterevals.{fname_key}.csv'
    )
    wanted_columns = list(map(str, range(1, iteration + 1)))

    return pd.read_csv(table_path, index_col=0)[wanted_columns]
    

In [6]:
def get_mnli_tables(mnli_summary, subsetting='genre', model_name=None):
    """Build per-(model, comb, subset) accuracy tables from a JSON-lines summary.

    Args:
        mnli_summary: Path to a JSON-lines file. Each record is expected to
            provide 'comb', 'treat', 'iter', 'acc' and the ``subsetting`` key.
        subsetting: Name of the column used to split every 'comb' group.
        model_name: First element of each result key. When omitted, the
            module-level ``model`` variable is used, as before.

    Returns:
        A ``(summary, mnli_tables)`` tuple: ``summary`` is the DataFrame of all
        records and ``mnli_tables`` maps ``(model_name, comb, subset)`` to a
        DataFrame with one row per treatment and one column per iteration.

    Raises:
        NameError: If ``model_name`` is omitted and no global ``model`` exists.
    """
    with open(mnli_summary, 'r') as f:
        # Blank lines are not valid JSON documents; skip them.
        summary = pd.DataFrame([json.loads(line) for line in f if line.strip()])

    mnli_tables = {}
    if summary.empty:
        return summary, mnli_tables

    if model_name is None:
        model_name = model

    for comb in summary['comb'].unique():
        comb_sum = summary.loc[summary['comb'] == comb, :]

        # Only the subsets present for this comb are visited; a subset that
        # exists solely under another comb would leave nothing to concatenate.
        for subset in comb_sum[subsetting].unique():
            subset_sum = comb_sum.loc[comb_sum[subsetting] == subset, :]

            plot_tab = []
            for treat in subset_sum['treat'].unique():
                treat_sum = subset_sum.loc[subset_sum['treat'] == treat, :]
                # One row named after the treatment, one column per iteration.
                s = treat_sum[['iter','acc']].set_index('iter').rename({'acc': treat}, axis=1).transpose()
                plot_tab.append(s)

            mnli_tables[(model_name, comb, subset)] = pd.concat(plot_tab)

    return summary, mnli_tables

In [7]:
def split_run_name(run_name, split_by='_'):
    """Split a run name into ``(treat, iter, input_type, comb)``.

    The first two fields are always the first two pieces of the name. The
    remaining pieces decide the rest:

    * two pieces: ('full', 'combined');
    * three pieces: a trailing 'hyp' gives ('hyp', 'combined'), any other
      trailing piece is taken as ``comb`` with input type 'full';
    * otherwise: the last piece is the input type and the one before it
      is ``comb``.
    """
    pieces = run_name.split(split_by)
    piece_count = len(pieces)
    last = pieces[-1]

    if piece_count == 2:
        input_type, comb = 'full', 'combined'
    elif piece_count == 3:
        input_type, comb = (last, 'combined') if last == 'hyp' else ('full', last)
    else:
        input_type, comb = last, pieces[-2]

    return (pieces[0], pieces[1], input_type, comb)

In [8]:
def load_sampled_results(sampled_base):
    """Read the three sampled-result CSV files found in ``sampled_base``.

    Returns ``(collected, itereval, mnli)``. ``collected`` gains the columns
    'treat', 'iter' (as int), 'mod' and 'combined', all parsed from its 'run'
    column with ``split_run_name``. Missing 'genre' values in ``mnli`` are
    replaced with 'combined'.
    """
    def read_table(file_name):
        return pd.read_csv(os.path.join(sampled_base, file_name))

    collected = read_table('collected.csv')
    itereval = read_table('itereval.csv')
    mnli = read_table('mnli.csv')

    # (new column, position in the split_run_name tuple, optional converter)
    key_columns = (
        ('treat', 0, None),
        ('iter', 1, int),
        ('mod', 2, None),
        ('combined', 3, None),
    )
    for column, position, convert in key_columns:
        def extract(run, position=position, convert=convert):
            field = split_run_name(run)[position]
            return field if convert is None else convert(field)

        collected[column] = collected['run'].apply(extract)

    mnli['genre'] = mnli['genre'].fillna('combined')

    return collected, itereval, mnli

In [9]:
def load_all_sampled(sampled_base, upto=5):
    """Load rounds ``r1`` .. ``r<upto>`` and stack them per result type.

    Each round directory ``<sampled_base>/r<k>`` is read with
    ``load_sampled_results``. The returned dict has the keys 'collected',
    'itereval' and 'mnli'; each value is the concatenation (with a fresh
    index) of that result type over all rounds.
    """
    # Names in the order load_sampled_results returns its tables.
    result_names = ('collected', 'itereval', 'mnli')

    per_round = [
        load_sampled_results(os.path.join(sampled_base, f'r{r}'))
        for r in range(1, upto + 1)
    ]

    return {
        name: pd.concat([tables[position] for tables in per_round], ignore_index=True)
        for position, name in enumerate(result_names)
    }
    

In [10]:
def get_ttest_pvals(dist_df, verbose=True):
    """Run ``ttest_ind`` on 'acc' for each pair of the three treatments.

    The pairs are (baseline, LotS), (baseline, LitL) and (LotS, LitL). The
    result of every test is stored under its pair and the dict is returned.
    With ``verbose`` the t statistic and half of the reported p-value are
    printed for each pair.
    """
    treatments = ('baseline', 'LotS', 'LitL')
    pairs = [
        (first, second)
        for position, first in enumerate(treatments)
        for second in treatments[position + 1:]
    ]

    def acc_of(treat):
        return dist_df.loc[dist_df['treat'] == treat, 'acc']

    ttest_dict = {
        (first, second): ttest_ind(acc_of(first), acc_of(second))
        for first, second in pairs
    }

    if not verbose:
        return ttest_dict

    for pair, ttest_results in ttest_dict.items():
        print('='*45)
        print(f"{pair}\nt: {ttest_results[0]:.5f} | p: {ttest_results[1]/2:.5f}")

    return ttest_dict
    

In [11]:
def err_line_plots(
    plot_df,
    ylim=[0,1],
    title=None,
    xlabel=None,
    ylabel=None,
    tabletitle=None,
    tableon=True,
    x='iter',
    y='acc',
    hue='treat',
    err_style='bars',
    ci=95,
    estimator=lambda x: np.median(x),
    markers=True,
    hue_order=['baseline', 'LotS', 'LitL'],
    iteration=5,
    palette=None,
    bbox_to_anchor=None,
):
    """Plot ``y`` against ``x`` as one error-bar line per ``hue`` level.

    Args:
        plot_df: Long-form DataFrame holding the ``x``, ``y`` and ``hue`` columns.
        ylim: Two-element sequence unpacked into ``set_ylim``; never mutated.
        title: Title of the line-plot axes.
        xlabel: X-axis label of the line-plot axes.
        ylabel: Y-axis label of the line-plot axes.
        tabletitle: Title of the table axes (used only when ``tableon``).
        tableon: When true, a table of per-level, per-iteration medians
            (as percentages) is drawn on a second axes below the plot.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        hue: Column whose levels become separate lines.
        err_style: Passed through to ``sns.lineplot``.
        ci: Passed through to ``sns.lineplot``.
            NOTE(review): newer seaborn releases replace ``ci`` with
            ``errorbar`` - confirm against the installed version.
        estimator: Aggregation passed through to ``sns.lineplot``. The table
            always shows the median, whatever estimator is given.
        markers: Passed through to ``sns.lineplot``.
        hue_order: Levels listed, in this order, as the table rows.
        iteration: Highest x value; ticks and table columns run 1..iteration.
        palette: Passed through to ``sns.lineplot``.
        bbox_to_anchor: When given, the legend is re-created anchored at this
            position; when ``None`` seaborn's default legend is kept.

    Returns:
        The matplotlib figure that was created.
    """
    if tableon:
        fig, (plot_ax, table_ax) = plt.subplots(2, 1)
    else:
        fig, plot_ax = plt.subplots()
        table_ax = None

    # Both layouts draw the same line plot on an explicit axes, so the
    # table-less figure gets the same estimator, palette and ticks.
    sns.lineplot(
        data=plot_df, x=x, y=y, ax=plot_ax,
        hue=hue, err_style=err_style, ci=ci, markers=markers,
        estimator=estimator, palette=palette
    )
    plot_ax.set_xticks(np.arange(1,iteration + 1))

    plot_ax.set_title(title)
    plot_ax.set_xlabel(xlabel)
    plot_ax.set_ylabel(ylabel)
    plot_ax.set_ylim(*ylim)
    if bbox_to_anchor is not None:
        plot_ax.legend(bbox_to_anchor=bbox_to_anchor)

    if table_ax is not None:
        iterations = list(range(1, iteration + 1))

        cell_text = []
        for level in hue_order:
            level_df = plot_df.loc[plot_df[hue] == level, :]
            cell_text.append([
                f'{level_df.loc[level_df[x] == i, y].median()*100:.2f}%'
                for i in iterations
            ])

        table = table_ax.table(cellText=cell_text, colLabels=iterations, rowLabels=hue_order, loc='upper center')
        table.scale(1, 2)
        table_ax.axis('off')

        table_ax.set_title(tabletitle)

    fig.tight_layout()

    return fig

# Plot

In [12]:
# Model whose evaluation summaries are plotted in this notebook.
model='roberta-large'
# '__file__' is a plain string here, not the module attribute, so the path is
# resolved against the current working directory: `repo` ends up being the
# parent of the directory the notebook is run from.
repo = os.path.dirname(os.path.dirname(os.path.abspath('__file__')))
eval_dir = os.path.join(repo, 'eval_summary', model)
sample_type = 'cross_eval'
# Number of rounds that are loaded and plotted.
iteration = 5

mnli_summary = os.path.join(eval_dir, 'mnli_evals', 'eval_summaries.jsonl')

# Every figure is saved into this directory; create it when missing.
plots_dir = os.path.join(eval_dir, 'sample', sample_type, f'final', 'plots')
os.makedirs(plots_dir, exist_ok=True)

In [13]:
# First title line of the accuracy figures and of the difference figures.
acc_name = 'Performance'
diff_name = 'Over Baseline'

In [14]:
# Stack the sampled results of rounds r1..r<iteration>; the result is a dict
# with the keys 'collected', 'itereval' and 'mnli'.
distributions = load_all_sampled(
    os.path.join(eval_dir, 'sample', sample_type), upto=iteration
)

## Hyp

In [15]:
# Maps a (combined/separate, input type) selection to the modifier used in the
# name of the validation summary file read by get_val_summary.
select2mod = {
    ('combined', 'full'): 'combined',
    ('combined', 'hyp'): 'hyp',
    ('separate', 'full'): 'separate',
    ('separate', 'hyp'): 'separate_hyp',
}

### Combined

In [16]:
# Selection plotted in this section.
combined = 'combined'
input_type = 'hyp'

# File-name modifier that belongs to the selection.
mod = select2mod[(combined, input_type)]

# Validation summary table for the selection (columns '1'..str(iteration)).
collected = get_val_summary(mod, iteration, eval_dir, )

In [17]:
# Reshape the wide summary into long form: one frame per summary row, with the
# row's index label as 'treat' and its column names converted to int as 'iter'.
temp = []
for idx, row in collected.iterrows():
    df = pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat':row.name,
        'mod':input_type,
        'combined':combined,
    })
    temp.append(df)
collected_t = pd.concat(temp, ignore_index=True)

In [18]:
# Stack the sampled distributions with the long-form summary rows.
all_df = pd.concat([distributions['collected'], collected_t])

In [19]:
# Keep only the rows of the selected input type and combined/separate mode.
input_type_df = all_df.loc[all_df['mod'] == input_type, :]
plot_df = input_type_df.loc[input_type_df['combined'] == combined, :]
# `drop` is not set, so the previous index is kept as a regular column.
plot_df.reset_index(inplace=True)

In [20]:
# Columns handed to err_line_plots (its default y, hue and x columns).
keeps = ['acc', 'treat', 'iter']

In [21]:
# Axis limits, labels and table title of the accuracy figure.
ylim=[0.3,0.7]
title=f'{acc_name}\ncollected | {combined} | {input_type} input'
xlabel='Iteration'
ylabel='Accuracy'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

# One line per treatment; the palette pins a fixed colour to each of them.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'baseline':'tab:blue',
        'LotS':'tab:orange',
        'LitL':'tab:green',
    }
)
fig.savefig(os.path.join(plots_dir, f'acc-collected-{combined}-{input_type}.{figtype}'))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [22]:
# Pairwise accuracy differences (treatment minus baseline) for every iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable must not be called `iteration`: that name holds the
    # notebook-wide round count and rebinding it here leaks into later cells.
    for iter_value in plot_df['iter'].unique():
        base_iter_df = base_df.loc[base_df['iter'] == iter_value]
        treat_iter_df = treat_df.loc[treat_df['iter'] == iter_value]

        # Every treatment sample is compared with every baseline sample.
        accs = [
            treat_acc - base_acc
            for treat_acc in treat_iter_df['acc']
            for base_acc in base_iter_df['acc']
        ]

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [23]:
# Axis limits, labels and table title of the difference figure.
ylim=[-0.3, 0.05]
title=f'{diff_name}\ncollected | {combined} | {input_type} input'
xlabel='Iteration'
ylabel='Difference'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS':'tab:orange',
        'LitL':'tab:green',
    },
    # The table lists only these two treatments (no 'baseline' row).
    hue_order=['LotS', 'LitL'],
)
fig.savefig(os.path.join(plots_dir, f'difference-collected-{combined}-{input_type}.{figtype}'))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Separate

In [24]:
# Selection plotted in this section.
combined = 'separate'
input_type = 'hyp'

# File-name modifier that belongs to the selection.
mod = select2mod[(combined, input_type)]

# Validation summary table for the selection (columns '1'..str(iteration)).
collected = get_val_summary(mod, iteration, eval_dir, )

In [25]:
# Reshape the wide summary into long form: one frame per summary row, with the
# row's index label as 'treat' and its column names converted to int as 'iter'.
temp = []
for idx, row in collected.iterrows():
    df = pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat':row.name,
        'mod':input_type,
        'combined':combined,
    })
    temp.append(df)
collected_t = pd.concat(temp, ignore_index=True)

In [26]:
# Stack the sampled distributions with the long-form summary rows.
all_df = pd.concat([distributions['collected'], collected_t])

In [27]:
# Keep only the rows of the selected input type and combined/separate mode.
input_type_df = all_df.loc[all_df['mod'] == input_type, :]
plot_df = input_type_df.loc[input_type_df['combined'] == combined, :]
# `drop` is not set, so the previous index is kept as a regular column.
plot_df.reset_index(inplace=True)

In [28]:
# Columns handed to err_line_plots (its default y, hue and x columns).
keeps = ['acc', 'treat', 'iter']

In [29]:
# Axis limits, labels and table title of the accuracy figure.
ylim=[0.3,0.7]
title=f'{acc_name}\ncollected | {combined} | {input_type} input'
xlabel='Iteration'
ylabel='Accuracy'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

# One line per treatment; the palette pins a fixed colour to each of them.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'baseline':'tab:blue',
        'LotS':'tab:orange',
        'LitL':'tab:green',
    }
)
fig.savefig(os.path.join(plots_dir, f'acc-collected-{combined}-{input_type}.{figtype}'))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [30]:
# Pairwise accuracy differences (treatment minus baseline) for every iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable must not be called `iteration`: that name holds the
    # notebook-wide round count and rebinding it here leaks into later cells.
    for iter_value in plot_df['iter'].unique():
        base_iter_df = base_df.loc[base_df['iter'] == iter_value]
        treat_iter_df = treat_df.loc[treat_df['iter'] == iter_value]

        # Every treatment sample is compared with every baseline sample.
        accs = [
            treat_acc - base_acc
            for treat_acc in treat_iter_df['acc']
            for base_acc in base_iter_df['acc']
        ]

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [31]:
# Axis limits, labels and table title of the difference figure.
ylim=[-0.3, 0.05]
title=f'{diff_name}\ncollected | {combined} | {input_type} input'
xlabel='Iteration'
ylabel='Difference'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS':'tab:orange',
        'LitL':'tab:green',
    },
    # The table lists only these two treatments (no 'baseline' row).
    hue_order=['LotS', 'LitL'],
)
fig.savefig(os.path.join(plots_dir, f'difference-collected-{combined}-{input_type}.{figtype}'))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## HANS - combined

In [32]:
# Mode used by the HANS cells below: it selects the summary sub-directory and
# the 'comb' value the rows are filtered on.
combined_iterations = 'combined'

### lexical_overlap

#### non-entailment

In [33]:
# Slice of the iterative evaluation to plot; the values are also joined into
# the summary file name by get_itereval_summary.
sub_keys = {
    'dataset': 'hans',     # either hans or glue
    'case': 'lexical_overlap',    # combined or specific to respective itereval set
    'subcase': 'combined', # combined or specific to respective itereval set
    'label': 'non-entailment',   # combined or [entailment, neutral, contradiction] for glue, [entailment, non-entailment] for hans
}


# Summary table of that slice (columns '1'..str(iteration)).
hans = get_itereval_summary(sub_keys, iteration, eval_dir, combined_iterations)

In [34]:
# Reshape the wide summary into long form: one frame per summary row, with the
# row's index label as 'treat' and its column names converted to int as 'iter'.
temp = []
for idx, row in hans.iterrows():
    df = pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat':row.name,
        # Without a 'dataset' value these rows are NaN in that column once
        # stacked with the sampled results, and any `dataset == ...` filter
        # silently removes them.
        'dataset':sub_keys['dataset'],
        'case':sub_keys['case'],
        'subcase':sub_keys['subcase'],
        'label':sub_keys['label'],
        'comb':combined_iterations
    })
    temp.append(df)
hans_t = pd.concat(temp, ignore_index=True)

In [35]:
# Stack the sampled distributions with the long-form summary rows.
all_df = pd.concat([distributions['itereval'], hans_t])

In [36]:
# Narrow the stacked frame step by step to the slice described by sub_keys:
# dataset -> combined/separate mode -> case -> subcase -> label.
# Rows whose value in a filtered column is missing do not pass that filter.
dataset_errs = all_df.loc[all_df['dataset'] == sub_keys['dataset'], :]
combo_errs = dataset_errs.loc[dataset_errs['comb'] == combined_iterations, :]
case_errs = combo_errs.loc[combo_errs['case'] == sub_keys['case'], :]
subcase_errs = case_errs.loc[case_errs['subcase'] == sub_keys['subcase'], :]
plot_df = subcase_errs.loc[subcase_errs['label'] == sub_keys['label'], :]

In [37]:
# Columns handed to err_line_plots (its default y, hue and x columns).
keeps = ['acc', 'treat', 'iter']

In [38]:
# Axis limits, labels and table title of the accuracy figure.
ylim=[0,1]
title=f"{acc_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Accuracy'
# Capitalised like the table title of every other figure in this notebook.
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

# One line per treatment; the palette pins a fixed colour to each of them.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'baseline':'tab:blue',
        'LotS':'tab:orange',
        'LitL':'tab:green',
    }
)
fig.savefig(os.path.join(plots_dir, f"acc-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [39]:
# Pairwise accuracy differences (treatment minus baseline) for every iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable must not be called `iteration`: that name holds the
    # notebook-wide round count and rebinding it here leaks into later cells.
    for iter_value in plot_df['iter'].unique():
        base_iter_df = base_df.loc[base_df['iter'] == iter_value]
        treat_iter_df = treat_df.loc[treat_df['iter'] == iter_value]

        # Every treatment sample is compared with every baseline sample.
        accs = [
            treat_acc - base_acc
            for treat_acc in treat_iter_df['acc']
            for base_acc in base_iter_df['acc']
        ]

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [40]:
# Axis limits, labels and table title of the difference figure.
ylim=[-0.15, 0.5]
title=f"{diff_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Difference'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS':'tab:orange',
        'LitL':'tab:green',
    },
    # The table lists only these two treatments (no 'baseline' row).
    hue_order=['LotS', 'LitL'],
)
fig.savefig(os.path.join(plots_dir, f"difference-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### subsequence

#### non-entailment

In [41]:
# Slice of the iterative evaluation to plot; the values are also joined into
# the summary file name by get_itereval_summary.
sub_keys = {
    'dataset': 'hans',     # either hans or glue
    'case': 'subsequence',    # combined or specific to respective itereval set
    'subcase': 'combined', # combined or specific to respective itereval set
    'label': 'non-entailment',   # combined or [entailment, neutral, contradiction] for glue, [entailment, non-entailment] for hans
}


# Summary table of that slice (columns '1'..str(iteration)).
hans = get_itereval_summary(sub_keys, iteration, eval_dir, combined_iterations)

In [42]:
# Reshape the wide summary into long form: one frame per summary row, with the
# row's index label as 'treat' and its column names converted to int as 'iter'.
temp = []
for idx, row in hans.iterrows():
    df = pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat':row.name,
        # Without a 'dataset' value these rows are NaN in that column once
        # stacked with the sampled results, and any `dataset == ...` filter
        # silently removes them.
        'dataset':sub_keys['dataset'],
        'case':sub_keys['case'],
        'subcase':sub_keys['subcase'],
        'label':sub_keys['label'],
        'comb':combined_iterations
    })
    temp.append(df)
hans_t = pd.concat(temp, ignore_index=True)

In [43]:
# Stack the sampled distributions with the long-form summary rows.
all_df = pd.concat([distributions['itereval'], hans_t])

In [44]:
# Narrow the stacked frame step by step to the slice described by sub_keys:
# dataset -> combined/separate mode -> case -> subcase -> label.
# Rows whose value in a filtered column is missing do not pass that filter.
dataset_errs = all_df.loc[all_df['dataset'] == sub_keys['dataset'], :]
combo_errs = dataset_errs.loc[dataset_errs['comb'] == combined_iterations, :]
case_errs = combo_errs.loc[combo_errs['case'] == sub_keys['case'], :]
subcase_errs = case_errs.loc[case_errs['subcase'] == sub_keys['subcase'], :]
plot_df = subcase_errs.loc[subcase_errs['label'] == sub_keys['label'], :]

In [45]:
# Columns handed to err_line_plots (its default y, hue and x columns).
keeps = ['acc', 'treat', 'iter']

In [46]:
# Axis limits, labels and table title of the accuracy figure.
ylim=[0,1]
title=f"{acc_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Accuracy'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

# One line per treatment; the palette pins a fixed colour to each of them.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'baseline':'tab:blue',
        'LotS':'tab:orange',
        'LitL':'tab:green',
    }
)
fig.savefig(os.path.join(plots_dir, f"acc-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [47]:
# Pairwise accuracy differences (treatment minus baseline) for every iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable must not be called `iteration`: that name holds the
    # notebook-wide round count and rebinding it here leaks into later cells.
    for iter_value in plot_df['iter'].unique():
        base_iter_df = base_df.loc[base_df['iter'] == iter_value]
        treat_iter_df = treat_df.loc[treat_df['iter'] == iter_value]

        # Every treatment sample is compared with every baseline sample.
        accs = [
            treat_acc - base_acc
            for treat_acc in treat_iter_df['acc']
            for base_acc in base_iter_df['acc']
        ]

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [48]:
# Axis limits, labels and table title of the difference figure.
ylim=[-0.15, 0.5]
title=f"{diff_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Difference'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS':'tab:orange',
        'LitL':'tab:green',
    },
    # The table lists only these two treatments (no 'baseline' row).
    hue_order=['LotS', 'LitL'],
)
fig.savefig(os.path.join(plots_dir, f"difference-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### constituent

#### non-entailment

In [49]:
# Slice of the iterative evaluation to plot; the values are also joined into
# the summary file name by get_itereval_summary.
sub_keys = {
    'dataset': 'hans',     # either hans or glue
    'case': 'constituent',    # combined or specific to respective itereval set
    'subcase': 'combined', # combined or specific to respective itereval set
    'label': 'non-entailment',   # combined or [entailment, neutral, contradiction] for glue, [entailment, non-entailment] for hans
}


# Summary table of that slice (columns '1'..str(iteration)).
hans = get_itereval_summary(sub_keys, iteration, eval_dir, combined_iterations)

In [50]:
# Reshape the wide summary into long form: one frame per summary row, with the
# row's index label as 'treat' and its column names converted to int as 'iter'.
temp = []
for idx, row in hans.iterrows():
    df = pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat':row.name,
        # Without a 'dataset' value these rows are NaN in that column once
        # stacked with the sampled results, and any `dataset == ...` filter
        # silently removes them.
        'dataset':sub_keys['dataset'],
        'case':sub_keys['case'],
        'subcase':sub_keys['subcase'],
        'label':sub_keys['label'],
        'comb':combined_iterations
    })
    temp.append(df)
hans_t = pd.concat(temp, ignore_index=True)

In [51]:
# Stack the sampled distributions with the long-form summary rows.
all_df = pd.concat([distributions['itereval'], hans_t])

In [52]:
# Narrow the stacked frame step by step to the slice described by sub_keys:
# dataset -> combined/separate mode -> case -> subcase -> label.
# Rows whose value in a filtered column is missing do not pass that filter.
dataset_errs = all_df.loc[all_df['dataset'] == sub_keys['dataset'], :]
combo_errs = dataset_errs.loc[dataset_errs['comb'] == combined_iterations, :]
case_errs = combo_errs.loc[combo_errs['case'] == sub_keys['case'], :]
subcase_errs = case_errs.loc[case_errs['subcase'] == sub_keys['subcase'], :]
plot_df = subcase_errs.loc[subcase_errs['label'] == sub_keys['label'], :]

In [53]:
# Columns handed to err_line_plots (its default y, hue and x columns).
keeps = ['acc', 'treat', 'iter']

In [54]:
# Axis limits, labels and table title of the accuracy figure.
ylim=[0,1]
title=f"{acc_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Accuracy'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

# One line per treatment; the palette pins a fixed colour to each of them.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'baseline':'tab:blue',
        'LotS':'tab:orange',
        'LitL':'tab:green',
    }
)
fig.savefig(os.path.join(plots_dir, f"acc-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [55]:
# Pairwise accuracy differences (treatment minus baseline) for every iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable must not be called `iteration`: that name holds the
    # notebook-wide round count and rebinding it here leaks into later cells.
    for iter_value in plot_df['iter'].unique():
        base_iter_df = base_df.loc[base_df['iter'] == iter_value]
        treat_iter_df = treat_df.loc[treat_df['iter'] == iter_value]

        # Every treatment sample is compared with every baseline sample.
        accs = [
            treat_acc - base_acc
            for treat_acc in treat_iter_df['acc']
            for base_acc in base_iter_df['acc']
        ]

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [56]:
# Axis limits, labels and table title of the difference figure.
ylim=[-0.15, 0.5]
title=f"{diff_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Difference'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS':'tab:orange',
        'LitL':'tab:green',
    },
    # The table lists only these two treatments (no 'baseline' row).
    hue_order=['LotS', 'LitL'],
)
fig.savefig(os.path.join(plots_dir, f"difference-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## HANS - separate

In [57]:
# Mode used by the HANS cells below: it selects the summary sub-directory and
# the 'comb' value the rows are filtered on.
combined_iterations = 'separate'

### lexical_overlap

#### non-entailment

In [58]:
# Slice of the iterative evaluation to plot; the values are also joined into
# the summary file name by get_itereval_summary.
sub_keys = {
    'dataset': 'hans',     # either hans or glue
    'case': 'lexical_overlap',    # combined or specific to respective itereval set
    'subcase': 'combined', # combined or specific to respective itereval set
    'label': 'non-entailment',   # combined or [entailment, neutral, contradiction] for glue, [entailment, non-entailment] for hans
}


# Summary table of that slice (columns '1'..str(iteration)).
hans = get_itereval_summary(sub_keys, iteration, eval_dir, combined_iterations)

In [59]:
# Reshape the wide summary into long form: one frame per summary row, with the
# row's index label as 'treat' and its column names converted to int as 'iter'.
temp = []
for idx, row in hans.iterrows():
    df = pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat':row.name,
        # Without a 'dataset' value these rows are NaN in that column once
        # stacked with the sampled results, and any `dataset == ...` filter
        # silently removes them.
        'dataset':sub_keys['dataset'],
        'case':sub_keys['case'],
        'subcase':sub_keys['subcase'],
        'label':sub_keys['label'],
        'comb':combined_iterations
    })
    temp.append(df)
hans_t = pd.concat(temp, ignore_index=True)

In [60]:
# Stack the sampled distributions with the long-form summary rows.
all_df = pd.concat([distributions['itereval'], hans_t])

In [61]:
# Narrow the stacked frame step by step to the slice described by sub_keys:
# dataset -> combined/separate mode -> case -> subcase -> label.
# Rows whose value in a filtered column is missing do not pass that filter.
dataset_errs = all_df.loc[all_df['dataset'] == sub_keys['dataset'], :]
combo_errs = dataset_errs.loc[dataset_errs['comb'] == combined_iterations, :]
case_errs = combo_errs.loc[combo_errs['case'] == sub_keys['case'], :]
subcase_errs = case_errs.loc[case_errs['subcase'] == sub_keys['subcase'], :]
plot_df = subcase_errs.loc[subcase_errs['label'] == sub_keys['label'], :]

In [62]:
# Columns handed to err_line_plots (its default y, hue and x columns).
keeps = ['acc', 'treat', 'iter']

In [63]:
# Axis limits, labels and table title of the accuracy figure.
ylim=[0,1]
title=f"{acc_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Accuracy'
# Capitalised like the table title of every other figure in this notebook.
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

# One line per treatment; the palette pins a fixed colour to each of them.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'baseline':'tab:blue',
        'LotS':'tab:orange',
        'LitL':'tab:green',
    }
)
fig.savefig(os.path.join(plots_dir, f"acc-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [64]:
# Pairwise accuracy differences (treatment minus baseline) for every iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable must not be called `iteration`: that name holds the
    # notebook-wide round count and rebinding it here leaks into later cells.
    for iter_value in plot_df['iter'].unique():
        base_iter_df = base_df.loc[base_df['iter'] == iter_value]
        treat_iter_df = treat_df.loc[treat_df['iter'] == iter_value]

        # Every treatment sample is compared with every baseline sample.
        accs = [
            treat_acc - base_acc
            for treat_acc in treat_iter_df['acc']
            for base_acc in base_iter_df['acc']
        ]

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [65]:
# Axis limits, labels and table title of the difference figure.
ylim=[-0.15, 0.5]
title=f"{diff_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Difference'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS':'tab:orange',
        'LitL':'tab:green',
    },
    # The table lists only these two treatments (no 'baseline' row).
    hue_order=['LotS', 'LitL'],
)
fig.savefig(os.path.join(plots_dir, f"difference-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### subsequence

#### non-entailment

In [66]:
# Slice of the iterative evaluation to plot; the values are also joined into
# the summary file name by get_itereval_summary.
sub_keys = {
    'dataset': 'hans',     # either hans or glue
    'case': 'subsequence',    # combined or specific to respective itereval set
    'subcase': 'combined', # combined or specific to respective itereval set
    'label': 'non-entailment',   # combined or [entailment, neutral, contradiction] for glue, [entailment, non-entailment] for hans
}


# Summary table of that slice (columns '1'..str(iteration)).
hans = get_itereval_summary(sub_keys, iteration, eval_dir, combined_iterations)

In [67]:
# Reshape the wide summary into long form: one frame per summary row, with the
# row's index label as 'treat' and its column names converted to int as 'iter'.
temp = []
for idx, row in hans.iterrows():
    df = pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat':row.name,
        # Without a 'dataset' value these rows are NaN in that column once
        # stacked with the sampled results, and any `dataset == ...` filter
        # silently removes them.
        'dataset':sub_keys['dataset'],
        'case':sub_keys['case'],
        'subcase':sub_keys['subcase'],
        'label':sub_keys['label'],
        'comb':combined_iterations
    })
    temp.append(df)
hans_t = pd.concat(temp, ignore_index=True)

In [68]:
# Stack the sampled distributions with the long-form summary rows.
all_df = pd.concat([distributions['itereval'], hans_t])

In [69]:
# Narrow the stacked frame step by step to the slice described by sub_keys:
# dataset -> combined/separate mode -> case -> subcase -> label.
# Rows whose value in a filtered column is missing do not pass that filter.
dataset_errs = all_df.loc[all_df['dataset'] == sub_keys['dataset'], :]
combo_errs = dataset_errs.loc[dataset_errs['comb'] == combined_iterations, :]
case_errs = combo_errs.loc[combo_errs['case'] == sub_keys['case'], :]
subcase_errs = case_errs.loc[case_errs['subcase'] == sub_keys['subcase'], :]
plot_df = subcase_errs.loc[subcase_errs['label'] == sub_keys['label'], :]

In [70]:
# Columns handed to err_line_plots (its default y, hue and x columns).
keeps = ['acc', 'treat', 'iter']

In [71]:
# Axis limits, labels and table title of the accuracy figure.
ylim=[0,1]
title=f"{acc_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Accuracy'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

# One line per treatment; the palette pins a fixed colour to each of them.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'baseline':'tab:blue',
        'LotS':'tab:orange',
        'LitL':'tab:green',
    }
)
fig.savefig(os.path.join(plots_dir, f"acc-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [72]:
# Pairwise accuracy differences (treatment minus baseline) for every iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable must not be called `iteration`: that name holds the
    # notebook-wide round count and rebinding it here leaks into later cells.
    for iter_value in plot_df['iter'].unique():
        base_iter_df = base_df.loc[base_df['iter'] == iter_value]
        treat_iter_df = treat_df.loc[treat_df['iter'] == iter_value]

        # Every treatment sample is compared with every baseline sample.
        accs = [
            treat_acc - base_acc
            for treat_acc in treat_iter_df['acc']
            for base_acc in base_iter_df['acc']
        ]

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [73]:
# Axis limits, labels and table title of the difference figure.
ylim=[-0.15, 0.5]
title=f"{diff_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel='Iteration'
ylabel='Difference'
tabletitle='Median'

# Extension of the saved file.
figtype='jpg'

fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS':'tab:orange',
        'LitL':'tab:green',
    },
    # The table lists only these two treatments (no 'baseline' row).
    hue_order=['LotS', 'LitL'],
)
fig.savefig(os.path.join(plots_dir, f"difference-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### constituent

#### non-entailment

In [74]:
# Slice of the iterative evaluation to plot; the values are also joined into
# the summary file name by get_itereval_summary.
sub_keys = {
    'dataset': 'hans',     # either hans or glue
    'case': 'constituent',    # combined or specific to respective itereval set
    'subcase': 'combined', # combined or specific to respective itereval set
    'label': 'non-entailment',   # combined or [entailment, neutral, contradiction] for glue, [entailment, non-entailment] for hans
}


# Summary table of that slice (columns '1'..str(iteration)).
hans = get_itereval_summary(sub_keys, iteration, eval_dir, combined_iterations)

In [75]:
# Reshape `hans` from wide to long: each row of `hans` becomes a small frame
# with one line per column label, where the column labels are parsed as
# integer iteration numbers and the row label becomes the treatment name.
# NOTE(review): no 'dataset' column is set on these rows, while the filter
# cell further down selects on all_df['dataset'] -- confirm whether these
# rows are meant to survive that filter.
selection_cols = {
    'case': sub_keys['case'],
    'subcase': sub_keys['subcase'],
    'label': sub_keys['label'],
    'comb': combined_iterations,
}
temp = [
    pd.DataFrame({
        'acc': row,
        'iter': [int(x) for x in row.index.values],
        'treat': row.name,
        **selection_cols,
    })
    for _, row in hans.iterrows()
]
hans_t = pd.concat(temp, ignore_index=True)

In [76]:
# Stack the itereval distributions and the long-format hans_t table row-wise.
# Original index labels are kept (no ignore_index), so labels may repeat.
all_df = pd.concat([distributions['itereval'], hans_t])

In [77]:
# Narrow all_df one selection key at a time: dataset, combination setting,
# case, subcase and finally label. The last step is the frame to plot.
dataset_errs = all_df[all_df['dataset'] == sub_keys['dataset']]
combo_errs = dataset_errs[dataset_errs['comb'] == combined_iterations]
case_errs = combo_errs[combo_errs['case'] == sub_keys['case']]
subcase_errs = case_errs[case_errs['subcase'] == sub_keys['subcase']]
plot_df = subcase_errs[subcase_errs['label'] == sub_keys['label']]

In [78]:
# Columns of plot_df that are handed to err_line_plots for the accuracy figure.
keeps = ['acc', 'treat', 'iter']

In [79]:
# Settings for the accuracy figure of the currently selected subset.
ylim = [0, 1]
title = f"{acc_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel, ylabel = 'Iteration', 'Accuracy'
tabletitle = 'Median'
figtype = 'jpg'

# One fixed colour per treatment so figures are comparable with each other.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={'baseline': 'tab:blue', 'LotS': 'tab:orange', 'LitL': 'tab:green'},
)

# Write the figure into plots_dir; the file name encodes the selected subset.
fig.savefig(
    os.path.join(
        plots_dir,
        f"acc-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}",
    )
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [80]:
# Pairwise accuracy differences (treatment minus baseline) per iteration.
# For each treatment and each iteration value in plot_df, every treatment
# accuracy is compared with every baseline accuracy of that same iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable is deliberately NOT named `iteration`: that name is a
    # notebook-level setting read by other cells (e.g.
    # `get_itereval_summary(sub_keys, iteration, ...)` and
    # `summary['iter'] == str(iteration)`), and rebinding it here would
    # silently change what those cells load.
    for iter_value in plot_df['iter'].unique():
        base_accs = base_df.loc[base_df['iter'] == iter_value, 'acc'].to_numpy(dtype=float)
        treat_accs = treat_df.loc[treat_df['iter'] == iter_value, 'acc'].to_numpy(dtype=float)

        # Outer difference gives one value per (treatment row, baseline row)
        # pair; ravel() flattens it with the treatment row as the slow axis,
        # i.e. the same order a nested treatment/baseline loop would produce.
        accs = np.subtract.outer(treat_accs, base_accs).ravel()

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [81]:
# Settings for the difference figure of the currently selected subset.
ylim = [-0.15, 0.5]
title = f"{diff_name}\n{combined_iterations} | {sub_keys['dataset']} | {sub_keys['case']} | {sub_keys['label']}"
xlabel, ylabel = 'Iteration', 'Difference'
tabletitle = 'Median'
figtype = 'jpg'

# diff_df only holds the two treatments (baseline is the reference they are
# compared against), so the palette and hue order list just those two.
fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={'LotS': 'tab:orange', 'LitL': 'tab:green'},
    hue_order=['LotS', 'LitL'],
)

# Write the figure into plots_dir; the file name encodes the selected subset.
fig.savefig(
    os.path.join(
        plots_dir,
        f"difference-{sub_keys['dataset']}-{combined_iterations}-{sub_keys['case']}-{sub_keys['label']}.{figtype}",
    )
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## MNLI

### Combined

In [82]:
# Combination setting for the MNLI section: used below to filter the 'comb'
# column and to label/name the resulting figures.
combined = 'combined'

In [83]:
# Genre to analyse; also used in figure titles and file names below.
genre = 'combined'

# mnli_summary is parsed line by line, one JSON object per line.
with open(mnli_summary, 'r') as f:
    records = [json.loads(line) for line in f]
summary = pd.DataFrame(records)

# Narrow the summary to the selected iteration, combination setting and genre.
# 'iter' is compared against the string form of `iteration`.
iter_summary = summary[summary['iter'] == str(iteration)]
comb_summary = iter_summary[iter_summary['comb'] == combined]
# TODO: change the 'tag' column name to 'genre'.
genre_summary = comb_summary[comb_summary['tag'] == genre]

In [84]:
# Keep only the MNLI distribution rows for the selected combination setting,
# then only those for the selected genre; the result is the frame to plot.
mnli_dist = distributions['mnli']
comb_errs = mnli_dist[mnli_dist['comb'] == combined]
plot_df = comb_errs[comb_errs['genre'] == genre]

In [85]:
# Columns of plot_df that are handed to err_line_plots for the accuracy figure.
keeps = ['acc', 'treat', 'iter']

In [86]:
# Settings for the MNLI accuracy figure.
ylim = [0.7, 1]
title = f'{acc_name}\nmnli | {combined} | {genre} genre'
xlabel, ylabel = 'Iteration', 'Accuracy'
tabletitle = 'Median'
figtype = 'jpg'

# One fixed colour per treatment so figures are comparable with each other.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={'baseline': 'tab:blue', 'LotS': 'tab:orange', 'LitL': 'tab:green'},
)

# Write the figure into plots_dir; the file name encodes setting and genre.
fig.savefig(
    os.path.join(plots_dir, f'acc-mnli-{combined}-{genre}.{figtype}')
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [87]:
# Pairwise accuracy differences (treatment minus baseline) per iteration.
# For each treatment and each iteration value in plot_df, every treatment
# accuracy is compared with every baseline accuracy of that same iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable is deliberately NOT named `iteration`: that name is a
    # notebook-level setting read by other cells (e.g.
    # `summary['iter'] == str(iteration)` when loading the MNLI summary), and
    # rebinding it here would silently change what those cells select.
    for iter_value in plot_df['iter'].unique():
        base_accs = base_df.loc[base_df['iter'] == iter_value, 'acc'].to_numpy(dtype=float)
        treat_accs = treat_df.loc[treat_df['iter'] == iter_value, 'acc'].to_numpy(dtype=float)

        # Outer difference gives one value per (treatment row, baseline row)
        # pair; ravel() flattens it with the treatment row as the slow axis,
        # i.e. the same order a nested treatment/baseline loop would produce.
        accs = np.subtract.outer(treat_accs, base_accs).ravel()

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [88]:
# Settings for the MNLI difference figure.
ylim = [-0.05, 0.05]
title = f"{diff_name}\nmnli | {combined} | {genre} genre"
xlabel, ylabel = 'Iteration', 'Difference'
tabletitle = 'Median'
figtype = 'jpg'

# diff_df only holds the two treatments (baseline is the reference they are
# compared against), so the palette and hue order list just those two.
fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={'LotS': 'tab:orange', 'LitL': 'tab:green'},
    hue_order=['LotS', 'LitL'],
)

# Write the figure into plots_dir; the file name encodes setting and genre.
fig.savefig(
    os.path.join(plots_dir, f"difference-mnli-{combined}-{genre}.{figtype}")
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Separate

In [89]:
# Combination setting for this MNLI subsection: used below to filter the
# 'comb' column and to label/name the resulting figures.
combined = 'separate'

In [90]:
# Genre to analyse; also used in figure titles and file names below.
genre = 'combined'

# mnli_summary is parsed line by line, one JSON object per line.
with open(mnli_summary, 'r') as f:
    records = [json.loads(line) for line in f]
summary = pd.DataFrame(records)

# Narrow the summary to the selected iteration, combination setting and genre.
# 'iter' is compared against the string form of `iteration`.
iter_summary = summary[summary['iter'] == str(iteration)]
comb_summary = iter_summary[iter_summary['comb'] == combined]
# TODO: change the 'tag' column name to 'genre'.
genre_summary = comb_summary[comb_summary['tag'] == genre]

In [91]:
# Keep only the MNLI distribution rows for the selected combination setting,
# then only those for the selected genre; the result is the frame to plot.
mnli_dist = distributions['mnli']
comb_errs = mnli_dist[mnli_dist['comb'] == combined]
plot_df = comb_errs[comb_errs['genre'] == genre]

In [92]:
# Columns of plot_df that are handed to err_line_plots for the accuracy figure.
keeps = ['acc', 'treat', 'iter']

In [93]:
# Settings for the MNLI accuracy figure.
ylim = [0.7, 1]
title = f'{acc_name}\nmnli | {combined} | {genre} genre'
xlabel, ylabel = 'Iteration', 'Accuracy'
tabletitle = 'Median'
figtype = 'jpg'

# One fixed colour per treatment so figures are comparable with each other.
fig = err_line_plots(
    plot_df[keeps],
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={'baseline': 'tab:blue', 'LotS': 'tab:orange', 'LitL': 'tab:green'},
)

# Write the figure into plots_dir; the file name encodes setting and genre.
fig.savefig(
    os.path.join(plots_dir, f'acc-mnli-{combined}-{genre}.{figtype}')
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [94]:
# Pairwise accuracy differences (treatment minus baseline) per iteration.
# For each treatment and each iteration value in plot_df, every treatment
# accuracy is compared with every baseline accuracy of that same iteration.
diff = []
base_df = plot_df.loc[plot_df['treat'] == 'baseline', :]
for treat in ['LotS', 'LitL']:
    treat_df = plot_df.loc[plot_df['treat'] == treat, :]

    # The loop variable is deliberately NOT named `iteration`: that name is a
    # notebook-level setting read by other cells (e.g.
    # `summary['iter'] == str(iteration)` when loading the MNLI summary), and
    # rebinding it here would silently change what those cells select.
    for iter_value in plot_df['iter'].unique():
        base_accs = base_df.loc[base_df['iter'] == iter_value, 'acc'].to_numpy(dtype=float)
        treat_accs = treat_df.loc[treat_df['iter'] == iter_value, 'acc'].to_numpy(dtype=float)

        # Outer difference gives one value per (treatment row, baseline row)
        # pair; ravel() flattens it with the treatment row as the slow axis,
        # i.e. the same order a nested treatment/baseline loop would produce.
        accs = np.subtract.outer(treat_accs, base_accs).ravel()

        diff.append(pd.DataFrame({
            'acc': accs,
            'iter': int(iter_value),
            'treat': treat,
        }))

diff_df = pd.concat(diff, ignore_index=True)

In [95]:
# Settings for the MNLI difference figure.
ylim = [-0.05, 0.05]
# Second title line starts directly with "mnli" (no leading space), the same
# format as the other MNLI figure titles in this notebook.
title = f"{diff_name}\nmnli | {combined} | {genre} genre"
xlabel = 'Iteration'
ylabel = 'Difference'
tabletitle = 'Median'

figtype = 'jpg'

# diff_df only holds the two treatments (baseline is the reference they are
# compared against), so the palette and hue order list just those two.
fig = err_line_plots(
    diff_df,
    ylim=ylim,
    title=title,
    xlabel=xlabel,
    ylabel=ylabel,
    tabletitle=tabletitle,
    palette={
        'LotS': 'tab:orange',
        'LitL': 'tab:green',
    },
    hue_order=['LotS', 'LitL'],
)
# Write the figure into plots_dir; the file name encodes setting and genre.
fig.savefig(os.path.join(plots_dir, f"difference-mnli-{combined}-{genre}.{figtype}"))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …