# Import

In [1]:
%matplotlib widget

In [2]:
import os

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# Define

In [3]:
def my_plot(table, plot_args, title, xlabel='iteration', ylabel='accuracy', ylim=[0, 1], order=None):
    """Draw one line per row of ``table`` above a table of the same values as percentages.

    Args:
        table: pandas DataFrame. Every row is drawn as one line; the column
            labels are the x positions and the row labels the legend entries.
        plot_args: dict of keyword arguments forwarded to ``Axes.plot``.
        title: title of the line chart.
        xlabel: x-axis label of the line chart.
        ylabel: y-axis label of the line chart.
        ylim: two-element sequence unpacked into ``Axes.set_ylim``. The
            default list is only read, never mutated.
        order: row labels to list in the percentage table, top to bottom.
            Defaults to ``['baseline', 'LotS', 'LitL']``.

    Returns:
        The matplotlib figure holding both panels.

    Raises:
        KeyError: if a label in ``order`` is not a row label of ``table``.
    """
    order = ['baseline', 'LotS', 'LitL'] if order is None else list(order)

    # Validate before creating the figure so a bad table does not leave an
    # empty figure open behind the exception.
    missing = [treat for treat in order if treat not in table.index]
    if missing:
        raise KeyError(f'rows {missing} not found in table index {list(table.index)}')

    fig, (line_ax, table_ax) = plt.subplots(2, 1)

    # Plotting the transposed frame yields one Line2D per original row.
    lines = line_ax.plot(table.columns.values, table.transpose(), **plot_args)
    line_ax.legend(lines, table.index.values, loc='best')

    line_ax.set_title(title)
    line_ax.set_xlabel(xlabel)
    line_ax.set_ylabel(ylabel)
    line_ax.set_ylim(*ylim)

    # Same numbers as the curves, formatted as percentages with two decimals.
    cell_text = [
        [f'{acc*100:.2f}%' for acc in table.loc[treat, :]]
        for treat in order
    ]

    table_ax.table(cellText=cell_text, colLabels=table.columns, rowLabels=order, loc='center')
    # The lower axes only hosts the table, so hide its frame and ticks.
    table_ax.axis('off')

    plt.tight_layout()
    return fig
    

In [4]:
def display_val_summary(modifier, iteration, plot_args, eval_dir, ylim=[0,1], save_plot=False, plot_type='jpg'):
    """Print and plot the validation summary table of one configuration.

    The table is read from ``<eval_dir>/r<iteration>/tables/configs.<modifier>.csv``
    with its first column as the index, and reduced to the columns labelled
    ``'1'`` through ``str(iteration)``. It is printed and then drawn with
    ``my_plot`` using ``modifier`` as the title.

    When ``save_plot`` is true the figure is also written to
    ``<eval_dir>/r<iteration>/sum_plots/configs.<modifier>.<plot_type>``
    and the output directory is printed.
    """
    round_dir = os.path.join(eval_dir, f'r{iteration}')

    summary_table = pd.read_csv(
        os.path.join(round_dir, 'tables', f'configs.{modifier}.csv'),
        index_col=0,
    )
    # Column labels come back from the CSV header as strings.
    wanted_columns = [str(n) for n in range(1, iteration + 1)]
    summary_table = summary_table[wanted_columns]

    print(summary_table)
    fig = my_plot(summary_table, plot_args, modifier, ylim=ylim)

    if not save_plot:
        return

    assert plot_type is not None
    plot_dir = os.path.join(round_dir, 'sum_plots')
    os.makedirs(plot_dir, exist_ok=True)
    fig.savefig(os.path.join(plot_dir, f'configs.{modifier}.{plot_type}'))

    print(f"Saved at\n{plot_dir}")


def display_itereval_summary(sub_keys, iteration, plot_args, eval_dir, combined, ylim=[0,1], save_plot=False, plot_type='jpg'):
    """Print and plot one held-out (itereval) summary table.

    The values of ``sub_keys`` are joined with ``'.'`` to form the table key;
    ``'/'`` is replaced by ``'-'`` and ``';'`` by ``'--'`` so the key is a
    valid file name. The table is read from
    ``<eval_dir>/r<iteration>/tables/<combined>/iterevals.<key>.csv`` with its
    first column as the index, reduced to the columns labelled ``'1'`` through
    ``str(iteration)``, printed, and drawn with ``my_plot``. The plot title is
    the ``sub_keys`` values joined with ``'-'``.

    When ``save_plot`` is true the figure is written to
    ``<eval_dir>/r<iteration>/sum_plots/<combined>/iterevals.<title>.<plot_type>``,
    where ``<title>`` has the same character replacements applied, and the
    output directory is printed.

    Args:
        sub_keys: dict whose values (in insertion order) identify the table.
        iteration: last iteration column to include; also names the ``r<N>`` folder.
        plot_args: dict of keyword arguments forwarded to ``my_plot``.
        eval_dir: root directory of the evaluation summaries.
        combined: name of the sub-directory under ``tables`` and ``sum_plots``.
        ylim: y-axis limits forwarded to ``my_plot``.
        save_plot: whether to write the figure to disk.
        plot_type: file extension of the saved figure; must not be None when saving.
    """
    # Characters that occur in case/subcase names but must not reach a file name.
    rep = {
        '/': '-',
        ';': '--',
    }

    def _sanitize(text):
        # Apply every replacement in `rep` to `text`.
        for old_char, new_char in rep.items():
            text = text.replace(old_char, new_char)
        return text

    round_dir = os.path.join(eval_dir, f'r{iteration}')

    table_key = _sanitize('.'.join(sub_keys.values()))
    fname = os.path.join(round_dir, 'tables', combined, f'iterevals.{table_key}.csv')
    summary_table = pd.read_csv(fname, index_col=0)
    summary_table = summary_table[[str(n) for n in range(1, iteration + 1)]]

    print(summary_table)
    title = '-'.join(sub_keys.values())
    fig = my_plot(summary_table, plot_args, title, ylim=ylim)

    if save_plot:
        assert plot_type is not None
        plot_dir = os.path.join(round_dir, 'sum_plots', combined)
        os.makedirs(plot_dir, exist_ok=True)
        # The title may still hold '/' or ';' (e.g. 'Symmetry/Collectivity');
        # sanitize it so the separator is not read as a directory boundary.
        plot_key = _sanitize(title)
        fig.savefig(os.path.join(plot_dir, f'iterevals.{plot_key}.{plot_type}'))

        # Report the directory the file was actually written to.
        print(f"Saved at\n{plot_dir}")
    

# Summarize

In [5]:
# Model name; also the sub-directory of eval_summary that is read below.
model = 'roberta-large-mnli'
# Last iteration to include in every summary.
iteration = 5

# '__file__' is a plain string here, so abspath resolves it against the
# current working directory; stripping two path components therefore yields
# the parent of the working directory.
repo = os.path.split(os.path.dirname(os.path.abspath('__file__')))[0]
eval_dir = os.path.join(repo, 'eval_summary', model)

## Validation Sets

In [6]:
# Validation table to show: combined, hyp, separate, or separate_hyp.
mod = 'separate'

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.5, 1.0]

# Preview only: the figure is displayed but not written to disk.
save_plot, plot_type = False, 'jpg'

display_val_summary(
    mod, iteration, plot_args, eval_dir,
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.862069  0.896970  0.896552  0.909639  0.917836
LotS      0.855984  0.839757  0.863179  0.867617  0.838057
LitL      0.890269  0.870103  0.865031  0.861856  0.863354


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

RuntimeError: libpng signaled error

## Iterevals

In [7]:
# Case/subcase listings for the two held-out sets, read from the working directory.
glue_case_keys = pd.read_csv(
    'glue_case_keys.csv',
    header=0,
    index_col=False,
)
hans_case_keys = pd.read_csv(
    'hans_case_keys.csv',
    header=0,
    index_col=False,
)

In [8]:
# Cases for GLUE:
#     combined, Knowledge, Lexical Semantics, Logic, Predicate-Argument Structure
#
# Cases for HANS:
#     combined, constituent, lexical_overlap, subsequence

# Show the subcases listed for one GLUE case.
glue_case = 'Lexical Semantics'
print(glue_case_keys.loc[glue_case_keys['case'].eq(glue_case), 'subcase'])

# HANS equivalent (uncomment to use):
# print('='*90 + '\n')
# hans_case = 'constituent'
# print(hans_case_keys.loc[hans_case_keys['case'] == hans_case, 'subcase'])

4                           combined
5                          Factivity
6              Factivity;Quantifiers
7                 Lexical entailment
8       Lexical entailment;Factivity
9     Lexical entailment;Quantifiers
10            Morphological negation
11                    Named entities
12                       Quantifiers
13                        Redundancy
14             Symmetry/Collectivity
Name: subcase, dtype: object


In [9]:
# Slice of the held-out results to show.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='glue', case='Logic', subcase='combined', label='combined')

# Table variant to read: either combined or separate.
combined = 'combined'

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.6, 0.75]

# Preview only: the figure is displayed but not written to disk.
save_plot, plot_type = False, 'jpg'

display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, combined,
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.620879  0.623626  0.618132  0.612637  0.618132
LotS      0.631868  0.640110  0.609890  0.620879  0.618132
LitL      0.609890  0.609890  0.631868  0.626374  0.615385


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Save Plots

## In-Distribution

In [10]:
# Validation table to show: combined, hyp, separate, or separate_hyp.
mod = 'combined'

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.75, 1.0]

# Display the figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

display_val_summary(
    mod, iteration, plot_args, eval_dir,
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.866126  0.882591  0.888589  0.887317  0.890638
LotS      0.847870  0.831643  0.840189  0.848531  0.845624
LitL      0.896480  0.879132  0.875772  0.867662  0.868454


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [11]:
# Validation table to show: combined, hyp, separate, or separate_hyp.
mod = 'separate'

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.75, 1.0]

# Display the figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

display_val_summary(
    mod, iteration, plot_args, eval_dir,
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.862069  0.896970  0.896552  0.909639  0.917836
LotS      0.855984  0.839757  0.863179  0.867617  0.838057
LitL      0.890269  0.870103  0.865031  0.861856  0.863354


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [12]:
# Validation table to show: combined, hyp, separate, or separate_hyp.
mod = 'hyp'

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.5, 0.75]

# Display the figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

display_val_summary(
    mod, iteration, plot_args, eval_dir,
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.624746  0.621457  0.629980  0.644770  0.648910
LotS      0.586207  0.565923  0.575185  0.581054  0.565235
LitL      0.587992  0.571281  0.587509  0.578270  0.571546


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [13]:
# Validation table to show: combined, hyp, separate, or separate_hyp.
mod = 'separate_hyp'

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.5, 0.75]

# Display the figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

display_val_summary(
    mod, iteration, plot_args, eval_dir,
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.628803  0.646465  0.651116  0.674699  0.655311
LotS      0.580122  0.563895  0.605634  0.598778  0.540486
LitL      0.590062  0.616495  0.586912  0.560825  0.581781


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


## Held-out

In [14]:
# Cases for GLUE:
#     combined, Knowledge, Lexical Semantics, Logic, Predicate-Argument Structure
#
# Cases for HANS:
#     combined, constituent, lexical_overlap, subsequence

# Show the subcases listed for one GLUE case.
glue_case = 'Lexical Semantics'
print(glue_case_keys.loc[glue_case_keys['case'].eq(glue_case), 'subcase'])

# HANS equivalent (uncomment to use):
# print('='*90 + '\n')
# hans_case = 'constituent'
# print(hans_case_keys.loc[hans_case_keys['case'] == hans_case, 'subcase'])

4                           combined
5                          Factivity
6              Factivity;Quantifiers
7                 Lexical entailment
8       Lexical entailment;Factivity
9     Lexical entailment;Quantifiers
10            Morphological negation
11                    Named entities
12                       Quantifiers
13                        Redundancy
14             Symmetry/Collectivity
Name: subcase, dtype: object


### GLUE

In [15]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='glue', case='Lexical Semantics', subcase='combined', label='combined')

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.5, 1.0]

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Plot the same slice from the 'combined' tables, then from the 'separate' ones.
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.679348  0.714674  0.695652  0.701087  0.698370
LotS      0.698370  0.701087  0.706522  0.692935  0.692935
LitL      0.695652  0.706522  0.687500  0.687500  0.687500


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
                 1         2         3         4         5
baseline  0.706522  0.673913  0.706522  0.703804  0.690217
LotS      0.692935  0.690217  0.695652  0.684783  0.665761
LitL      0.695652  0.684783  0.687500  0.673913  0.684783


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [16]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='glue', case='Knowledge', subcase='combined', label='combined')

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.5, 1.0]

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Plot the same slice from the 'combined' tables, then from the 'separate' ones.
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.605634  0.619718  0.633803  0.609155  0.630282
LotS      0.637324  0.623239  0.630282  0.644366  0.637324
LitL      0.637324  0.637324  0.651408  0.630282  0.647887


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
                 1         2         3         4         5
baseline  0.640845  0.661972  0.630282  0.647887  0.640845
LotS      0.612676  0.626761  0.598592  0.637324  0.640845
LitL      0.619718  0.647887  0.651408  0.616197  0.630282


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [17]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='glue', case='Logic', subcase='combined', label='combined')

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.25, 0.75]

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Plot the same slice from the 'combined' tables, then from the 'separate' ones.
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.620879  0.623626  0.618132  0.612637  0.618132
LotS      0.631868  0.640110  0.609890  0.620879  0.618132
LitL      0.609890  0.609890  0.631868  0.626374  0.615385


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
                 1         2         3         4         5
baseline  0.615385  0.601648  0.612637  0.598901  0.612637
LotS      0.615385  0.615385  0.640110  0.604396  0.609890
LitL      0.640110  0.615385  0.612637  0.557692  0.615385


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [18]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(
    dataset='glue',
    case='Predicate-Argument Structure',
    subcase='combined',
    label='combined',
)

# Line style for every curve and the y-axis range of the chart.
plot_args = dict(linestyle='-', marker='o')
ylim = [0.5, 1.00]

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Plot the same slice from the 'combined' tables, then from the 'separate' ones.
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

                 1         2         3         4         5
baseline  0.705189  0.693396  0.702830  0.691038  0.683962
LotS      0.700472  0.693396  0.714623  0.693396  0.712264
LitL      0.693396  0.705189  0.700472  0.683962  0.695755


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
                 1         2         3         4         5
baseline  0.698113  0.700472  0.700472  0.702830  0.705189
LotS      0.695755  0.707547  0.712264  0.709906  0.702830
LitL      0.686321  0.705189  0.700472  0.686321  0.698113


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


### HANS

In [19]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='hans', case='lexical_overlap', subcase='combined', label='entailment')

# Line style shared by every curve.
plot_args = dict(linestyle='-', marker='o')

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Slice read from the 'combined' tables.
ylim = [0, 1]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

# Same slice read from the 'separate' tables.
ylim = [0, 1.0]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

               1       2       3       4       5
baseline  0.9980  0.2172  0.2232  0.2320  0.2140
LotS      0.9914  0.2228  0.2092  0.2128  0.2318
LitL      0.9998  0.2080  0.2204  0.2318  0.2078


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
               1       2       3       4       5
baseline  0.9994  0.2250  0.2164  0.2110  0.2054
LotS      0.9996  0.2036  0.2028  0.2034  0.2130
LitL      0.9964  0.2176  0.2106  0.2708  0.2034


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [20]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='hans', case='lexical_overlap', subcase='combined', label='non-entailment')

# Line style shared by every curve.
plot_args = dict(linestyle='-', marker='o')

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Slice read from the 'combined' tables.
ylim = [0, 1]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

# Same slice read from the 'separate' tables.
ylim = [0, 1.0]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

               1       2       3       4       5
baseline  0.7744  0.9660  0.9136  0.9384  0.9542
LotS      0.9548  0.9568  0.9440  0.9360  0.9718
LitL      0.8894  0.9332  0.9562  0.9834  0.9482


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
               1       2       3       4       5
baseline  0.8848  0.9258  0.9290  0.9256  0.8662
LotS      0.9604  0.8960  0.9206  0.9080  0.9602
LitL      0.9238  0.9490  0.8876  0.9590  0.9818


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [21]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='hans', case='subsequence', subcase='combined', label='entailment')

# Line style shared by every curve.
plot_args = dict(linestyle='-', marker='o')

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Slice read from the 'combined' tables.
ylim = [0, 1]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

# Same slice read from the 'separate' tables.
ylim = [0, 1.0]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

               1       2       3       4       5
baseline  0.9996  0.0352  0.0392  0.0536  0.0292
LotS      0.9942  0.0400  0.0244  0.0304  0.0522
LitL      1.0000  0.0352  0.0554  0.0802  0.0306


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
               1       2       3       4       5
baseline  1.0000  0.0424  0.0342  0.0212  0.0224
LotS      1.0000  0.0106  0.0058  0.0214  0.0350
LitL      0.9998  0.0574  0.0342  0.1982  0.0198


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [22]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='hans', case='subsequence', subcase='combined', label='non-entailment')

# Line style shared by every curve.
plot_args = dict(linestyle='-', marker='o')

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Slice read from the 'combined' tables.
ylim = [0, 1]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

# Same slice read from the 'separate' tables.
ylim = [0, 1.0]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

               1       2       3       4       5
baseline  0.2714  0.9776  0.9634  0.9520  0.9668
LotS      0.3938  0.9692  0.9794  0.9710  0.9652
LitL      0.3230  0.9712  0.9648  0.9636  0.9730


  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
               1       2       3       4       5
baseline  0.3532  0.9674  0.9674  0.9812  0.9878
LotS      0.3620  0.9850  0.9876  0.9880  0.9770
LitL      0.3640  0.9710  0.9628  0.9384  0.9884


  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [23]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='hans', case='constituent', subcase='combined', label='entailment')

# Line style shared by every curve.
plot_args = dict(linestyle='-', marker='o')

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Slice read from the 'combined' tables.
ylim = [0, 1]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

# Same slice read from the 'separate' tables.
ylim = [0, 1.0]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

               1       2       3       4       5
baseline  0.9890  0.1904  0.1802  0.1642  0.1386
LotS      0.9970  0.1888  0.1686  0.1818  0.2056
LitL      0.9978  0.1718  0.1616  0.1918  0.1632


  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
               1       2       3       4       5
baseline  0.9866  0.1902  0.1618  0.0806  0.1512
LotS      0.9964  0.1616  0.1372  0.1702  0.1972
LitL      0.9954  0.1594  0.1530  0.2320  0.1426


  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots


In [24]:
# Slice of the held-out results to plot.
#   dataset: either hans or glue
#   case, subcase: combined or specific to the respective itereval set
#   label: combined or [entailment, neutral, contradiction] for glue,
#          [entailment, non-entailment] for hans
sub_keys = dict(dataset='hans', case='constituent', subcase='combined', label='non-entailment')

# Line style shared by every curve.
plot_args = dict(linestyle='-', marker='o')

# Display each figure and also write it to disk as a jpg.
save_plot, plot_type = True, 'jpg'

# Slice read from the 'combined' tables.
ylim = [0, 1]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'combined',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

# Same slice read from the 'separate' tables.
ylim = [0, 1.0]
display_itereval_summary(
    sub_keys, iteration, plot_args, eval_dir, 'separate',
    ylim=ylim, save_plot=save_plot, plot_type=plot_type,
)

               1       2      3       4       5
baseline  0.3962  0.9386  0.924  0.9214  0.9348
LotS      0.3532  0.9374  0.943  0.9404  0.9386
LitL      0.2264  0.9436  0.941  0.9290  0.9424


  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
               1       2       3       4       5
baseline  0.2972  0.9374  0.9292  0.9314  0.9266
LotS      0.3458  0.9408  0.9418  0.9438  0.9188
LitL      0.4012  0.9378  0.9358  0.7860  0.9448


  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Saved at
C:\Users\Willi\Documents\NYU\2020_Fall\semantics_seminar\lip\ling_in_loop\eval_summary\roberta-large-mnli\r5\sum_plots
