In [1]:
# Notebook for ner results table

In [2]:
import pandas as pd
import numpy as np
import json

In [3]:
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/electra-metric/'

#reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
ues = ['last', 'all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'MC', 'DPP_on_masks', 'DPP_with_ood']
ues_layers = ['last', 'all', 'last', 'last']
metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'bald', 'sampled_max_prob', 'variance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

In [118]:
def get_df(raw_path, reg_type, baselines_dict={}, baselines=None):
    raw_dict = {}
    df_dict = {}
    for ue, ue_name in zip(ues, ues):
        #ue_path = raw_path + ue + '/'
        # enter row level
        raw_dict[ue_name] = {}
        df_dict[ue_name] = {}
        for ue_type in types:
            raw_dict[ue_name][ue_type] = {}
            for metric in metrics:
                ue_path = raw_path + ue_type + '/' + ue + '/'
                fname = ue_path + f'metrics_{metric}.json'
                with open(fname, 'r') as f:
                    curr_metrics = json.loads(f.read())
                metric_results = {}
                for ue_method in ue_methods:
                    mean, std = np.mean(list(curr_metrics[ue_method].values())), np.std(list(curr_metrics[ue_method].values()))
                    if metric in perc_metrics:
                        mean, std = mean * 100, std * 100
                    if ue_method == 'max_prob':
                        baseline = mean
                        if baselines is None:
                            baselines_dict[ue_type + metric + ue_method] = baseline
                        else:
                            baseline = baselines_dict[ue_type + metric + ue_method]
                    if metric in diff_metrics and ue_method != 'max_prob':
                        mean = mean - baseline
                    value = '{:.{prec}f}'.format(mean, prec=2) + '$\\pm$' + '{:.{prec}f}'.format(std, prec=2)
                    metric_results[ue_method] = value

                # so we obtained two dict for one metric
                raw_dict[ue_name][ue_type][metric] = metric_results
                # make buf dataframe
            type_df = pd.DataFrame.from_dict(raw_dict[ue_name][ue_type])
            df_dict[ue_name][ue_type] = type_df

    dataset_dfs = [pd.concat([df_dict[ue][ue_type] for ue in ues]) for ue_type in types]
    # make multiindex
    for idx, df in enumerate(dataset_dfs):
        df.columns = pd.MultiIndex.from_tuples([(types_names[idx], metric) for metric in metrics])
        dataset_dfs[idx] = df
    #token_df.columns = pd.MultiIndex.from_tuples([('CoNNL-2003 (10%, token level)', metric) for metric in metrics])
    #seq_df.columns = pd.MultiIndex.from_tuples([('CoNNL-2003 (10%, sequence level)', metric) for metric in metrics])
    raw_df = pd.concat(dataset_dfs, axis=1)

    # after rename max_prob column to baseline and drop all max_prob columns
    max_prob_rows = raw_df.loc['max_prob']
    if len(max_prob_rows) != len(metrics) * len(types_names):# or len(types_names) == 1:
        buf_max_prob = raw_df.loc['max_prob'].drop_duplicates().loc['max_prob']
    else:
        buf_max_prob = raw_df.loc['max_prob']
    if len(buf_max_prob) != 1:
        print('There are several rows for SR, used first one')
        print(buf_max_prob)
        buf_max_prob = buf_max_prob.iloc[0]
    raw_df.drop('max_prob', inplace=True)
    raw_df.loc['max_prob'] = buf_max_prob
    names_df = pd.DataFrame()
    methods = []
    for ue in ues_names:
        methods += [ue] * (len(ue_methods) - 1)
    methods += ['Baseline']
    layers = []
    for ue in ues_layers:
        layers += [ue] * (len(ue_methods) - 1)
    layers += ['-']
    reg_type = [reg_type] * len(raw_df)
    names_df['Method'] = methods
    names_df['Reg. Type'] = reg_type
    # names_df['Dropout Layers'] = layers
    names_df['UE Score'] = raw_df.index
    names_df.index = raw_df.index
    raw_df = pd.concat([names_df, raw_df], axis=1)
    return raw_df, baselines_dict

In [36]:
def get_df_ner(raw_path, reg_type, baselines_dict={}, baselines=None):
    raw_dict = {}
    df_dict = {}
    for ue, ue_name in zip(ues, ues):
        ue_path = raw_path + ue + '/'
        # enter row level
        raw_dict[ue_name] = {}
        df_dict[ue_name] = {}
        for ue_type in types:
            raw_dict[ue_name][ue_type] = {}
            for metric in metrics:
                fname = ue_path + f'metrics_{ue_type}_{metric}.json'
                with open(fname, 'r') as f:
                    curr_metrics = json.loads(f.read())
                metric_results = {}
                for ue_method in ue_methods:
                    mean, std = np.mean(list(curr_metrics[ue_method].values())), np.std(list(curr_metrics[ue_method].values()))
                    if metric in perc_metrics:
                        mean, std = mean * 100, std * 100
                    if ue_method == 'max_prob':
                        baseline = mean
                        if baselines is None:
                            baselines_dict[ue_type + metric + ue_method] = baseline
                        else:
                            baseline = baselines_dict[ue_type + metric + ue_method]
                    if metric in diff_metrics and ue_method != 'max_prob':
                        mean = mean - baseline
                    value = '{:.{prec}f}'.format(mean, prec=2) + '$\\pm$' + '{:.{prec}f}'.format(std, prec=2)
                    metric_results[ue_method] = value

                # so we obtained two dict for one metric
                raw_dict[ue_name][ue_type][metric] = metric_results
                # make buf dataframe
            type_df = pd.DataFrame.from_dict(raw_dict[ue_name][ue_type])
            df_dict[ue_name][ue_type] = type_df

    token_df = pd.concat([df_dict[ue]['token'] for ue in ues])
    seq_df = pd.concat([df_dict[ue]['seq'] for ue in ues])
    # make multiindex
    token_df.columns = pd.MultiIndex.from_tuples([('CoNLL-2003 (token level)', metric) for metric in metrics])
    seq_df.columns = pd.MultiIndex.from_tuples([('CoNLL-2003 (sequence level)', metric) for metric in metrics])
    raw_df = pd.concat([token_df, seq_df], axis=1)

    # after rename max_prob column to baseline and drop all max_prob columns
    max_prob_rows = raw_df.loc['max_prob']
    if len(max_prob_rows) != len(metrics) * 2:
        buf_max_prob = raw_df.loc['max_prob'].drop_duplicates().loc['max_prob']
    else:
        buf_max_prob = raw_df.loc['max_prob']
    if len(buf_max_prob) == 2:
        buf_max_prob = buf_max_prob.iloc[0]
    raw_df.drop('max_prob', inplace=True)
    raw_df.loc['max_prob'] = buf_max_prob
    names_df = pd.DataFrame()
    methods = []
    for ue in ues_names:
        methods += [ue] * (len(ue_methods) - 1)
    methods += ['SR (baseline)']
    layers = []
    for ue in ues_layers:
        layers += [ue] * (len(ue_methods) - 1)
    layers += ['-']
    reg_type = [reg_type] * len(raw_df)
    names_df['Method'] = methods
    names_df['Reg. Type'] = reg_type
    #names_df['Dropout Layers'] = layers
    names_df['UE Score'] = raw_df.index
    names_df.index = raw_df.index
    raw_df = pd.concat([names_df, raw_df], axis=1)
    return raw_df, baselines_dict

In [37]:
# TODO: add the same, but only for cola
def get_full_df(path_cls, path_ner, reg_type, ues, ues_names, ues_layers, ue_methods,
                baselines_dict_cls={}, baselines_dict_ner={}, baselines=True):

    raw_df, baselines_dict = get_df(path_cls, reg_type, baselines_dict_cls, baselines)

    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # set params for NER
    #global types
    types = ['token', 'seq']
    reg_df, baselines_dict = get_df_ner(path_ner, reg_type, baselines_dict_ner, baselines)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)
    # collect full table
    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)
    return miscl_df

In [50]:
def prepare_latex_table(table):
    latex_table = table.to_latex(bold_rows=False, index=False)
    latex_table = latex_table.replace('\\$\\textbackslash pm\\$', '$\pm$')
    latex_table = latex_table.replace('variance', 'PV')
    latex_table = latex_table.replace('var\_ratio', 'VR')
    latex_table = latex_table.replace('sampled\_entropy', 'SE')
    latex_table = latex_table.replace('sampled\_max\_prob', 'SMP')
    latex_table = latex_table.replace('mahalanobis\_distance', 'MD')
    latex_table = latex_table.replace('max\_prob', 'MP')
    latex_table = latex_table.replace('bald', 'BALD')
    latex_table = latex_table.replace('mixup', 'DS')
    return latex_table

# TODO:
1. Replace dpp and dpp with ood in electra
2. Replace new sngp

# New ELECTRA, full

### Metric, CER, None - MC, DPP, DPP with OOD, Maha, Maha SN, SNGP, DE, mixup
1. No data for DPP & DPP with OOD for CER and raw regularizers, for cls
2. No SNGP for new ner, no rpp metric for sngp for cls
3. 

In [92]:
# MC & DPP, metric
raw_path = '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/'
ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'last', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

miscl_df = raw_df
miscl_df.reset_index(inplace=True, drop=True)

# NER
# Same for metric
#reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_fixed_omega_sn/conll/mixup/'
reg_path = '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/conll/'

types = ['token', 'seq']

baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'metric', baselines_dict, 1)
print(baselines_dict)
ner_df = pd.concat([reg_df])
ner_df.reset_index(inplace=True, drop=True)

miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

#ner_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
#miscl_df = pd.concat([miscl_df, ner_df], axis=1)



latex_table = miscl_df.to_latex(bold_rows=False, index=False)
latex_table = latex_table.replace('\\$\\textbackslash pm\\$', '$\pm$')
latex_table = latex_table.replace('variance', 'PV')
latex_table = latex_table.replace('var\_ratio', 'VR')
latex_table = latex_table.replace('sampled\_entropy', 'SE')
latex_table = latex_table.replace('sampled\_max\_prob', 'SMP')
latex_table = latex_table.replace('mahalanobis\_distance', 'MD')
latex_table = latex_table.replace('max\_prob', 'MP')
latex_table = latex_table.replace('bald', 'BALD')
latex_table = latex_table.replace('mixup', 'DS')
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
                MC &    metric &         PV &                       7.04$\pm$2.09 &                   0.10$\pm$0.03 &                         16.90$\pm$4.14 &                      2.00$\pm$0.49 &  12.81$\pm$2.48 & 1.56$\pm$0.23 &  13.71$\pm$2.29 & 0.85$\pm$0.10 &  47.26$\pm$2.82 & 2.29$\pm$0.10 \\
                MC &    metric &             BALD &                       7.16$\

In [124]:
# MC-all, ELECTRA
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_raw_no_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_reg_no_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_raw_no_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_reg_no_sn/conll2003/',]
ues_array = [['mc_all', 'ddpp_dpp', 'ddpp_ood'], ['all', 'dpp', 'dpp_with_ood'], ['mc_all', 'ddpp_dpp', 'ddpp_ood']]
ues_array_ner = [['mc_all', 'ddpp_dpp_best', 'ddpp_ood_best'], ['all', 'dpp', 'dpp_with_ood'], ['mc_all', 'ddpp_dpp_best', 'ddpp_ood_best']]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ues_array[idx]
    
    ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
    ues_layers = ['all', 'all', 'all']
    
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    ues = ues_array_ner[idx]
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
There are several rows for SR, used first one
                    MRPC                           SST2                 \
                 rcc-auc            rpp         rcc-auc            rpp   
max_prob  23.28$\pm$7.63  2.68$\pm$0.62  18.07$\pm$5.58  1.23$\pm$0.37   
max_prob  17.19$\pm$4.72  2.14$\pm$0.42  18.19$\pm$3.17  1.23$\pm$0.21   

                     CoLA                 
                  rcc-auc            rpp  
max_prob   59.04$\pm$7.46  2.63$\pm$0.16  
max_prob  61.85$\pm$10.31  2.74$\pm$0.25  
\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
                MC &         - &         PV &                       6.05$\pm$1.27 &                   

In [94]:
# Maha SN, ELECTRA
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_raw_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_reg_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_raw_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_reg_sn/conll2003/',]
ues_array = ['mahalanobis', 'maha_sn', 'mahalanobis']
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = [ues_array[idx]]
    ues_names = ['MD SN (Ours)']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
 MD SN (Ours) &         - & MD &                       9.41$\pm$2.92 &                   0.14$\pm$0.04 &                         19.07$\pm$4.62 &                      2.26$\pm$0.52 &  13.61$\pm$1.89 & 1.80$\pm$0.16 &  12.16$\pm$1.76 & 0.83$\pm$0.10 &  40.42$\pm$2.10 & 1.96$\pm$0.11 \\
SR (baseline) &         - &             MP &                       7.99$\pm$2.33 &                   0.12$\pm$0.03 &                         22.41$\pm$4.80 &                      2.68$\pm$0.61 &  18.72$\pm$6.21 & 2.17$\pm$0.36 &  17.83$\pm$3.89 & 1.11$\pm$0.25 & 79.92$\pm$10.27 & 3.53$\pm$0.30 \\
\bottomrule
\end{tabular}

Model w

In [95]:
# Maha, ELECTRA
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_raw_no_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_reg_no_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_raw_no_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_reg_no_sn/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['maha']
    ues_names = ['MD']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
           MD &         - & MD &                       8.98$\pm$2.24 &                   0.13$\pm$0.03 &                         17.33$\pm$4.48 &                      2.04$\pm$0.40 &  13.21$\pm$1.53 & 1.75$\pm$0.21 &  13.01$\pm$2.63 & 0.89$\pm$0.17 &  41.63$\pm$1.31 & 1.96$\pm$0.05 \\
SR (baseline) &         - &             MP &                       6.71$\pm$2.16 &                   0.11$\pm$0.03 &                         19.13$\pm$3.90 &                      2.31$\pm$0.48 &  23.28$\pm$7.63 & 2.68$\pm$0.62 &  18.07$\pm$5.58 & 1.23$\pm$0.37 &  59.04$\pm$7.46 & 2.63$\pm$0.16 \\
\bottomrule
\end{tabular}

Model w

In [120]:
# Electra, SNGP
reg_types = ['-']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_raw_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_raw_sn/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['sngp']
    ues_names = ['SNGP']
    ues_layers = ['-']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'stds']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
There are several rows for SR, used first one
MRPC  rcc-auc    17.87$\pm$2.20
      rpp         2.62$\pm$0.22
SST2  rcc-auc    45.53$\pm$9.80
      rpp         2.40$\pm$0.35
CoLA  rcc-auc    64.62$\pm$3.83
      rpp         3.41$\pm$0.08
Name: max_prob, dtype: object
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type & UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &    (MRPC, rpp) & (SST2, rcc-auc) &    (SST2, rpp) & (CoLA, rcc-auc) &    (CoLA, rpp) \\
\midrule
         SNGP &         - &     stds &                     87.09$\pm$51.27 &                   0.90$\pm$0.54 &                        59.49$\pm$30.09 &                      5.29$\pm$0.60 &  15.78$\pm$2.95 &  2.19$\pm$0.38 &  13.88$\pm$1.45 &  0.94$\pm$0.09 &  51.87$\pm$2.13 &  2.64$\pm$0.05 \\
SR (baseline) &         - & MP &                    262.81$\pm$5

In [64]:
# Electra, Deep Ensemble
reg_types = ['-']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_raw_no_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_raw_no_sn/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['deep_ensemble']
    ues_names = ['DE']
    ues_layers = ['-']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
           DE &         - &         PV &                       5.10$\pm$0.81 &                   0.07$\pm$0.02 &                         15.33$\pm$3.55 &                      1.80$\pm$0.45 &  25.56$\pm$5.33 & 2.41$\pm$0.28 &  17.88$\pm$2.32 & 0.97$\pm$0.06 &  68.49$\pm$8.00 & 2.87$\pm$0.27 \\
           DE &         - &             BALD &                       4.95$\pm$1.13 &                   0.07$\pm$0.02 &                         15.33$\pm$3.61 &                      1.78$\pm$0.45 &  26.61$\pm$3.71 & 2.51$\pm$0.26 &  21.29$\pm$3.27 & 1.02$\pm$0.08 &  73.43$\pm$9.62 & 2.96$\pm$0.32 \\
           DE &         - & 

In [96]:
# Electra, Mixup
reg_types = ['MSD']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/mixup_electra_fix_repro_fix/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/mixup_electra_fix_repro_fix/conll/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['msd/all', 'msd/last']
    ues_names = ['MSD-all', 'MSD-last']
    ues_layers = ['all', 'last']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mixup']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    ues = ['mixup/all', 'mixup/last']
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: MSD
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type & UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
      MSD-all &       MSD &    DS &                      10.40$\pm$2.30 &                   0.15$\pm$0.02 &                         16.21$\pm$2.19 &                      1.93$\pm$0.29 &  16.38$\pm$8.33 & 2.27$\pm$1.20 &  11.17$\pm$0.94 & 0.78$\pm$0.06 &  39.21$\pm$1.99 & 1.90$\pm$0.11 \\
     MSD-last &       MSD &    DS &                      11.06$\pm$2.35 &                   0.16$\pm$0.02 &                         16.83$\pm$2.20 &                      2.06$\pm$0.27 &  16.52$\pm$8.29 & 2.30$\pm$1.20 &  11.45$\pm$0.87 & 0.80$\pm$0.05 &  40.57$\pm$1.88 & 1.97$\pm$0.11 \\
SR (baseline) &       MSD & MP &                  

### Distilbert - MC, DPP, DPP with OOD, Maha, Maha SN, DE, mixup

In [71]:
# MC-all, dpp, dpp with ood, Distilbert
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/raw/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/metric_171/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/cer/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/raw/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/metric_171/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/cer/conll/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['all', 'dpp', 'dpp_with_ood']
    ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
    ues_layers = ['all', 'all', 'all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
                MC &         - &         PV &                       8.02$\pm$1.75 &                   0.11$\pm$0.02 &                         18.93$\pm$4.98 &                      1.84$\pm$0.22 &  27.11$\pm$1.67 & 3.09$\pm$0.25 &  35.47$\pm$2.74 & 1.96$\pm$0.18 &  118.44$\pm$9.14 & 4.55$\pm$0.22 \\
                MC &         - &             BALD &                       8.08$\pm$1.75 &                   0.11$\pm$0.02 &                         19.22$\pm$4.95 &                      1.86$\pm$0.21 &  27.05$\pm$1.83 & 3.07$\pm$0.29 &  35.43$\pm$2.00 & 1.95$\pm$0.17 &  117.94$\pm$9.20 & 4.57$\pm$0.23 \\
          

In [72]:
# Maha SN, Distilbert
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/raw_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/metric_171_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/cer_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/raw_sn/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/metric_171_sn/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/cer_sn/conll/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['maha_sn']
    ues_names = ['MD SN (Ours)']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
 MD SN (Ours) &         - & MD &                       7.28$\pm$2.30 &                   0.10$\pm$0.03 &                         19.68$\pm$5.47 &                      1.95$\pm$0.33 &  29.89$\pm$2.22 & 3.48$\pm$0.27 &  32.99$\pm$3.74 & 2.03$\pm$0.20 & 104.40$\pm$5.39 & 4.32$\pm$0.16 \\
SR (baseline) &         - &             MP &                       7.45$\pm$1.72 &                   0.11$\pm$0.02 &                         22.55$\pm$5.36 &                      2.35$\pm$0.38 &  42.41$\pm$4.64 & 4.37$\pm$0.48 &  46.26$\pm$9.40 & 2.74$\pm$0.54 & 109.89$\pm$4.13 & 4.48$\pm$0.22 \\
\bottomrule
\end{tabular}

Model w

In [73]:
# Maha, Distilbert
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/raw/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/metric_171/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/cer/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/raw/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/metric_171/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert/cer/conll/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['maha']
    ues_names = ['MD']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
           MD &         - & MD &                       7.24$\pm$2.46 &                   0.10$\pm$0.03 &                         20.01$\pm$5.71 &                      1.98$\pm$0.22 &  26.24$\pm$2.13 & 3.12$\pm$0.23 &  29.69$\pm$3.10 & 1.79$\pm$0.18 & 120.22$\pm$10.12 & 4.67$\pm$0.30 \\
SR (baseline) &         - &             MP &                       7.53$\pm$2.14 &                   0.11$\pm$0.03 &                         22.43$\pm$5.16 &                      2.25$\pm$0.24 &  31.21$\pm$3.80 & 3.51$\pm$0.29 &  37.07$\pm$5.39 & 2.18$\pm$0.26 & 135.69$\pm$14.42 & 4.99$\pm$0.42 \\
\bottomrule
\end{tabular}

Mode

In [74]:
# Distilbert, Deep Ensemble
reg_types = ['-']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert_ensemble/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/distilbert_ensemble/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['de']
    ues_names = ['DE']
    ues_layers = ['-']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
           DE &         - &         PV &                       6.30$\pm$2.54 &                   0.08$\pm$0.03 &                         17.17$\pm$2.38 &                      1.59$\pm$0.30 &  34.20$\pm$3.28 & 3.36$\pm$0.08 &  29.69$\pm$4.80 & 1.61$\pm$0.20 & 122.00$\pm$12.58 & 4.40$\pm$0.27 \\
           DE &         - &             BALD &                       5.89$\pm$1.76 &                   0.08$\pm$0.03 &                         17.46$\pm$2.44 &                      1.60$\pm$0.31 &  35.41$\pm$3.94 & 3.44$\pm$0.08 &  30.38$\pm$4.89 & 1.64$\pm$0.20 & 123.73$\pm$13.09 & 4.45$\pm$0.27 \\
           DE &         -

In [97]:
# Distilbert, Mixup
reg_types = ['MSD']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/mixup_distilbert_fix_repro_fix/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/mixup_distilbert_fix_repro_fix/conll/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['msd/all', 'msd/last']
    ues_names = ['MSD-all', 'MSD-last']
    ues_layers = ['all', 'last']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mixup']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    ues = ['mixup/all', 'mixup/last']
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: MSD
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type & UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
      MSD-all &       MSD &    DS &                       6.39$\pm$1.67 &                   0.09$\pm$0.02 &                         17.70$\pm$3.57 &                      1.85$\pm$0.09 &  58.41$\pm$6.88 & 6.87$\pm$1.16 &  29.97$\pm$2.81 & 1.84$\pm$0.19 & 118.91$\pm$27.91 & 4.97$\pm$1.26 \\
     MSD-last &       MSD &    DS &                       6.46$\pm$1.67 &                   0.09$\pm$0.02 &                         17.76$\pm$3.54 &                      1.86$\pm$0.09 &  58.49$\pm$6.85 & 6.89$\pm$1.15 &  30.08$\pm$2.82 & 1.85$\pm$0.19 & 120.80$\pm$27.37 & 5.04$\pm$1.17 \\
SR (baseline) &       MSD & MP &               

### Deberta - MC, DPP, DPP with OOD, Maha, Maha SN, DE, mixup

In [106]:
# MC-all, dpp, dpp with ood, Distilbert
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_no_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_metric_no_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_reg_no_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_no_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_metric_no_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_reg_no_sn/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['mc_all', 'ddpp_dpp', 'ddpp_ood']
    ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
    ues_layers = ['all', 'all', 'all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
There are several rows for SR, used first one
                    MRPC                           SST2                 \
                 rcc-auc            rpp         rcc-auc            rpp   
max_prob  22.65$\pm$3.97  2.67$\pm$0.36  18.25$\pm$1.80  1.23$\pm$0.12   
max_prob  22.33$\pm$3.84  2.66$\pm$0.35  18.25$\pm$1.80  1.23$\pm$0.12   

                    CoLA                 
                 rcc-auc            rpp  
max_prob  69.61$\pm$4.25  3.32$\pm$0.17  
max_prob  69.49$\pm$4.17  3.32$\pm$0.17  
\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
                MC &         - &         PV &                       5.27$\pm$0.89 &                   0.08

In [79]:
# Maha SN, Deberta
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_metric_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_reg_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_metric_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_reg_sn/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['mahalanobis']
    ues_names = ['MD SN (Ours)']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
 MD SN (Ours) &         - & MD &                       5.36$\pm$1.14 &                   0.08$\pm$0.01 &                         16.72$\pm$4.89 &                      1.90$\pm$0.18 &  12.28$\pm$0.74 & 1.73$\pm$0.11 &  12.46$\pm$1.94 & 0.90$\pm$0.13 &  54.40$\pm$4.10 & 2.66$\pm$0.18 \\
SR (baseline) &         - &             MP &                       4.74$\pm$0.88 &                   0.07$\pm$0.01 &                         18.01$\pm$3.76 &                      2.08$\pm$0.31 &  22.60$\pm$5.09 & 2.47$\pm$0.43 &  18.07$\pm$4.99 & 1.22$\pm$0.30 &  63.14$\pm$4.09 & 3.07$\pm$0.26 \\
\bottomrule
\end{tabular}

Model w

In [80]:
# Maha, Deberta
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_no_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_metric_no_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_reg_no_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_no_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_metric_no_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_reg_no_sn/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['mahalanobis']
    ues_names = ['MD']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
           MD &         - & MD &                       4.87$\pm$0.89 &                   0.07$\pm$0.01 &                         14.44$\pm$4.09 &                      1.78$\pm$0.47 &  16.56$\pm$3.44 & 2.21$\pm$0.37 &  13.00$\pm$3.11 & 0.89$\pm$0.20 &  58.02$\pm$4.50 & 2.80$\pm$0.18 \\
SR (baseline) &         - &             MP &                       5.37$\pm$0.44 &                   0.08$\pm$0.01 &                         17.31$\pm$5.20 &                      2.12$\pm$0.58 &  22.65$\pm$3.97 & 2.67$\pm$0.36 &  18.25$\pm$1.80 & 1.23$\pm$0.12 &  69.61$\pm$4.25 & 3.32$\pm$0.17 \\
\bottomrule
\end{tabular}

Model w

In [123]:
# Deberta, Deep Ensemble
reg_types = ['-']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_no_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/deberta/deberta_raw_no_sn/conll2003/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['deep_ensemble']
    ues_names = ['DE']
    ues_layers = ['-']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
There are several rows for SR, used first one
MRPC  rcc-auc    23.59$\pm$6.34
      rpp         2.66$\pm$0.39
SST2  rcc-auc    17.12$\pm$1.87
      rpp         1.13$\pm$0.09
CoLA  rcc-auc    82.36$\pm$4.37
      rpp         3.88$\pm$0.17
Name: max_prob, dtype: object
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &    (MRPC, rpp) & (SST2, rcc-auc) &    (SST2, rpp) & (CoLA, rcc-auc) &    (CoLA, rpp) \\
\midrule
           DE &         - &         PV &                       3.82$\pm$1.86 &                   0.05$\pm$0.03 &                         11.85$\pm$3.05 &                      1.39$\pm$0.48 &  18.71$\pm$2.55 &  2.02$\pm$0.23 &  11.90$\pm$1.02 &  0.75$\pm$0.03 & 81.47$\pm$13.66 &  3.32$\pm$0.23 \\
           DE &         - &             BALD &        

In [98]:
# Deberta, Mixup
reg_types = ['MSD']

cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/mixup_deberta_fix_repro_fix/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/mixup_deberta_fix_repro_fix/conll/',]
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = ['msd/all', 'msd/last']
    ues_names = ['MSD-all', 'MSD-last']
    ues_layers = ['all', 'last']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mixup']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    ues = ['mixup/all', 'mixup/last']
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: MSD
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type & UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
      MSD-all &       MSD &    DS &                       6.84$\pm$2.28 &                   0.10$\pm$0.03 &                         14.68$\pm$3.92 &                      1.82$\pm$0.45 &  13.08$\pm$1.25 & 1.88$\pm$0.19 &  11.66$\pm$2.60 & 0.81$\pm$0.12 &   53.42$\pm$4.73 & 2.61$\pm$0.20 \\
     MSD-last &       MSD &    DS &                       6.84$\pm$2.28 &                   0.10$\pm$0.03 &                         14.68$\pm$3.94 &                      1.82$\pm$0.45 &  13.08$\pm$1.25 & 1.88$\pm$0.19 &  11.66$\pm$2.60 & 0.81$\pm$0.12 &   53.42$\pm$4.73 & 2.61$\pm$0.20 \\
SR (baseline) &       MSD & MP &               

### New SN results VS old SN results

In [84]:
# Old SN, Electra
# Maha SN, ELECTRA
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_raw_sn/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/current_paper_results/electra_reg_sn/',]
ner_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_raw_sn/conll2003/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/metric_opt_electra_param_last_fix_171/conll/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/new_fixed_conll/electra_reg_sn/conll2003/',]
ues_array = ['mahalanobis', 'maha_sn', 'mahalanobis']
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = [ues_array[idx]]
    ues_names = ['MD SN (Ours)']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    '''
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)
    '''

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllll}
\toprule
      Method & Reg. Type &             UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
MD SN (Ours) &         - & MD &  13.61$\pm$1.89 & 1.80$\pm$0.16 &  12.16$\pm$1.76 & 0.83$\pm$0.10 &  40.42$\pm$2.10 & 1.96$\pm$0.11 \\
    Baseline &         - &             MP &  18.72$\pm$6.21 & 2.17$\pm$0.36 &  17.83$\pm$3.89 & 1.11$\pm$0.25 & 79.92$\pm$10.27 & 3.53$\pm$0.30 \\
\bottomrule
\end{tabular}

Model with regularization: metric
\begin{tabular}{lllllllll}
\toprule
      Method & Reg. Type &             UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
MD SN (Ours) &    metric & MD &  14.46$\pm$1.21 & 1.96$\pm$0.14 &  11.67$\pm$1.07 & 0.82$\pm$0.06 &  43.50$\pm$2.27 & 2.07$\pm$0.11 \\
    Baseline &    metric &             MP &  20.51$\pm$3.27 & 2.32$\pm$0.23 &  14.22$\pm$1.78 & 0

In [85]:
# New SN, Electra
# Maha SN, ELECTRA
reg_types = ['-', 'metric', 'CER']
cls_pathes = ['/home/jovyan/uncertainty-estimation/workdir/final_results/new_sn_results/electra_raw_sn_new/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/new_sn_results/electra_metric_sn_new/',
              '/home/jovyan/uncertainty-estimation/workdir/final_results/new_sn_results/electra_reg_sn_new/',]
ues_array = ['mahalanobis', 'mahalanobis', 'mahalanobis']
for idx in range(len(reg_types)):
    reg_type = reg_types[idx]
    cls_path = cls_pathes[idx]
    ner_path = ner_pathes[idx]
    print(f'Model with regularization: {reg_type}')
    #reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'
    ues = [ues_array[idx]]
    ues_names = ['MD SN (Ours)']
    ues_layers = ['all']
    metrics = ['rcc-auc', 'rpp']
    metric_names = ['rcc-auc', 'rpp']
    types = ['mrpc', 'sst2', 'cola']
    types_names = ['MRPC', 'SST2', 'CoLA']
    ue_methods = ['max_prob', 'mahalanobis_distance']
    perc_metrics = ['rejection-curve-auc', 'rpp']
    diff_metrics = ['rejection-curve-auc', 'roc-auc']


    # copied from table
    baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                      'mrpcrcc-aucmax_prob': 23.279293481630972,
                      'mrpcrppmax_prob': 0.026788574907087016 * 100,
                      'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                      'colarcc-aucmax_prob': 59.03726591032054,
                      'colarppmax_prob': 0.02631936969193335 * 100,
                      'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      'sst2rcc-aucmax_prob': 18.067838464295736,
                      'sst2rppmax_prob': 0.012349462026204303 * 100,
                      '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                      '20ngrcc-aucmax_prob': 18.067838464295736,
                      '20ngrppmax_prob': 0.012349462026204303 * 100}
    raw_df, baselines_dict = get_df(cls_path, reg_type, baselines_dict, True)
    miscl_df = raw_df
    miscl_df.reset_index(inplace=True, drop=True)

    # NER
    '''
    types = ['token', 'seq']
    baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}
    reg_df, baselines_dict = get_df_ner(ner_path, reg_type, baselines_dict, 1)
    ner_df = pd.concat([reg_df])
    ner_df.reset_index(inplace=True, drop=True)

    miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
    miscl_df = pd.concat([ner_df, miscl_df], axis=1)
    '''

    latex_table = prepare_latex_table(miscl_df)
    print(latex_table)

Model with regularization: -
\begin{tabular}{lllllllll}
\toprule
      Method & Reg. Type &             UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
MD SN (Ours) &         - & MD &  13.70$\pm$2.23 & 1.86$\pm$0.24 &  12.77$\pm$1.67 & 0.90$\pm$0.11 &  38.46$\pm$2.53 & 1.87$\pm$0.10 \\
    Baseline &         - &             MP &  16.20$\pm$2.24 & 2.08$\pm$0.29 &  19.19$\pm$3.26 & 1.28$\pm$0.21 &  45.72$\pm$3.39 & 2.43$\pm$0.19 \\
\bottomrule
\end{tabular}

Model with regularization: metric
\begin{tabular}{lllllllll}
\toprule
      Method & Reg. Type &             UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
MD SN (Ours) &    metric & MD &  12.47$\pm$2.30 & 1.66$\pm$0.26 &  11.11$\pm$1.84 & 0.80$\pm$0.12 &  39.35$\pm$2.64 & 1.90$\pm$0.08 \\
    Baseline &    metric &             MP &  16.25$\pm$4.14 & 2.09$\pm$0.39 &  14.84$\pm$3.73 & 1