In [2]:
# Notebook for NER results table

In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# Prefix of the metric files; get_df concatenates it directly, so keep the trailing slash.
raw_path = "/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/electra-metric/"

# Run sub-directories read by get_df, one group of table rows per entry.
ues = ["last", "all", "dpp", "dpp_with_ood"]
# Labels written to the 'Method' column, aligned with `ues`.
ues_names = ["MC", "MC", "DPP_on_masks", "DPP_with_ood"]
# Dropout-layer labels aligned with `ues` (the matching table column is currently disabled).
ues_layers = ["last", "all", "last", "last"]

# Metric names: they select `metrics_<metric>.json` and become the column sub-headers.
metrics = ["rejection-curve-auc", "rcc-auc", "rpp"]
metric_names = list(metrics)

# Dataset directories and the headers shown for them.
types = ["mrpc", "cola", "sst2"]
types_names = ["MRPC", "CoLA", "SST2 (10%)"]

# Score names looked up in every metric file; 'max_prob' is the baseline row.
ue_methods = ["max_prob", "bald", "sampled_max_prob", "variance"]

# Metrics multiplied by 100 before formatting.
perc_metrics = ["rejection-curve-auc", "rpp"]
# Metrics reported as a difference to the max_prob baseline.
diff_metrics = ["rejection-curve-auc", "roc-auc"]

In [3]:
def get_df(raw_path, reg_type, baselines_dict=None, baselines=None):
    """Assemble the results table for the classification datasets.

    The layout is driven by the notebook-level lists ``ues``, ``ues_names``,
    ``types``, ``types_names``, ``metrics``, ``ue_methods``, ``perc_metrics``
    and ``diff_metrics``, which are read when the function is called.

    For every run in ``ues``, dataset in ``types`` and metric in ``metrics``
    the file ``raw_path + <type> + '/' + <ue> + '/metrics_<metric>.json'`` is
    loaded. It must map each name in ``ue_methods`` to a dict of values; the
    values are reduced to a mean and a standard deviation and formatted as a
    string with a LaTeX plus-minus sign between them.

    Parameters
    ----------
    raw_path : str
        Path prefix of the metric files. It is concatenated as-is, so it must
        end with a path separator.
    reg_type : str
        Label written to every row of the 'Reg. Type' column.
    baselines_dict : dict, optional
        Reference values keyed by ``<type><metric>max_prob``. When
        ``baselines`` is None the dict is filled with the ``max_prob`` means
        computed here; otherwise its values are read. A new dict is created
        when the argument is omitted.
    baselines : object, optional
        Any value other than None makes the difference metrics relative to
        ``baselines_dict`` instead of to the ``max_prob`` mean of the run.

    Returns
    -------
    (pandas.DataFrame, dict)
        The table (indexed by score name, with the ``max_prob`` row last) and
        the baselines dict.

    Raises
    ------
    KeyError
        If a score name (including 'max_prob') is missing from a metric file
        or from ``ue_methods``, or a required key is missing from
        ``baselines_dict``.
    """
    if baselines_dict is None:
        # A fresh dict per call; a mutable default would be shared between calls.
        baselines_dict = {}

    df_dict = {}
    for ue in ues:
        df_dict[ue] = {}
        for ue_type in types:
            type_results = {}
            for metric in metrics:
                fname = raw_path + ue_type + '/' + ue + '/' + f'metrics_{metric}.json'
                with open(fname, 'r') as f:
                    curr_metrics = json.load(f)

                scale = 100 if metric in perc_metrics else 1
                stats = {}
                for ue_method in ue_methods:
                    values = list(curr_metrics[ue_method].values())
                    stats[ue_method] = (np.mean(values) * scale, np.std(values) * scale)

                # Resolve the baseline before formatting anything, so the result
                # does not depend on where 'max_prob' sits inside ue_methods.
                baseline_key = ue_type + metric + 'max_prob'
                if baselines is None:
                    baseline = stats['max_prob'][0]
                    baselines_dict[baseline_key] = baseline
                else:
                    baseline = baselines_dict[baseline_key]

                metric_results = {}
                for ue_method, (mean, std) in stats.items():
                    if metric in diff_metrics and ue_method != 'max_prob':
                        mean = mean - baseline
                    metric_results[ue_method] = f'{mean:.2f}$\\pm${std:.2f}'
                type_results[metric] = metric_results
            df_dict[ue][ue_type] = pd.DataFrame.from_dict(type_results)

    # One block of columns per dataset, rows stacked over all runs.
    dataset_dfs = []
    for idx, ue_type in enumerate(types):
        type_df = pd.concat([df_dict[ue][ue_type] for ue in ues])
        type_df.columns = pd.MultiIndex.from_tuples([(types_names[idx], metric) for metric in metrics])
        dataset_dfs.append(type_df)
    raw_df = pd.concat(dataset_dfs, axis=1)

    # Keep a single max_prob row and move it to the bottom as the baseline.
    # .loc returns a DataFrame when several runs contribute a max_prob row and
    # a Series when there is only one; with several rows the first is kept.
    max_prob_rows = raw_df.loc['max_prob']
    if isinstance(max_prob_rows, pd.DataFrame):
        baseline_row = max_prob_rows.iloc[0]
    else:
        baseline_row = max_prob_rows
    raw_df = pd.concat([raw_df.drop('max_prob'), baseline_row.to_frame().T])

    # Every run contributes one row per score except max_prob.
    n_scores = len(ue_methods) - 1
    methods = [name for name in ues_names for _ in range(n_scores)] + ['Baseline']
    names_df = pd.DataFrame(
        {
            'Method': methods,
            'Reg. Type': [reg_type] * len(raw_df),
            'UE Score': list(raw_df.index),
        },
        index=raw_df.index,
    )
    return pd.concat([names_df, raw_df], axis=1), baselines_dict

In [4]:
def get_df_ner(raw_path, reg_type, baselines_dict=None, baselines=None):
    """Assemble the results table for the token-level and sequence-level scores.

    The layout is driven by the notebook-level lists ``ues``, ``ues_names``,
    ``types``, ``metrics``, ``ue_methods``, ``perc_metrics`` and
    ``diff_metrics``, which are read when the function is called. ``types``
    must contain 'token' and 'seq', the two levels the table is built from.

    For every run in ``ues``, level in ``types`` and metric in ``metrics`` the
    file ``raw_path + <ue> + '/metrics_<type>_<metric>.json'`` is loaded. It
    must map each name in ``ue_methods`` to a dict of values; the values are
    reduced to a mean and a standard deviation and formatted as a string with
    a LaTeX plus-minus sign between them.

    Parameters
    ----------
    raw_path : str
        Path prefix of the run directories. It is concatenated as-is, so it
        must end with a path separator.
    reg_type : str
        Label written to every row of the 'Reg. Type' column.
    baselines_dict : dict, optional
        Reference values keyed by ``<type><metric>max_prob``. When
        ``baselines`` is None the dict is filled with the ``max_prob`` means
        computed here; otherwise its values are read. A new dict is created
        when the argument is omitted.
    baselines : object, optional
        Any value other than None makes the difference metrics relative to
        ``baselines_dict`` instead of to the ``max_prob`` mean of the run.

    Returns
    -------
    (pandas.DataFrame, dict)
        The table (indexed by score name, with the ``max_prob`` row last) and
        the baselines dict.

    Raises
    ------
    KeyError
        If 'token' or 'seq' is missing from ``types``, a score name (including
        'max_prob') is missing from a metric file or from ``ue_methods``, or a
        required key is missing from ``baselines_dict``.
    """
    if baselines_dict is None:
        # A fresh dict per call; a mutable default would be shared between calls.
        baselines_dict = {}

    df_dict = {}
    for ue in ues:
        ue_path = raw_path + ue + '/'
        df_dict[ue] = {}
        for ue_type in types:
            type_results = {}
            for metric in metrics:
                fname = ue_path + f'metrics_{ue_type}_{metric}.json'
                with open(fname, 'r') as f:
                    curr_metrics = json.load(f)

                scale = 100 if metric in perc_metrics else 1
                stats = {}
                for ue_method in ue_methods:
                    values = list(curr_metrics[ue_method].values())
                    stats[ue_method] = (np.mean(values) * scale, np.std(values) * scale)

                # Resolve the baseline before formatting anything, so the result
                # does not depend on where 'max_prob' sits inside ue_methods.
                baseline_key = ue_type + metric + 'max_prob'
                if baselines is None:
                    baseline = stats['max_prob'][0]
                    baselines_dict[baseline_key] = baseline
                else:
                    baseline = baselines_dict[baseline_key]

                metric_results = {}
                for ue_method, (mean, std) in stats.items():
                    if metric in diff_metrics and ue_method != 'max_prob':
                        mean = mean - baseline
                    metric_results[ue_method] = f'{mean:.2f}$\\pm${std:.2f}'
                type_results[metric] = metric_results
            df_dict[ue][ue_type] = pd.DataFrame.from_dict(type_results)

    # Stack the runs for each level and label the two column groups.
    token_df = pd.concat([df_dict[ue]['token'] for ue in ues])
    seq_df = pd.concat([df_dict[ue]['seq'] for ue in ues])
    token_df.columns = pd.MultiIndex.from_tuples([('CoNLL-2003 (token level)', metric) for metric in metrics])
    seq_df.columns = pd.MultiIndex.from_tuples([('CoNLL-2003 (sequence level)', metric) for metric in metrics])
    raw_df = pd.concat([token_df, seq_df], axis=1)

    # Keep a single max_prob row and move it to the bottom as the baseline.
    # .loc returns a DataFrame when several runs contribute a max_prob row and
    # a Series when there is only one; with several rows the first is kept.
    max_prob_rows = raw_df.loc['max_prob']
    if isinstance(max_prob_rows, pd.DataFrame):
        baseline_row = max_prob_rows.iloc[0]
    else:
        baseline_row = max_prob_rows
    raw_df = pd.concat([raw_df.drop('max_prob'), baseline_row.to_frame().T])

    # Every run contributes one row per score except max_prob.
    n_scores = len(ue_methods) - 1
    methods = [name for name in ues_names for _ in range(n_scores)] + ['SR (baseline)']
    names_df = pd.DataFrame(
        {
            'Method': methods,
            'Reg. Type': [reg_type] * len(raw_df),
            'UE Score': list(raw_df.index),
        },
        index=raw_df.index,
    )
    return pd.concat([names_df, raw_df], axis=1), baselines_dict

In [160]:
# Reference max_prob scores copied from a results table. Because a non-None
# `baselines` flag is passed, get_df reads these values for the difference
# metrics instead of the max_prob means of the runs being loaded.
baselines_dict = dict([
    ('mrpcrejection-curve-aucmax_prob', 0.9278390522875816 * 100),
    ('mrpcrcc-aucmax_prob', 15.032621362700068),
    ('mrpcrppmax_prob', 0.019659906446238627 * 100),
    ('colarejection-curve-aucmax_prob', 0.9223434004474272 * 100),
    ('colarcc-aucmax_prob', 48.80838215134696),
    ('colarppmax_prob', 0.024288603779875087 * 100),
    ('sst2rejection-curve-aucmax_prob', 0.9389000382262997 * 100),
    ('sst2rcc-aucmax_prob', 17.100472209733763),
    ('sst2rppmax_prob', 0.01133199365934405 * 100),
])
raw_df, baselines_dict = get_df(
    raw_path,
    'metric',
    baselines_dict=baselines_dict,
    baselines=True,
)

In [161]:
# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

In [162]:
# Display the assembled table below the cell for a visual check.
miscl_df

Unnamed: 0,Method,Reg. Type,Dropout Layers,UE Score,"(MRPC, rejection-curve-auc)","(MRPC, rcc-auc)","(MRPC, rpp)","(CoLA, rejection-curve-auc)","(CoLA, rcc-auc)","(CoLA, rpp)","(SST2 (10%), rejection-curve-auc)","(SST2 (10%), rcc-auc)","(SST2 (10%), rpp)"
0,MC,metric,last,bald,-1.00$\pm$0.57,27.03$\pm$5.29,2.96$\pm$0.48,-0.50$\pm$0.20,60.46$\pm$6.03,2.85$\pm$0.16,-0.34$\pm$0.13,25.95$\pm$4.05,1.49$\pm$0.13
1,MC,metric,last,sampled_max_prob,0.12$\pm$0.49,13.85$\pm$3.12,1.79$\pm$0.41,-0.01$\pm$0.22,47.63$\pm$4.18,2.33$\pm$0.21,0.22$\pm$0.03,13.45$\pm$0.87,0.93$\pm$0.03
2,MC,metric,last,variance,-0.18$\pm$0.43,18.34$\pm$2.70,2.12$\pm$0.35,-0.11$\pm$0.20,50.65$\pm$4.83,2.43$\pm$0.17,0.07$\pm$0.09,17.02$\pm$2.48,1.08$\pm$0.11
3,MC,metric,all,bald,0.39$\pm$0.21,12.30$\pm$1.08,1.53$\pm$0.15,-0.23$\pm$0.19,52.04$\pm$2.89,2.59$\pm$0.17,0.24$\pm$0.10,13.70$\pm$1.87,0.90$\pm$0.11
4,MC,metric,all,sampled_max_prob,0.39$\pm$0.35,11.96$\pm$2.03,1.52$\pm$0.26,0.05$\pm$0.16,46.10$\pm$3.28,2.29$\pm$0.14,0.30$\pm$0.05,12.19$\pm$0.73,0.85$\pm$0.04
5,MC,metric,all,variance,0.41$\pm$0.25,12.02$\pm$1.25,1.51$\pm$0.16,-0.16$\pm$0.19,49.58$\pm$2.97,2.49$\pm$0.15,0.29$\pm$0.08,12.63$\pm$0.87,0.86$\pm$0.08
6,DPP_on_masks,metric,last,bald,-0.46$\pm$0.34,19.34$\pm$3.11,2.40$\pm$0.33,-0.00$\pm$0.15,50.04$\pm$4.15,2.34$\pm$0.09,-0.19$\pm$0.27,20.61$\pm$6.41,1.33$\pm$0.28
7,DPP_on_masks,metric,last,sampled_max_prob,0.12$\pm$0.45,14.12$\pm$2.84,1.79$\pm$0.37,0.01$\pm$0.21,47.32$\pm$3.98,2.32$\pm$0.19,0.20$\pm$0.04,13.59$\pm$0.99,0.95$\pm$0.04
8,DPP_on_masks,metric,last,variance,0.05$\pm$0.31,14.69$\pm$1.67,1.86$\pm$0.23,0.10$\pm$0.16,45.89$\pm$2.97,2.24$\pm$0.09,0.11$\pm$0.16,14.94$\pm$2.24,1.04$\pm$0.15
9,DPP_with_ood,metric,last,bald,-0.05$\pm$0.45,15.04$\pm$2.72,1.98$\pm$0.43,-0.06$\pm$0.14,49.57$\pm$1.92,2.41$\pm$0.16,0.00$\pm$0.26,16.85$\pm$3.99,1.15$\pm$0.25


### Here we just print the table so it can be copied as text

In [163]:
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. The first pair restores the escaped '$\pm$'
# separator; the rest shorten the score names ('sampled\_max\_prob' must be
# handled before 'max\_prob').
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{lllllllllllll}
\toprule
      Method & Reg. Type & Dropout Layers &         UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rejection-curve-auc) & (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rejection-curve-auc) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) \\
\midrule
          MC &    metric &           last &             BALD &              -1.00$\pm$0.57 &  27.03$\pm$5.29 & 2.96$\pm$0.48 &              -0.50$\pm$0.20 &  60.46$\pm$6.03 & 2.85$\pm$0.16 &                    -0.34$\pm$0.13 &        25.95$\pm$4.05 &     1.49$\pm$0.13 \\
          MC &    metric &           last & SMP &               0.12$\pm$0.49 &  13.85$\pm$3.12 & 1.79$\pm$0.41 &              -0.01$\pm$0.22 &  47.63$\pm$4.18 & 2.33$\pm$0.21 &                     0.22$\pm$0.03 &        13.45$\pm$0.87 &     0.93$\pm$0.03 \\
          MC &    metric &           last &         PV &              -0.18$\pm$0.43 &  18.34$\pm$2.70 & 2.12$\pm$0.35 &              -0.11$\pm$0

### Do the same for NUQ and Mahalanobis distance

In [12]:
# Mahalanobis-distance scores for the electra-metric runs.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/electra-metric/'

ues = ['maha_mc']
ues_names = ['Mahalanobis']
ues_layers = ['-']

metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9278390522875816 * 100,
                  'mrpcrcc-aucmax_prob': 15.032621362700068,
                  'mrpcrppmax_prob': 0.019659906446238627 * 100,
                  'colarejection-curve-aucmax_prob': 0.9223434004474272 * 100,
                  'colarcc-aucmax_prob': 48.80838215134696,
                  'colarppmax_prob': 0.024288603779875087 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9389000382262997 * 100,
                  'sst2rcc-aucmax_prob': 17.100472209733763,
                  'sst2rppmax_prob': 0.01133199365934405 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. Longer names are replaced first so the sampled
# variant becomes 'SMD' instead of being left as 'sampled\_MD'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    (r'sampled\_mahalanobis\_distance', 'SMD'),
    (r'mahalanobis\_distance', 'MD'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{lllllllllllll}
\toprule
     Method & Reg. Type & Dropout Layers &                     UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rejection-curve-auc) &  (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rejection-curve-auc) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) \\
\midrule
Mahalanobis &    metric &              - &         MD &              -0.13$\pm$0.35 &  15.46$\pm$2.46 & 2.07$\pm$0.30 &              -0.77$\pm$0.16 &   61.25$\pm$2.16 & 3.10$\pm$0.13 &                     0.19$\pm$0.10 &        13.27$\pm$1.31 &     0.94$\pm$0.08 \\
Mahalanobis &    metric &              - & sampled\_MD &              -4.73$\pm$1.10 & 75.41$\pm$15.01 & 6.76$\pm$1.05 &              -4.87$\pm$0.59 & 207.72$\pm$21.23 & 7.32$\pm$0.60 &                    -3.47$\pm$0.45 &      114.84$\pm$16.47 &     4.70$\pm$0.46 \\
   Baseline &    metric &              - &                     MP &              92.92$\pm$0.49 &  13.71$\pm$3.04 & 1.78$\pm$0.40 &

In [13]:
# Mahalanobis-distance scores for the 'maha_sn_mc' runs, labelled 'metric+SN'.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/electra-metric/'

ues = ['maha_sn_mc']
ues_names = ['Mahalanobis']
ues_layers = ['-']

metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9278390522875816 * 100,
                  'mrpcrcc-aucmax_prob': 15.032621362700068,
                  'mrpcrppmax_prob': 0.019659906446238627 * 100,
                  'colarejection-curve-aucmax_prob': 0.9223434004474272 * 100,
                  'colarcc-aucmax_prob': 48.80838215134696,
                  'colarppmax_prob': 0.024288603779875087 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9389000382262997 * 100,
                  'sst2rcc-aucmax_prob': 17.100472209733763,
                  'sst2rppmax_prob': 0.01133199365934405 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric+SN', baselines_dict, True)

# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. Longer names are replaced first so the sampled
# variant becomes 'SMD' instead of being left as 'sampled\_MD'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    (r'sampled\_mahalanobis\_distance', 'SMD'),
    (r'mahalanobis\_distance', 'MD'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{lllllllllllll}
\toprule
     Method & Reg. Type & Dropout Layers &                     UE Score & (MRPC, rejection-curve-auc) &   (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rejection-curve-auc) &   (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rejection-curve-auc) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) \\
\midrule
Mahalanobis & metric+SN &              - &         MD &            -13.47$\pm$19.05 & 139.76$\pm$177.63 & 4.03$\pm$2.98 &            -13.34$\pm$18.21 & 366.14$\pm$437.38 & 5.12$\pm$3.19 &                  -14.26$\pm$20.39 &     294.77$\pm$398.06 &     2.61$\pm$2.36 \\
Mahalanobis & metric+SN &              - & sampled\_MD &            -14.92$\pm$15.44 & 158.15$\pm$128.47 & 5.50$\pm$0.94 &            -14.79$\pm$14.32 & 415.05$\pm$302.63 & 6.58$\pm$0.85 &                  -15.25$\pm$16.39 &     320.24$\pm$283.31 &     3.66$\pm$1.83 \\
   Baseline & metric+SN &              - &                     MP &             77.47$\pm$17.86 & 156.49$\pm$166.24 & 6.14

In [166]:
# Prefix of the metric files; get_df concatenates it directly, so keep the trailing slash.
raw_path = "/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/electra-metric/"

# A single NUQ run: sub-directory, 'Method' label and layer label.
ues = ["nuq"]
ues_names = ["NUQ"]
ues_layers = ["-"]

# Metric names: they select `metrics_<metric>.json` and become the column sub-headers.
metrics = ["rejection-curve-auc", "rcc-auc", "rpp"]
metric_names = list(metrics)

# Dataset directories and the headers shown for them.
types = ["mrpc", "cola", "sst2"]
types_names = ["MRPC", "CoLA", "SST2 (10%)"]

# Score names looked up in every metric file; 'max_prob' is the baseline row.
ue_methods = ["max_prob", "nuq_epistemic", "nuq_aleatoric", "nuq_total"]

# Metrics multiplied by 100 before formatting.
perc_metrics = ["rejection-curve-auc", "rpp"]
# Metrics reported as a difference to the max_prob baseline.
diff_metrics = ["rejection-curve-auc", "roc-auc"]

In [167]:
# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9278390522875816 * 100,
                  'mrpcrcc-aucmax_prob': 15.032621362700068,
                  'mrpcrppmax_prob': 0.019659906446238627 * 100,
                  'colarejection-curve-aucmax_prob': 0.9223434004474272 * 100,
                  'colarcc-aucmax_prob': 48.80838215134696,
                  'colarppmax_prob': 0.024288603779875087 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9389000382262997 * 100,
                  'sst2rcc-aucmax_prob': 17.100472209733763,
                  'sst2rppmax_prob': 0.01133199365934405 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

In [168]:
# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

In [169]:
# Display the assembled table below the cell for a visual check.
miscl_df

Unnamed: 0,Method,Reg. Type,Dropout Layers,UE Score,"(MRPC, rejection-curve-auc)","(MRPC, rcc-auc)","(MRPC, rpp)","(CoLA, rejection-curve-auc)","(CoLA, rcc-auc)","(CoLA, rpp)","(SST2 (10%), rejection-curve-auc)","(SST2 (10%), rcc-auc)","(SST2 (10%), rpp)"
0,NUQ,metric,-,nuq_epistemic,-0.02$\pm$0.36,14.79$\pm$2.42,1.95$\pm$0.29,-0.34$\pm$0.16,54.96$\pm$3.29,2.69$\pm$0.13,0.12$\pm$0.11,14.05$\pm$1.06,1.01$\pm$0.11
1,NUQ,metric,-,nuq_aleatoric,-0.16$\pm$0.40,15.51$\pm$2.55,2.08$\pm$0.32,-0.29$\pm$0.14,54.10$\pm$3.16,2.62$\pm$0.13,0.08$\pm$0.13,14.49$\pm$1.22,1.05$\pm$0.13
2,NUQ,metric,-,nuq_total,-0.07$\pm$0.39,14.98$\pm$2.48,1.98$\pm$0.30,-0.30$\pm$0.14,54.41$\pm$3.17,2.65$\pm$0.13,0.10$\pm$0.13,14.23$\pm$1.19,1.03$\pm$0.13
3,Baseline,metric,-,max_prob,92.92$\pm$0.49,13.71$\pm$3.04,1.78$\pm$0.40,92.22$\pm$0.22,47.59$\pm$4.19,2.33$\pm$0.21,94.10$\pm$0.03,13.44$\pm$0.87,0.93$\pm$0.02


In [170]:
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. The first pair restores the escaped '$\pm$'
# separator; the rest shorten the score names.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    (r'mahalanobis\_distance', 'MD'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    (r'nuq\_aleatoric', 'aleatoric'),
    (r'nuq\_epistemic', 'epistemic'),
    (r'nuq\_total', 'total'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{lllllllllllll}
\toprule
  Method & Reg. Type & Dropout Layers &      UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rejection-curve-auc) & (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rejection-curve-auc) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) \\
\midrule
     NUQ &    metric &              - & epistemic &              -0.02$\pm$0.36 &  14.79$\pm$2.42 & 1.95$\pm$0.29 &              -0.34$\pm$0.16 &  54.96$\pm$3.29 & 2.69$\pm$0.13 &                     0.12$\pm$0.11 &        14.05$\pm$1.06 &     1.01$\pm$0.11 \\
     NUQ &    metric &              - & aleatoric &              -0.16$\pm$0.40 &  15.51$\pm$2.55 & 2.08$\pm$0.32 &              -0.29$\pm$0.14 &  54.10$\pm$3.16 & 2.62$\pm$0.13 &                     0.08$\pm$0.13 &        14.49$\pm$1.22 &     1.05$\pm$0.13 \\
     NUQ &    metric &              - &     total &              -0.07$\pm$0.39 &  14.98$\pm$2.48 & 1.98$\pm$0.30 &              -0.30$\pm$0.14 &  54.41$\pm$3.17

In [171]:
# NUQ scores for the 'nuq_sn' runs, labelled 'metric+SN'.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/electra-metric/'

ues = ['nuq_sn']
ues_names = ['NUQ']
ues_layers = ['-']

metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'nuq_epistemic', 'nuq_aleatoric', 'nuq_total']

perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9278390522875816 * 100,
                  'mrpcrcc-aucmax_prob': 15.032621362700068,
                  'mrpcrppmax_prob': 0.019659906446238627 * 100,
                  'colarejection-curve-aucmax_prob': 0.9223434004474272 * 100,
                  'colarcc-aucmax_prob': 48.80838215134696,
                  'colarppmax_prob': 0.024288603779875087 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9389000382262997 * 100,
                  'sst2rcc-aucmax_prob': 17.100472209733763,
                  'sst2rppmax_prob': 0.01133199365934405 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric+SN', baselines_dict, True)

# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. The first pair restores the escaped '$\pm$'
# separator; the rest shorten the score names.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    (r'mahalanobis\_distance', 'MD'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    (r'nuq\_aleatoric', 'aleatoric'),
    (r'nuq\_epistemic', 'epistemic'),
    (r'nuq\_total', 'total'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{lllllllllllll}
\toprule
  Method & Reg. Type & Dropout Layers &      UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rejection-curve-auc) & (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rejection-curve-auc) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) \\
\midrule
     NUQ & metric+SN &              - & epistemic &              -0.02$\pm$0.36 &  14.79$\pm$2.42 & 1.95$\pm$0.29 &              -0.34$\pm$0.16 &  54.92$\pm$3.29 & 2.69$\pm$0.13 &                     0.12$\pm$0.11 &        14.05$\pm$1.06 &     1.01$\pm$0.11 \\
     NUQ & metric+SN &              - & aleatoric &              -0.16$\pm$0.40 &  15.51$\pm$2.55 & 2.08$\pm$0.32 &              -0.29$\pm$0.15 &  54.06$\pm$3.17 & 2.62$\pm$0.13 &                     0.08$\pm$0.13 &        14.49$\pm$1.22 &     1.05$\pm$0.13 \\
     NUQ & metric+SN &              - &     total &              -0.07$\pm$0.39 &  14.98$\pm$2.48 & 1.98$\pm$0.30 &              -0.30$\pm$0.14 &  54.37$\pm$3.18

# Final tables

In [59]:
# Final table: MC dropout and the two DPP variants on the metric_opt_electra_3hyp runs.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp/'

ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'last', 'last']
metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'bald', 'var_ratio', 'sampled_max_prob', 'variance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. The first pair restores the escaped '$\pm$'
# separator; the rest shorten the score names ('sampled\_max\_prob' must be
# handled before 'max\_prob').
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{llllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rejection-curve-auc) & (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rejection-curve-auc) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) \\
\midrule
                MC &    metric &             BALD &               0.15$\pm$0.21 &  24.01$\pm$3.24 & 2.53$\pm$0.27 &               0.29$\pm$0.13 &  47.51$\pm$3.18 & 2.31$\pm$0.08 &                     0.35$\pm$0.10 &        14.38$\pm$2.29 &     0.88$\pm$0.10 \\
                MC &    metric &        VR &               0.08$\pm$0.40 &  24.14$\pm$3.27 & 3.06$\pm$0.31 &              -0.41$\pm$0.22 &  69.94$\pm$4.87 & 3.09$\pm$0.15 &                    -0.25$\pm$0.10 &        26.31$\pm$1.87 &     1.66$\pm$0.11 \\
                MC &    metric & SMP &               0.25$\pm$0.24 &  22.26$\pm$3.23 & 2.42$\pm$0.26 &               0.42$\pm$0.08 &  44.34$\pm$1.02 & 2.21$\pm$0.07 &       

In [51]:
# Final table: Mahalanobis distance on CoLA (metric_opt_electra_3hyp runs).
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp/'

ues = ['maha_mc']
ues_names = ['MD']
ues_layers = ['-']

metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
# Only CoLA is loaded here; the other datasets are ['mrpc', 'sst2'].
types = ['cola']
# Header spelled 'CoLA' to match the other tables in this notebook.
types_names = ['CoLA']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. Longer names are replaced first, so the sampled
# Mahalanobis score maps straight to 'SMD'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'sampled\_mahalanobis\_distance', 'SMD'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{llllll}
\toprule
  Method & Reg. Type &                     UE Score & (cola, rejection-curve-auc) & (cola, rcc-auc) &   (cola, rpp) \\
\midrule
      MD &    metric &         MD &               0.37$\pm$0.12 &  46.30$\pm$2.97 & 2.23$\pm$0.12 \\
      MD &    metric & SMD &              -0.13$\pm$0.12 &  55.56$\pm$2.39 & 2.76$\pm$0.11 \\
Baseline &    metric &                     MP &              91.81$\pm$0.13 &  56.01$\pm$3.06 & 2.82$\pm$0.11 \\
\bottomrule
\end{tabular}



In [52]:
# Final table: Mahalanobis distance from the 'maha_sn_mc' runs on CoLA.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp/'

ues = ['maha_sn_mc']
ues_names = ['MD SN (Ours)']
ues_layers = ['-']

metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
# Only CoLA is loaded here; the other datasets are ['mrpc', 'sst2'].
types = ['cola']
types_names = ['CoLA']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
# NOTE(review): another 'maha_sn_mc' cell in this notebook passes 'metric+SN'
# as the label; confirm that 'metric' is intended here.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. Longer names are replaced first, so the sampled
# Mahalanobis score maps straight to 'SMD'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'sampled\_mahalanobis\_distance', 'SMD'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{llllll}
\toprule
      Method & Reg. Type &                     UE Score & (CoLA, rejection-curve-auc) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
MD SN (Ours) &    metric &         MD &              -1.06$\pm$0.21 &   70.29$\pm$3.13 & 3.44$\pm$0.18 \\
MD SN (Ours) &    metric & SMD &              -3.71$\pm$0.38 & 175.69$\pm$11.57 & 6.18$\pm$0.34 \\
    Baseline &    metric &                     MP &              89.46$\pm$0.41 & 148.20$\pm$12.91 & 5.18$\pm$0.47 \\
\bottomrule
\end{tabular}



In [32]:
# MRPC with new parameters, Mahalanobis distance (metric_opt_electra_fix runs).
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_fix/'

ues = ['maha_mc']
ues_names = ['MD']
ues_layers = ['-']

metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc']
types_names = ['MRPC']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Positional index for the export; built as a new frame so that raw_df is not
# modified through an alias.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Raw strings are used because '\p' and '\_' are not valid escape sequences in
# ordinary string literals. Longer names are replaced first, so the sampled
# Mahalanobis score maps straight to 'SMD'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'sampled\_mahalanobis\_distance', 'SMD'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{llllll}
\toprule
  Method & Reg. Type &                     UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) \\
\midrule
      MD &    metric &         MD &               0.23$\pm$0.36 &  18.38$\pm$3.13 & 2.29$\pm$0.32 \\
      MD &    metric & SMD &              -0.61$\pm$1.02 & 31.14$\pm$11.04 & 3.14$\pm$0.98 \\
Baseline &    metric &                     MP &              91.46$\pm$0.35 &  27.26$\pm$7.23 & 3.14$\pm$0.34 \\
\bottomrule
\end{tabular}



In [33]:
# MRPC with the new parameters, Mahalanobis-distance scores ("fix6" run).
# The module-level settings below are read by get_df as globals.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_fix6/'

ues = ['maha_mc']
ues_names = ['MD']
ues_layers = ['-']

metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc']
types_names = ['MRPC']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

# Metrics in perc_metrics are multiplied by 100 inside get_df.
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# Reference max_prob baselines copied from an earlier table, keyed as
# <dataset><metric><ue_method>; percentage metrics are already scaled by 100.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
# Non-None `baselines` => get_df takes the max_prob baseline from baselines_dict.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Fresh 0..n-1 index on a new frame; raw_df itself is left untouched.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must precede 'max\_prob'; 'mahalanobis\_distance' must
# precede 'sampled\_MD'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'sampled\_MD', 'SMD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{llllll}
\toprule
  Method & Reg. Type &                     UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) \\
\midrule
      MD &    metric &         MD &               0.58$\pm$0.40 &  15.95$\pm$2.84 & 2.03$\pm$0.40 \\
      MD &    metric & SMD &               0.22$\pm$0.37 &  23.27$\pm$4.29 & 2.43$\pm$0.36 \\
Baseline &    metric &                     MP &              92.16$\pm$0.52 &  21.00$\pm$4.21 & 2.57$\pm$0.46 \\
\bottomrule
\end{tabular}



In [43]:
# New table for MRPC: MC dropout and both DDPP variants ("fix6" run).
# The module-level settings below are read by get_df as globals.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_fix6/'

ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'last', 'last']
metrics = ['rejection-curve-auc', 'rcc-auc', 'rpp']
metric_names = ['rejection-curve-auc', 'rcc-auc', 'rpp']
types = ['mrpc']
types_names = ['MRPC']
ue_methods = ['max_prob', 'bald', 'sampled_max_prob', 'variance']
# Metrics in perc_metrics are multiplied by 100 inside get_df.
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# Reference max_prob baselines copied from an earlier table, keyed as
# <dataset><metric><ue_method>; percentage metrics are already scaled by 100.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
# Non-None `baselines` => get_df takes the max_prob baseline from baselines_dict.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Fresh 0..n-1 index on a new frame; raw_df itself is left untouched.
miscl_df = raw_df.reset_index(drop=True)

latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must be replaced before the shorter 'max\_prob'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{llllll}
\toprule
            Method & Reg. Type &         UE Score & (MRPC, rejection-curve-auc) & (MRPC, rcc-auc) &   (MRPC, rpp) \\
\midrule
                MC &    metric &             BALD &               0.22$\pm$0.29 &  22.25$\pm$3.29 & 2.42$\pm$0.27 \\
                MC &    metric & SMP &               0.44$\pm$0.37 &  20.17$\pm$3.83 & 2.22$\pm$0.33 \\
                MC &    metric &         PV &               0.27$\pm$0.30 &  21.76$\pm$3.53 & 2.38$\pm$0.29 \\
DDPP (+DPP) (Ours) &    metric &             BALD &              -0.21$\pm$0.65 &  23.46$\pm$6.14 & 2.83$\pm$0.66 \\
DDPP (+DPP) (Ours) &    metric & SMP &              -0.01$\pm$0.50 &  21.88$\pm$3.63 & 2.67$\pm$0.45 \\
DDPP (+DPP) (Ours) &    metric &         PV &              -0.08$\pm$0.74 &  22.22$\pm$6.44 & 2.70$\pm$0.74 \\
DDPP (+OOD) (Ours) &    metric &             BALD &              -0.38$\pm$0.55 &  24.67$\pm$6.18 & 3.05$\pm$0.53 \\
DDPP (+OOD) (Ours) &    metric & SMP &               0.05$\p

# Results for metric after the new hyperparameter optimization (hypopt)

In [104]:
# Combined table (classification datasets + CoNLL-2003) for MC dropout and the
# DDPP variants after the new hyperparameter optimization.
# The module-level settings below are read by get_df as globals.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp_new_pars/'

ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'last', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
# Metrics in perc_metrics are multiplied by 100 inside get_df.
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# Reference max_prob baselines copied from an earlier table, keyed as
# <dataset><metric><ue_method>; percentage metrics are already scaled by 100.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
# Non-None `baselines` => get_df takes the max_prob baseline from baselines_dict.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# Fresh 0..n-1 index on a new frame; raw_df itself is left untouched.
miscl_df = raw_df.reset_index(drop=True)

# NER (CoNLL-2003): same methods, token- and sequence-level results.
# NOTE(review): get_df_ner is defined elsewhere; presumably it reads the same
# module-level settings as get_df - confirm.
reg_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp_new_pars/conll/metric/'

ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'last', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']
baselines_dict_ner = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}


reg_df, _ = get_df_ner(reg_path, 'metric', baselines_dict_ner, True)
# Keep only the metric columns of the NER frame: the descriptive columns are
# already present in miscl_df, and the two frames are joined row-by-row on
# their fresh 0..n-1 indices.
ner_df = (
    reg_df
    .reset_index(drop=True)
    .drop(columns=['Method', 'Reg. Type', 'UE Score'])
)
miscl_df = pd.concat([miscl_df, ner_df], axis=1)



latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must be replaced before the shorter 'max\_prob'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) \\
\midrule
                MC &    metric &         PV &  26.14$\pm$4.41 & 2.93$\pm$0.36 &  43.68$\pm$5.31 & 2.15$\pm$0.16 &        14.36$\pm$3.01 &     0.88$\pm$0.14 &                     48.36$\pm$22.50 &                   0.52$\pm$0.21 &                         36.18$\pm$7.92 &                      3.99$\pm$0.59 \\
                MC &    metric &             BALD &  27.39$\pm$4.03 & 3.03$\pm$0.38 &  44.74$\pm$5.74 & 2.18$\pm$0.15 &        15.12$\pm$4.15 &     0.88$\pm$0.15 &                     30.37$\pm$11.86 &                   0.37$\pm$0.14 &                        35.06$\pm$10.54 &                      3.35$\pm$0.57 \\
          

In [106]:
# Extends the MC/DDPP table with Mahalanobis-distance rows (plain MD and MD
# with SN), each built from classification results plus CoNLL-2003 results.
# NOTE: relies on `miscl_df` produced by the preceding MC/DDPP table cell.
# The module-level settings below are read by get_df as globals.

# --- MD, classification datasets -------------------------------------------
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp_new_pars/'

ues = ['maha_mc']
ues_names = ['MD']
ues_layers = ['-']

metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

# Metrics in perc_metrics are multiplied by 100 inside get_df.
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# Reference max_prob baselines copied from an earlier table, keyed as
# <dataset><metric><ue_method>; percentage metrics are already scaled by 100.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100}
# Non-None `baselines` => get_df takes the max_prob baseline from baselines_dict.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

miscl_df_md = raw_df.reset_index(drop=True)

# --- MD, CoNLL-2003 ----------------------------------------------------------
# NOTE(review): get_df_ner is defined elsewhere; presumably it reads the same
# module-level settings as get_df - confirm.
reg_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp_new_pars/conll/metric/'

ues = ['maha_mc']
ues_names = ['MD']
ues_layers = ['-']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']
baselines_dict_ner = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                      'tokenrcc-aucmax_prob': 230.81709306328332,
                      'tokenrppmax_prob': 1.8920894383333335,
                      'seqrejection-curve-aucmax_prob': 85.96980676333334,
                      'seqrcc-aucmax_prob': 69.59317634405001,
                      'seqrppmax_prob': 7.4613176516666675}


reg_df, _ = get_df_ner(reg_path, 'metric', baselines_dict_ner, True)
# Keep only the metric columns; the descriptive ones already live in miscl_df_md.
ner_df_md = (
    reg_df
    .reset_index(drop=True)
    .drop(columns=['Method', 'Reg. Type', 'UE Score'])
)
miscl_df_md = pd.concat([miscl_df_md, ner_df_md], axis=1)
# Remove the baseline (last) row so it is not repeated in the stacked table.
miscl_df_md = miscl_df_md.drop(index=len(miscl_df_md) - 1)

# --- MD SN, classification datasets -----------------------------------------
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp_new_pars/'

ues = ['maha_sn_mc']
ues_names = ['MD SN (Ours)']
ues_layers = ['-']

metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2']
types_names = ['MRPC', 'CoLA', 'SST2 (10%)']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']

perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# Reuses the baselines_dict returned by the previous get_df call.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

miscl_df_mdsn = raw_df.reset_index(drop=True)

# --- MD SN, CoNLL-2003 -------------------------------------------------------
reg_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_3hyp_new_pars/conll/metric_sn/'

ues = ['maha_mc']
ues_names = ['MD SN (Ours)']
ues_layers = ['-']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mahalanobis_distance', 'sampled_mahalanobis_distance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

reg_df, _ = get_df_ner(reg_path, 'metric', baselines_dict_ner, True)
ner_df_mdsn = (
    reg_df
    .reset_index(drop=True)
    .drop(columns=['Method', 'Reg. Type', 'UE Score'])
)
miscl_df_mdsn = pd.concat([miscl_df_mdsn, ner_df_mdsn], axis=1)
# Tag the last row's method name with ' SN'. A single .loc[row, col] assignment
# is required here: the chained form df.loc[row]['Method'] = ... may write to a
# temporary copy and leave the frame unchanged.
last_row = len(miscl_df_mdsn) - 1
miscl_df_mdsn.loc[last_row, 'Method'] = str(miscl_df_mdsn.loc[last_row, 'Method']) + ' SN'

# --- Stack everything and render ---------------------------------------------
miscl_df_maha = pd.concat([miscl_df_md, miscl_df_mdsn], axis=0)
miscl_df_overall = pd.concat([miscl_df, miscl_df_maha], axis=0)
latex_table = miscl_df_overall.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must precede 'max\_prob'; 'mahalanobis\_distance' must
# precede 'sampled\_MD'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'sampled\_MD', 'SMD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)

print(latex_table)

\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &                     UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2 (10\%), rcc-auc) & (SST2 (10\%), rpp) & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) \\
\midrule
                MC &    metric &                     PV &  26.14$\pm$4.41 & 2.93$\pm$0.36 &  43.68$\pm$5.31 & 2.15$\pm$0.16 &        14.36$\pm$3.01 &     0.88$\pm$0.14 &                     48.36$\pm$22.50 &                   0.52$\pm$0.21 &                         36.18$\pm$7.92 &                      3.99$\pm$0.59 \\
                MC &    metric &                         BALD &  27.39$\pm$4.03 & 3.03$\pm$0.38 &  44.74$\pm$5.74 & 2.18$\pm$0.15 &        15.12$\pm$4.15 &     0.88$\pm$0.15 &                     30.37$\pm$11.86 &                   0.37$\pm$0.14 &                        35.06$\pm$10.54 &             

# Mixup

In [114]:
# Mixup (MSD) table: classification datasets plus CoNLL-2003.
# The module-level settings below are read by get_df as globals.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra/'

ues = ['msd/all', 'msd/last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2', '20ng']
types_names = ['MRPC', 'CoLA', 'SST2', '20 NG']
ue_methods = ['max_prob', 'mixup']
# Metrics in perc_metrics are multiplied by 100 inside get_df.
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# Reference max_prob baselines copied from an earlier table, keyed as
# <dataset><metric><ue_method>; percentage metrics are already scaled by 100.
# NOTE(review): the three '20ng' entries repeat the 'sst2' values verbatim -
# this looks like a placeholder; confirm the real 20 Newsgroups baselines.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# Non-None `baselines` => get_df takes the max_prob baseline from baselines_dict.
raw_df, baselines_dict = get_df(raw_path, 'MSD', baselines_dict, True)

# Fresh 0..n-1 index on a new frame; raw_df itself is left untouched.
miscl_df = raw_df.reset_index(drop=True)

# NER (CoNLL-2003): token- and sequence-level results for the same setup.
# NOTE(review): get_df_ner is defined elsewhere; presumably it reads the same
# module-level settings as get_df - confirm.
reg_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra/conll/mixup/'

ues = ['all', 'last']
ues_names = ['MC-all', 'MC-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# From here on baselines_dict holds the NER baselines (the name is rebound).
baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'MSD', baselines_dict, 1)
print(baselines_dict)
# Keep only the metric columns of the NER frame: the descriptive columns are
# already present in miscl_df, and the frames are joined row-by-row.
ner_df = (
    reg_df
    .reset_index(drop=True)
    .drop(columns=['Method', 'Reg. Type', 'UE Score'])
)
miscl_df = pd.concat([miscl_df, ner_df], axis=1)



latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must be replaced before the shorter 'max\_prob'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllllll}
\toprule
  Method & Reg. Type & UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (20 NG, rcc-auc) &  (20 NG, rpp) & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) \\
\midrule
 MSD-all &       MSD &    mixup &  12.54$\pm$1.03 & 1.66$\pm$0.14 &  41.25$\pm$2.00 & 2.06$\pm$0.06 &  13.80$\pm$0.82 & 0.96$\pm$0.06 & 502.38$\pm$42.17 & 2.71$\pm$0.16 &                    231.84$\pm$38.89 &                   2.26$\pm$0.34 &                        61.57$\pm$11.44 &                      5.88$\pm$0.73 \\
MSD-last &       MSD &    mixup &  12.

# Mixup with optimal hyperparameters

In [30]:
# Mixup (MSD) table with optimal hyperparameters: classification + CoNLL-2003.
# NOTE: the last recorded run of this cell failed with FileNotFoundError for
# 'mrpc/msd/all/metrics_rcc-auc.json' under raw_path - check that the metrics
# exist at this location before re-running.
# The module-level settings below are read by get_df as globals.
raw_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_opt/'

ues = ['msd/all', 'msd/last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'cola', 'sst2', '20ng']
types_names = ['MRPC', 'CoLA', 'SST2', '20NG']
ue_methods = ['max_prob', 'mixup']
# Metrics in perc_metrics are multiplied by 100 inside get_df.
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# Reference max_prob baselines copied from an earlier table, keyed as
# <dataset><metric><ue_method>; percentage metrics are already scaled by 100.
# NOTE(review): the three '20ng' entries repeat the 'sst2' values verbatim -
# this looks like a placeholder; confirm the real 20 Newsgroups baselines.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# Non-None `baselines` => get_df takes the max_prob baseline from baselines_dict.
raw_df, baselines_dict = get_df(raw_path, 'MSD', baselines_dict, True)

# Fresh 0..n-1 index on a new frame; raw_df itself is left untouched.
miscl_df = raw_df.reset_index(drop=True)

# NER (CoNLL-2003): token- and sequence-level results for the same setup.
# NOTE(review): get_df_ner is defined elsewhere; presumably it reads the same
# module-level settings as get_df - confirm.
reg_path = '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_opt/conll/mixup/'

ues = ['all', 'last']
ues_names = ['MC-all', 'MC-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# From here on baselines_dict holds the NER baselines (the name is rebound).
baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'MSD', baselines_dict, 1)
print(baselines_dict)
# Keep only the metric columns of the NER frame: the descriptive columns are
# already present in miscl_df, and the frames are joined row-by-row.
ner_df = (
    reg_df
    .reset_index(drop=True)
    .drop(columns=['Method', 'Reg. Type', 'UE Score'])
)
miscl_df = pd.concat([miscl_df, ner_df], axis=1)



latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must be replaced before the shorter 'max\_prob'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

FileNotFoundError: [Errno 2] No such file or directory: '/notebook/ue/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_opt/mrpc/msd/all/metrics_rcc-auc.json'

In [13]:
# NER only (CoNLL-2003): mixup with optimal hyperparameters, metric columns only.
# NOTE(review): get_df_ner is defined elsewhere; presumably it reads the
# module-level settings below the same way get_df does - confirm.
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_opt/conll/mixup/'

ues = ['all', 'last']
ues_names = ['MC-all', 'MC-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# Reference max_prob baselines for token- and sequence-level evaluation, keyed
# as <level><metric><ue_method>.
baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'MSD', baselines_dict, 1)
print(baselines_dict)
# Only the metric columns are rendered; the descriptive columns are dropped.
ner_df = (
    reg_df
    .reset_index(drop=True)
    .drop(columns=['Method', 'Reg. Type', 'UE Score'])
)
# The table consists of the NER results alone (no classification columns).
miscl_df = ner_df



latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must be replaced before the shorter 'max\_prob'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{llll}
\toprule
(CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) \\
\midrule
                     11.20$\pm$2.58 &                   0.16$\pm$0.03 &                         19.01$\pm$4.28 &                      2.18$\pm$0.40 \\
                     11.75$\pm$2.61 &                   0.17$\pm$0.03 &                         19.48$\pm$4.17 &                      2.26$\pm$0.35 \\
                     15.51$\pm$4.09 &                   0.22$\pm$0.05 &                         20.69$\pm$3.05 &                      2.59$\pm$0.29 \\
\bottomrule
\end{tabular}



In [21]:
# New NER table (CoNLL-2003): MC dropout and both DDPP variants, no SN.
# NOTE(review): get_df_ner is defined elsewhere; presumably it reads the
# module-level settings below the same way get_df does - confirm.
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/workdir/run_calc_ues_metrics/electra_metric_no_sn/conll2003/'

ues = ['mc_all', 'ddpp_dpp_best', 'ddpp_ood_best']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'last', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# Reference max_prob baselines for token- and sequence-level evaluation, keyed
# as <level><metric><ue_method>.
baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, '-', baselines_dict, 1)
print(baselines_dict)
# Method / Reg. Type / UE Score columns are kept: this table stands alone.
ner_df = reg_df.reset_index(drop=True)
miscl_df = ner_df



latex_table = miscl_df.to_latex(bold_rows=False, index=False)
# Ordered (escaped LaTeX text -> table label) substitutions. Raw strings are
# used because '\p' and '\_' are invalid escape sequences in normal literals.
# 'sampled\_max\_prob' must be replaced before the shorter 'max\_prob'.
for old, new in [
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
]:
    latex_table = latex_table.replace(old, new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllll}
\toprule
            Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) \\
\midrule
                MC &         - &         PV &                       7.32$\pm$1.97 &                   0.11$\pm$0.02 &                         16.23$\pm$2.57 &                      2.02$\pm$0.22 \\
                MC &         - &             BALD &                       7.50$\pm$1.91 &                   0.11$\pm$0.02 &                         16.40$\pm$2.43 &                      2.02$\pm$0.20 \\
                MC &         - & SMP &                       6.93$\pm$1.68 &           

In [29]:
# Mahalanobis distance (MD) results on CoNLL-2003, token and sequence level.
# NOTE(review): 'workdir/run_calc_ues_metrics' appears twice in this path --
# confirm it is not a paste error.
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/workdir/run_calc_ues_metrics/electra_metric_sn/conll2003/'

# Notebook-level configuration, presumably read as globals by get_df_ner
# (get_df reads names like these) -- TODO confirm.
ues = ['mahalanobis']
# NOTE(review): the printed table labels the MD row's Method as 'MC' --
# confirm this label is intended.
ues_names = ['MC']
ues_layers = ['all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mahalanobis_distance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# Hard-coded max_prob baselines, keyed by the concatenation
# <type><metric><ue_method>.
baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
# The trailing `1` is presumably a non-None `baselines` flag (reuse the dict
# above instead of recomputing), as in get_df -- TODO confirm.
reg_df, baselines_dict = get_df_ner(reg_path, '-', baselines_dict, 1)
print(baselines_dict)

# Re-indexed copy of the results; `reg_df` itself is not modified.
ner_df = reg_df.reset_index(drop=True)
miscl_df = ner_df

# Render to LaTeX, then shorten the UE-score names.
# Raw strings keep the backslashes literal without relying on the invalid
# escape sequences '\p' and '\_', which newer Python versions warn about.
# Order matters: 'sampled\_max\_prob' has to be rewritten before 'max\_prob'.
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
for _old, _new in (
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
):
    latex_table = latex_table.replace(_old, _new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) \\
\midrule
           MC &         - & MD &                       7.91$\pm$1.38 &                   0.12$\pm$0.02 &                         18.37$\pm$2.80 &                      2.03$\pm$0.39 \\
SR (baseline) &         - &             MP &                       7.85$\pm$1.68 &                   0.12$\pm$0.02 &                         23.17$\pm$3.50 &                      2.75$\pm$0.37 \\
\bottomrule
\end{tabular}



In [52]:
# ---- Sequence classification: MRPC / SST2 / CoLA ----
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_fixed_omega_no_mixup/'

# Notebook-level configuration. get_df reads `ues`, `types`, `metrics`,
# `ue_methods` and `perc_metrics` as globals, so they must be set before the call.
ues = ['msd/all', 'msd/last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
# Keys are the concatenation <type><metric><ue_method>; the rejection-curve-auc
# and rpp entries are scaled by 100 here.
# NOTE(review): the '20ng' entries repeat the sst2 values and '20ng' is not in
# `types` -- looks like a placeholder; confirm.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None `baselines` argument makes get_df take the max_prob baseline from
# baselines_dict instead of storing the freshly computed one.
raw_df, baselines_dict = get_df(raw_path, 'MSD', baselines_dict, True)

# `miscl_df` aliases `raw_df`, so the in-place edits below also change `raw_df`.
miscl_df = raw_df
miscl_df.reset_index(drop=True, inplace=True)

# ---- NER: CoNLL-2003, token and sequence level ----
# Alternative run:
#reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_fixed_omega_sn/conll/mixup/'
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_fixed_omega_no_mixup/conll/mixup/'

ues = ['all', 'last']
# NOTE(review): these names end up in the table's Method column; they read
# 'MC-*' while the classification half uses 'MSD-*' and Reg. Type is 'MSD' --
# confirm which label is intended.
ues_names = ['MC-all', 'MC-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'MSD', baselines_dict, 1)
print(baselines_dict)

# Re-indexed copy of the NER results; `reg_df` itself is not modified.
ner_df = reg_df.reset_index(drop=True)

# Keep the label columns once (from the NER frame) and join the two frames
# side by side; both carry a fresh 0..n-1 index, so rows pair up by position.
miscl_df.drop(columns=['Method', 'Reg. Type', 'UE Score'], inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

# Render to LaTeX, then shorten the UE-score names.
# Raw strings keep the backslashes literal without relying on the invalid
# escape sequences '\p' and '\_', which newer Python versions warn about.
# Order matters: 'sampled\_max\_prob' has to be rewritten before 'max\_prob'.
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
for _old, _new in (
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
):
    latex_table = latex_table.replace(_old, _new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type & UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
       MC-all &       MSD &    DS &                       9.63$\pm$1.73 &                   0.14$\pm$0.02 &                         17.41$\pm$3.15 &                      2.05$\pm$0.22 &  12.15$\pm$1.16 & 1.57$\pm$0.17 &  11.42$\pm$1.25 & 0.80$\pm$0.09 &  38.14$\pm$0.82 & 1.87$\pm$0.10 \\
      MC-last &       MSD &    DS &                       9.98$\pm$1.75 &                   0.14$\pm$0.

# DistilBERT

In [7]:
# ---- Sequence classification: MRPC / SST2 / CoLA ----
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/distilbert/metric_171/'

# Notebook-level configuration. get_df reads `ues`, `types`, `metrics`,
# `ue_methods` and `perc_metrics` as globals, so they must be set before the call.
ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'all', 'all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
# Keys are the concatenation <type><metric><ue_method>; the rejection-curve-auc
# and rpp entries are scaled by 100 here.
# NOTE(review): the '20ng' entries repeat the sst2 values and '20ng' is not in
# `types` -- looks like a placeholder; confirm.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None `baselines` argument makes get_df take the max_prob baseline from
# baselines_dict instead of storing the freshly computed one.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# `miscl_df` aliases `raw_df`, so the in-place edits below also change `raw_df`.
miscl_df = raw_df
miscl_df.reset_index(drop=True, inplace=True)

# ---- NER: CoNLL-2003, token and sequence level ----
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/distilbert/metric_171/conll/'

ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'all', 'all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'metric', baselines_dict, 1)
print(baselines_dict)

# Re-indexed copy of the NER results; `reg_df` itself is not modified.
ner_df = reg_df.reset_index(drop=True)

# Keep the label columns once (from the NER frame) and join the two frames
# side by side; both carry a fresh 0..n-1 index, so rows pair up by position.
miscl_df.drop(columns=['Method', 'Reg. Type', 'UE Score'], inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

# Render to LaTeX, then shorten the UE-score names.
# Raw strings keep the backslashes literal without relying on the invalid
# escape sequences '\p' and '\_', which newer Python versions warn about.
# Order matters: 'sampled\_max\_prob' has to be rewritten before 'max\_prob'.
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
for _old, _new in (
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
):
    latex_table = latex_table.replace(_old, _new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
                MC &    metric &         PV &                       6.71$\pm$1.50 &                   0.09$\pm$0.02 &                         19.75$\pm$4.18 &                      1.94$\pm$0.42 &  25.64$\pm$2.39 & 2.90$\pm$0.14 &  45.73$\pm$3.12 & 2.44$\pm$0.13 &  158.58$\pm$7.82 & 5.49$\pm$0.27 \\
                MC &    metric &             BALD &                       6.83

In [10]:
# ---- Sequence classification: MRPC / SST2 / CoLA ----
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/distilbert/metric_171/'

# Notebook-level configuration. get_df reads `ues`, `types`, `metrics`,
# `ue_methods` and `perc_metrics` as globals, so they must be set before the call.
ues = ['maha']
ues_names = ['MD (Ours)']
ues_layers = ['all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'mahalanobis_distance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
# Keys are the concatenation <type><metric><ue_method>; the rejection-curve-auc
# and rpp entries are scaled by 100 here.
# NOTE(review): the '20ng' entries repeat the sst2 values and '20ng' is not in
# `types` -- looks like a placeholder; confirm.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None `baselines` argument makes get_df take the max_prob baseline from
# baselines_dict instead of storing the freshly computed one.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# `miscl_df` aliases `raw_df`, so the in-place edits below also change `raw_df`.
miscl_df = raw_df
miscl_df.reset_index(drop=True, inplace=True)

# ---- NER: CoNLL-2003, token and sequence level ----
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/distilbert/metric_171/conll/'

ues = ['maha']
ues_names = ['MD (Ours)']
ues_layers = ['all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mahalanobis_distance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'metric', baselines_dict, 1)
print(baselines_dict)

# Re-indexed copy of the NER results; `reg_df` itself is not modified.
ner_df = reg_df.reset_index(drop=True)

# Keep the label columns once (from the NER frame) and join the two frames
# side by side; both carry a fresh 0..n-1 index, so rows pair up by position.
miscl_df.drop(columns=['Method', 'Reg. Type', 'UE Score'], inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

# Render to LaTeX, then shorten the UE-score names.
# Raw strings keep the backslashes literal without relying on the invalid
# escape sequences '\p' and '\_', which newer Python versions warn about.
# Order matters: 'sampled\_max\_prob' has to be rewritten before 'max\_prob'.
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
for _old, _new in (
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
):
    latex_table = latex_table.replace(_old, _new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
    MD (Ours) &    metric & MD &                       6.87$\pm$1.71 &                   0.10$\pm$0.02 &                         20.95$\pm$2.83 &                      2.11$\pm$0.18 &  26.98$\pm$1.77 & 3.30$\pm$0.26 &  31.31$\pm$3.44 & 1.96$\pm$0.19 & 107.63$\pm$3.63 & 4.48$\pm$0.16 \\
SR (baseline) &    metric &             MP &                       6.72$\pm$1.70 &            

In [68]:
# DE (labelled 'Deep Ensemble' in the table)
# ---- Sequence classification: MRPC / SST2 / CoLA ----
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/distilbert_ensemble/'

# Notebook-level configuration. get_df reads `ues`, `types`, `metrics`,
# `ue_methods` and `perc_metrics` as globals, so they must be set before the call.
ues = ['de']
ues_names = ['Deep Ensemble']
ues_layers = ['all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
# Keys are the concatenation <type><metric><ue_method>; the rejection-curve-auc
# and rpp entries are scaled by 100 here.
# NOTE(review): the '20ng' entries repeat the sst2 values and '20ng' is not in
# `types` -- looks like a placeholder; confirm.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None `baselines` argument makes get_df take the max_prob baseline from
# baselines_dict instead of storing the freshly computed one.
raw_df, baselines_dict = get_df(raw_path, '-', baselines_dict, True)

# `miscl_df` aliases `raw_df`, so the in-place edits below also change `raw_df`.
miscl_df = raw_df
miscl_df.reset_index(drop=True, inplace=True)

# ---- NER: CoNLL-2003, token and sequence level ----
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/distilbert_ensemble/conll2003/'

ues = ['de']
ues_names = ['Deep Ensemble']
ues_layers = ['all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, '-', baselines_dict, 1)
print(baselines_dict)

# Re-indexed copy of the NER results; `reg_df` itself is not modified.
ner_df = reg_df.reset_index(drop=True)

# Keep the label columns once (from the NER frame) and join the two frames
# side by side; both carry a fresh 0..n-1 index, so rows pair up by position.
miscl_df.drop(columns=['Method', 'Reg. Type', 'UE Score'], inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

# Render to LaTeX, then shorten the UE-score names.
# Raw strings keep the backslashes literal without relying on the invalid
# escape sequences '\p' and '\_', which newer Python versions warn about.
# Order matters: 'sampled\_max\_prob' has to be rewritten before 'max\_prob'.
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
for _old, _new in (
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
):
    latex_table = latex_table.replace(_old, _new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
Deep Ensemble &         - &         PV &                       6.30$\pm$2.54 &                   0.08$\pm$0.03 &                         17.17$\pm$2.38 &                      1.59$\pm$0.30 &  34.20$\pm$3.28 & 3.36$\pm$0.08 &  29.69$\pm$4.80 & 1.61$\pm$0.20 & 122.00$\pm$12.58 & 4.40$\pm$0.27 \\
Deep Ensemble &         - &             BALD &                       5.89$\pm$1.76 &    

# Metric ELECTRA

In [18]:
# New metric loss
# ---- Sequence classification: MRPC / SST2 / CoLA ----
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_param_last_fix_171/'

# Notebook-level configuration. get_df reads `ues`, `types`, `metrics`,
# `ue_methods` and `perc_metrics` as globals, so they must be set before the call.
ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'all', 'all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
# Keys are the concatenation <type><metric><ue_method>; the rejection-curve-auc
# and rpp entries are scaled by 100 here.
# NOTE(review): the '20ng' entries repeat the sst2 values and '20ng' is not in
# `types` -- looks like a placeholder; confirm.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None `baselines` argument makes get_df take the max_prob baseline from
# baselines_dict instead of storing the freshly computed one.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# `miscl_df` aliases `raw_df`, so the in-place edits below also change `raw_df`.
miscl_df = raw_df
miscl_df.reset_index(drop=True, inplace=True)

# ---- NER: CoNLL-2003, token and sequence level ----
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_param_last_fix_conll_171/conll/metric/'

ues = ['all', 'dpp', 'dpp_with_ood']
ues_names = ['MC', 'DDPP (+DPP) (Ours)', 'DDPP (+OOD) (Ours)']
ues_layers = ['all', 'all', 'all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'variance', 'bald', 'sampled_max_prob']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'metric', baselines_dict, 1)
print(baselines_dict)

# Re-indexed copy of the NER results; `reg_df` itself is not modified.
ner_df = reg_df.reset_index(drop=True)

# Keep the label columns once (from the NER frame) and join the two frames
# side by side; both carry a fresh 0..n-1 index, so rows pair up by position.
miscl_df.drop(columns=['Method', 'Reg. Type', 'UE Score'], inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

# Render to LaTeX, then shorten the UE-score names.
# Raw strings keep the backslashes literal without relying on the invalid
# escape sequences '\p' and '\_', which newer Python versions warn about.
# Order matters: 'sampled\_max\_prob' has to be rewritten before 'max\_prob'.
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
for _old, _new in (
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
):
    latex_table = latex_table.replace(_old, _new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
            Method & Reg. Type &         UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
                MC &    metric &         PV &                       7.04$\pm$2.09 &                   0.10$\pm$0.03 &                         16.90$\pm$4.14 &                      2.00$\pm$0.49 &  12.81$\pm$2.48 & 1.56$\pm$0.23 &  13.71$\pm$2.29 & 0.85$\pm$0.10 &  47.26$\pm$2.82 & 2.29$\pm$0.10 \\
                MC &    metric &             BALD &                       7.16$\

In [20]:
# ---- Sequence classification: MRPC / SST2 / CoLA ----
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_param_last_fix_171/'

# Notebook-level configuration. get_df reads `ues`, `types`, `metrics`,
# `ue_methods` and `perc_metrics` as globals, so they must be set before the call.
# get_df joins each entry of `ues` into the metrics path, so 'maha_sn' is a
# directory name here.
ues = ['maha_sn']
ues_names = ['MD SN (Ours)']
ues_layers = ['all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'mahalanobis_distance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

# copied from table
# Keys are the concatenation <type><metric><ue_method>; the rejection-curve-auc
# and rpp entries are scaled by 100 here.
# NOTE(review): the '20ng' entries repeat the sst2 values and '20ng' is not in
# `types` -- looks like a placeholder; confirm.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None `baselines` argument makes get_df take the max_prob baseline from
# baselines_dict instead of storing the freshly computed one.
raw_df, baselines_dict = get_df(raw_path, 'metric', baselines_dict, True)

# `miscl_df` aliases `raw_df`, so the in-place edits below also change `raw_df`.
miscl_df = raw_df
miscl_df.reset_index(drop=True, inplace=True)

# ---- NER: CoNLL-2003, token and sequence level ----
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/metric_opt_electra_param_last_fix_conll_171/conll/metric_sn/'

# NOTE(review): 'maha' here versus 'maha_sn' in the classification half --
# the '_sn' part sits in `reg_path` instead; confirm this matches the layout on disk.
ues = ['maha']
ues_names = ['MD SN (Ours)']
ues_layers = ['all']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mahalanobis_distance']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']

baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'metric', baselines_dict, 1)
print(baselines_dict)

# Re-indexed copy of the NER results; `reg_df` itself is not modified.
ner_df = reg_df.reset_index(drop=True)

# Keep the label columns once (from the NER frame) and join the two frames
# side by side; both carry a fresh 0..n-1 index, so rows pair up by position.
miscl_df.drop(columns=['Method', 'Reg. Type', 'UE Score'], inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

# Render to LaTeX, then shorten the UE-score names.
# Raw strings keep the backslashes literal without relying on the invalid
# escape sequences '\p' and '\_', which newer Python versions warn about.
# Order matters: 'sampled\_max\_prob' has to be rewritten before 'max\_prob'.
latex_table = miscl_df.to_latex(bold_rows=False, index=False)
for _old, _new in (
    (r'\$\textbackslash pm\$', r'$\pm$'),
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
):
    latex_table = latex_table.replace(_old, _new)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type &             UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
 MD SN (Ours) &    metric & MD &                       6.53$\pm$1.50 &                   0.10$\pm$0.02 &                         16.50$\pm$3.05 &                      1.79$\pm$0.31 &  14.46$\pm$1.21 & 1.96$\pm$0.14 &  11.67$\pm$1.07 & 0.82$\pm$0.06 &  43.50$\pm$2.27 & 2.07$\pm$0.11 \\
SR (baseline) &    metric &             MP &                       7.17$\pm$1.19 &            

# MSD: DistilBERT, ELECTRA & DeBERTa

In [19]:
# ---- Sequence classification: MRPC / SST2 / CoLA ----
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_fix_repro_fix/'

#reg_path = '/data/gkuzmin/uncertainty-estimation/workdir/run_calc_ues_metrics/conll2003_electra_reg_01_fix/'

# Which tasks, metrics and UE scores to tabulate (get_df reads `ues`, `types`,
# `metrics`, `ue_methods` and `perc_metrics` as globals).
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
metrics = ['rcc-auc', 'rpp']
metric_names = list(metrics)
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']
ue_methods = ['max_prob', 'mixup']

# Result sub-directories, their display names and layer tags.
ues = ['msd/all', 'msd/last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']

# copied from table
# Keys are the concatenation <type><metric><ue_method>.
baselines_dict = {
    'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
    'mrpcrcc-aucmax_prob': 23.279293481630972,
    'mrpcrppmax_prob': 0.026788574907087016 * 100,
    'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
    'colarcc-aucmax_prob': 59.03726591032054,
    'colarppmax_prob': 0.02631936969193335 * 100,
    'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
    'sst2rcc-aucmax_prob': 18.067838464295736,
    'sst2rppmax_prob': 0.012349462026204303 * 100,
    '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
    '20ngrcc-aucmax_prob': 18.067838464295736,
    '20ngrppmax_prob': 0.012349462026204303 * 100,
}
# A non-None `baselines` argument makes get_df take the max_prob baseline from
# baselines_dict instead of storing the freshly computed one.
raw_df, baselines_dict = get_df(raw_path, 'MSD', baselines_dict, True)

# Re-index in place; `miscl_df` is a second name for the same frame.
raw_df.reset_index(drop=True, inplace=True)
miscl_df = raw_df

# ---- NER: CoNLL-2003, token and sequence level ----
# Same for metric
reg_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_electra_fix_repro_fix/conll/mixup/'

types = ['token', 'seq']
metrics = ['rcc-auc', 'rpp']
metric_names = list(metrics)
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']
ue_methods = ['max_prob', 'mixup']

ues = ['all', 'last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']


baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
reg_df, baselines_dict = get_df_ner(reg_path, 'MSD', baselines_dict, 1)

print(baselines_dict)
ner_df = pd.concat([reg_df])
ner_df.reset_index(inplace=True, drop=True)
miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)



latex_table = miscl_df.to_latex(bold_rows=False, index=False)
latex_table = latex_table.replace('\\$\\textbackslash pm\\$', '$\pm$')
latex_table = latex_table.replace('variance', 'PV')
latex_table = latex_table.replace('var\_ratio', 'VR')
latex_table = latex_table.replace('sampled\_entropy', 'SE')
latex_table = latex_table.replace('sampled\_max\_prob', 'SMP')
latex_table = latex_table.replace('mahalanobis\_distance', 'MD')
latex_table = latex_table.replace('max\_prob', 'MP')
latex_table = latex_table.replace('bald', 'BALD')
latex_table = latex_table.replace('mixup', 'DS')
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type & UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) & (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
      MSD-all &       MSD &    DS &                      10.58$\pm$2.52 &                   0.15$\pm$0.03 &                         18.47$\pm$3.25 &                      1.99$\pm$0.30 &  16.38$\pm$8.33 & 2.27$\pm$1.20 &  11.17$\pm$0.94 & 0.78$\pm$0.06 &  39.21$\pm$1.99 & 1.90$\pm$0.11 \\
     MSD-last &       MSD &    DS &                      11.03$\pm$2.44 &                   0.15$\pm$0.

In [20]:
# MSD results for DistilBERT: builds one LaTeX table with the CoNLL-2003 (NER)
# columns on the left and the MRPC / SST2 / CoLA columns on the right.

# --- Classification part ----------------------------------------------------
# These names are module-level on purpose: get_df reads `ues`, `types`,
# `metrics`, `ue_methods` and `perc_metrics` as globals, not as arguments.
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_distilbert_fix_repro_fix/'

ues = ['msd/all', 'msd/last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# copied from table
# Keys are `<dataset><metric><ue_method>`, matching the lookup done in get_df.
# NOTE(review): these numbers are identical to the ones used in the ELECTRA
# cell of this notebook -- confirm they are the intended baselines for
# DistilBERT. The '20ng' entries also repeat the 'sst2' numbers and '20ng' is
# not in `types` here, so they are presumably never looked up.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None fourth argument makes get_df read the max_prob baseline from
# baselines_dict instead of overwriting it with the freshly computed mean.
raw_df, baselines_dict = get_df(raw_path, 'MSD', baselines_dict, True)

# miscl_df is an alias of raw_df, so the in-place operations on it below
# (reset_index, drop) also modify raw_df.
miscl_df = raw_df
miscl_df.reset_index(inplace=True, drop=True)

# --- NER part (CoNLL-2003) ----------------------------------------------------
# Same run directory as above; derived from raw_path so the two cannot drift
# apart when this cell is copied for another model.
reg_path = raw_path + 'conll/mixup/'

ues = ['all', 'last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
# get_df_ner is defined elsewhere in the notebook; the trailing `1` presumably
# plays the same role as `baselines` in get_df -- TODO confirm.
reg_df, baselines_dict = get_df_ner(reg_path, 'MSD', baselines_dict, 1)
print(baselines_dict)
# Fresh frame with a clean 0..n-1 index so the column-wise concat aligns rows
# by position; reg_df itself is left untouched.
ner_df = reg_df.reset_index(drop=True)
# The NER frame is placed first and keeps the identifying columns, so drop
# them from the classification frame to avoid repeating them.
miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
miscl_df = pd.concat([ner_df, miscl_df], axis=1)

# --- LaTeX rendering ------------------------------------------------------------
latex_table = miscl_df.to_latex(bold_rows=False, index=False)

# Raw strings keep the backslashes literal; the previous non-raw literals
# ('$\pm$', 'var\_ratio', ...) relied on invalid escape sequences, which emit
# a SyntaxWarning on Python 3.12+.
# Order matters: 'sampled\_max\_prob' must be rewritten before 'max\_prob',
# because the latter is a substring of the former.
latex_replacements = (
    (r'\$\textbackslash pm\$', r'$\pm$'),  # undo to_latex escaping of the +/- separator
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
)
for raw_label, latex_label in latex_replacements:
    latex_table = latex_table.replace(raw_label, latex_label)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllllllll}
\toprule
       Method & Reg. Type & UE Score & (CoNLL-2003 (token level), rcc-auc) & (CoNLL-2003 (token level), rpp) & (CoNLL-2003 (sequence level), rcc-auc) & (CoNLL-2003 (sequence level), rpp) & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
      MSD-all &       MSD &    DS &                      10.25$\pm$4.14 &                   0.14$\pm$0.05 &                         19.78$\pm$5.61 &                      1.73$\pm$0.34 &  58.41$\pm$6.88 & 6.87$\pm$1.16 &  29.97$\pm$2.81 & 1.84$\pm$0.19 & 118.91$\pm$27.91 & 4.97$\pm$1.26 \\
     MSD-last &       MSD &    DS &                      10.41$\pm$4.15 &                   0.14$\pm$

In [21]:
# MSD results for DeBERTa: LaTeX table with the MRPC / SST2 / CoLA columns
# only -- the CoNLL-2003 (NER) aggregation is disabled in this cell (see below).
# NOTE(review): the recorded output of this cell shows identical 'MSD-all' and
# 'MSD-last' rows; verify that the msd/all and msd/last result directories
# really hold different runs.

# --- Classification part ----------------------------------------------------
# These names are module-level on purpose: get_df reads `ues`, `types`,
# `metrics`, `ue_methods` and `perc_metrics` as globals, not as arguments.
raw_path = '/home/jovyan/uncertainty-estimation/workdir/run_calc_ues_metrics/mixup_deberta_fix_repro_fix/'

ues = ['msd/all', 'msd/last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['mrpc', 'sst2', 'cola']
types_names = ['MRPC', 'SST2', 'CoLA']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


# copied from table
# Keys are `<dataset><metric><ue_method>`, matching the lookup done in get_df.
# NOTE(review): these numbers are identical to the ones used in the ELECTRA
# cell of this notebook -- confirm they are the intended baselines for
# DeBERTa. The '20ng' entries also repeat the 'sst2' numbers and '20ng' is
# not in `types` here, so they are presumably never looked up.
baselines_dict = {'mrpcrejection-curve-aucmax_prob': 0.9208435457516339 * 100,
                  'mrpcrcc-aucmax_prob': 23.279293481630972,
                  'mrpcrppmax_prob': 0.026788574907087016 * 100,
                  'colarejection-curve-aucmax_prob': 0.9203619367209971 * 100,
                  'colarcc-aucmax_prob': 59.03726591032054,
                  'colarppmax_prob': 0.02631936969193335 * 100,
                  'sst2rejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  'sst2rcc-aucmax_prob': 18.067838464295736,
                  'sst2rppmax_prob': 0.012349462026204303 * 100,
                  '20ngrejection-curve-aucmax_prob': 0.9379778287461774 * 100,
                  '20ngrcc-aucmax_prob': 18.067838464295736,
                  '20ngrppmax_prob': 0.012349462026204303 * 100}
# A non-None fourth argument makes get_df read the max_prob baseline from
# baselines_dict instead of overwriting it with the freshly computed mean.
raw_df, baselines_dict = get_df(raw_path, 'MSD', baselines_dict, True)

# miscl_df is an alias of raw_df, so the in-place reset below also modifies
# raw_df.
miscl_df = raw_df
miscl_df.reset_index(inplace=True, drop=True)

# --- NER part (CoNLL-2003) -- currently disabled --------------------------------
# The configuration is still assigned (and the baselines printed) so the
# notebook state after this cell matches the other MSD cells, but the
# get_df_ner call and the join are commented out. The reason is not recorded
# here -- TODO confirm whether DeBERTa CoNLL results exist, then re-enable.
reg_path = raw_path + 'conll/mixup/'

ues = ['all', 'last']
ues_names = ['MSD-all', 'MSD-last']
ues_layers = ['all', 'last']
metrics = ['rcc-auc', 'rpp']
metric_names = ['rcc-auc', 'rpp']
types = ['token', 'seq']
ue_methods = ['max_prob', 'mixup']
perc_metrics = ['rejection-curve-auc', 'rpp']
diff_metrics = ['rejection-curve-auc', 'roc-auc']


baselines_dict = {'tokenrejection-curve-aucmax_prob': 93.184446145,
                  'tokenrcc-aucmax_prob': 230.81709306328332,
                  'tokenrppmax_prob': 1.8920894383333335,
                  'seqrejection-curve-aucmax_prob': 85.96980676333334,
                  'seqrcc-aucmax_prob': 69.59317634405001,
                  'seqrppmax_prob': 7.4613176516666675}
#reg_df, baselines_dict = get_df_ner(reg_path, 'MSD', baselines_dict, 1)
print(baselines_dict)
#ner_df = pd.concat([reg_df])
#ner_df.reset_index(inplace=True, drop=True)
#miscl_df.drop(['Method', 'Reg. Type', 'UE Score'], axis=1, inplace=True)
# With the NER join disabled this only rebinds miscl_df to the result of a
# single-frame concat, so the table holds the classification columns only.
miscl_df = pd.concat([miscl_df], axis=1)

# --- LaTeX rendering ------------------------------------------------------------
latex_table = miscl_df.to_latex(bold_rows=False, index=False)

# Raw strings keep the backslashes literal; the previous non-raw literals
# ('$\pm$', 'var\_ratio', ...) relied on invalid escape sequences, which emit
# a SyntaxWarning on Python 3.12+.
# Order matters: 'sampled\_max\_prob' must be rewritten before 'max\_prob',
# because the latter is a substring of the former.
latex_replacements = (
    (r'\$\textbackslash pm\$', r'$\pm$'),  # undo to_latex escaping of the +/- separator
    ('variance', 'PV'),
    (r'var\_ratio', 'VR'),
    (r'sampled\_entropy', 'SE'),
    (r'sampled\_max\_prob', 'SMP'),
    (r'mahalanobis\_distance', 'MD'),
    (r'max\_prob', 'MP'),
    ('bald', 'BALD'),
    ('mixup', 'DS'),
)
for raw_label, latex_label in latex_replacements:
    latex_table = latex_table.replace(raw_label, latex_label)
print(latex_table)

{'tokenrejection-curve-aucmax_prob': 93.184446145, 'tokenrcc-aucmax_prob': 230.81709306328332, 'tokenrppmax_prob': 1.8920894383333335, 'seqrejection-curve-aucmax_prob': 85.96980676333334, 'seqrcc-aucmax_prob': 69.59317634405001, 'seqrppmax_prob': 7.4613176516666675}
\begin{tabular}{lllllllll}
\toprule
  Method & Reg. Type & UE Score & (MRPC, rcc-auc) &   (MRPC, rpp) & (SST2, rcc-auc) &   (SST2, rpp) &  (CoLA, rcc-auc) &   (CoLA, rpp) \\
\midrule
 MSD-all &       MSD &    DS &  13.08$\pm$1.25 & 1.88$\pm$0.19 &  11.66$\pm$2.60 & 0.81$\pm$0.12 &   53.42$\pm$4.73 & 2.61$\pm$0.20 \\
MSD-last &       MSD &    DS &  13.08$\pm$1.25 & 1.88$\pm$0.19 &  11.66$\pm$2.60 & 0.81$\pm$0.12 &   53.42$\pm$4.73 & 2.61$\pm$0.20 \\
Baseline &       MSD & MP &  18.25$\pm$1.39 & 2.59$\pm$0.16 &  22.73$\pm$4.64 & 1.43$\pm$0.29 & 103.47$\pm$13.84 & 4.30$\pm$0.39 \\
\bottomrule
\end{tabular}

