In [15]:
# Like the common file, but with a different table structure

In [123]:
import yaml
import os
from yaml import Loader as Loader
from pathlib import Path
import pandas as pd
import numpy as np
import json
from sklearn.metrics import roc_auc_score

from analyze_results import (
    extract_result,
    aggregate_runs,
    from_model_outputs_calc_rcc_auc,
)
from analyze_results import (
    format_results2,
    improvement_over_baseline,
    from_model_outputs_calc_pr_auc,
    from_model_outputs_calc_rpp,
    from_model_outputs_calc_roc_auc,
    from_model_outputs_calc_arc_auc
)

from utils.utils_wandb import init_wandb, wandb
from ue4nlp.ue_scores import *


In [124]:
def choose_metric(metric_type):
    """Return the scoring function registered under ``metric_type``.

    Supported names are "rejection-curve-auc", "roc-auc", "rcc-auc", "pr-auc"
    and "rpp".

    Raises:
        ValueError: If ``metric_type`` is not one of the supported names.
    """
    # Checked in order with ``==``, exactly like an if/elif chain would.
    registry = (
        ("rejection-curve-auc", from_model_outputs_calc_arc_auc),
        ("roc-auc", from_model_outputs_calc_roc_auc),
        ("rcc-auc", from_model_outputs_calc_rcc_auc),
        ("pr-auc", from_model_outputs_calc_pr_auc),
        ("rpp", from_model_outputs_calc_rpp),
    )
    for known_name, scorer in registry:
        if metric_type == known_name:
            return scorer
    raise ValueError("Wrong metric type!")


def get_one_table(runs_dir, metric_types=["rejection-curve-auc", "roc-auc", "rcc-auc", "pr-auc", "rpp"], baseline=None):
    """Build a score table for one set of runs, with one column per metric.

    Args:
        runs_dir: Passed unchanged to ``aggregate_runs``.
        metric_types: Metric names understood by ``choose_metric``. The default
            list is only read, never modified.
        baseline: Passed to ``improvement_over_baseline`` for every metric other
            than "rcc-auc" and "rpp".

    Returns:
        pandas.DataFrame whose rows are the labels returned by the formatting
        helpers, minus "max_prob" and "count", plus a "baseline (max_prob)" row.
        There is one column per metric that produced results; metrics whose
        aggregation is empty are reported as "Broken" and left out.

    Raises:
        ValueError: If a metric name is unknown, or no metric produced results.
        KeyError: If the combined table has no "max_prob" row.
    """
    default_methods = {
        "bald": bald,
        "sampled_max_prob": sampled_max_prob,
        "variance": probability_variance,
        "var.ratio": var_ratio,
        #"sampled_entropy": mean_entropy,
    }

    table = []
    used_metrics = []  # names of the metrics that actually contributed a column
    for metric_type in metric_types:
        metric = choose_metric(metric_type=metric_type)

        agg_res = aggregate_runs(
            runs_dir, methods=default_methods, metric=metric
        )

        if agg_res.empty:
            print("Broken\n")
            continue

        if metric_type == "rcc-auc":
            final_score = format_results2(agg_res, percents=False)
        elif metric_type == "rpp":
            final_score = format_results2(agg_res, percents=True)
        else:
            final_score = improvement_over_baseline(agg_res, baseline_col="max_prob", baseline=baseline, metric=metric_type, percents=True, subtract=True)
        table.append(final_score)
        used_metrics.append(metric_type)

    if not table:
        raise ValueError(f"No results could be aggregated for any metric in {runs_dir!r}")

    res_table = pd.concat(table, axis=1)
    # Label columns with the metrics that were kept; using metric_types here
    # would fail with a length mismatch whenever a metric was skipped above.
    res_table.columns = used_metrics

    # fix for rcc-auc and rpp: their formatted tables carry the baseline in the
    # 'max_prob' row, so copy it into the shared 'baseline (max_prob)' row.
    if 'baseline (max_prob)' not in res_table.index:
        res_table.loc['baseline (max_prob)'] = 0
    for metric in ['rcc-auc', 'rpp']:
        if metric in res_table.columns and 'max_prob' in res_table.index:
            # Single .loc call: chained indexing may write to a temporary copy.
            res_table.loc['baseline (max_prob)', metric] = res_table.loc['max_prob', metric]

    # 'count' is optional, 'max_prob' is required (KeyError if it is missing).
    res_table = res_table.drop(['count'], errors='ignore')
    res_table = res_table.drop(['max_prob'])
    return res_table


def collect_tables(run_dirs, names, metric_types=["rejection-curve-auc", "roc-auc", "rcc-auc", "pr-auc", "rpp"], baseline=None):
    """Stack the tables of several runs under a ("Method", "UE Score") index.

    For every (run_dir, name) pair the table from ``get_one_table`` is keyed by
    ``(name, <row label>)``. Its "baseline (max_prob)" row is re-filed as
    ``(baseline_name, "max_prob")``, where ``baseline_name`` is "baseline|"
    followed by the "|"-separated parts of ``name`` after the second one.

    Returns:
        The row-wise concatenation of all per-run tables.
    """
    stacked = []
    for runs_path, label in zip(run_dirs, names):
        scores = get_one_table(runs_path, metric_types, baseline)

        baseline_label = 'baseline|' + '|'.join(label.split('|')[2:])
        # Row keys are taken before the baseline copy is appended below.
        row_keys = [(label, ue_name) for ue_name in scores.index]
        row_keys.append((baseline_label, 'max_prob'))

        scores.loc[baseline_label] = scores.loc['baseline (max_prob)']
        scores.index = pd.MultiIndex.from_tuples(row_keys, names=['Method', 'UE Score'])
        # The original baseline row is now duplicated under baseline_label.
        scores.drop((label, 'baseline (max_prob)'), inplace=True)

        stacked.append(scores)
    return pd.concat(stacked)


def collect_datasets(runs_dirs, names, dataset_names, metric_types=["rejection-curve-auc", "roc-auc", "rcc-auc", "pr-auc", "rpp"], baselines={}):
    """Join the per-dataset tables side by side under (dataset, metric) columns.

    Args:
        runs_dirs: One entry per dataset; each entry is passed to
            ``collect_tables`` as its ``run_dirs`` argument.
        names: Passed to ``collect_tables`` for every dataset.
        dataset_names: Dataset labels, paired positionally with ``runs_dirs``.
        metric_types: Metric names forwarded to ``collect_tables``.
        baselines: Mapping from dataset label to the baseline forwarded to
            ``collect_tables``; missing labels use ``None``. Only read here.

    Returns:
        The column-wise concatenation of the tables that could be built.

    Raises:
        ValueError: From ``pd.concat`` when no dataset produced a table.
    """
    all_tables = []
    for run_dir, dataset_name in zip(runs_dirs, dataset_names):
        try:
            dataset_table = collect_tables(run_dir, names, metric_types, baselines.get(dataset_name, None))
            columns = pd.MultiIndex.from_tuples([(dataset_name, ind) for ind in list(dataset_table.columns)])
            dataset_table.columns = columns
            all_tables.append(dataset_table)
        except Exception as err:
            # Best effort: skip this dataset, but show the real reason. Catching
            # Exception (not everything) lets KeyboardInterrupt stop the loop.
            print(f'empty dir {run_dir}: {err!r}')
    return pd.concat(all_tables, axis=1)

In [125]:
import os 

# UE estimators passed to aggregate_runs for the baseline computation.
default_methods = {
    "bald": bald,
    "sampled_max_prob": sampled_max_prob,
    "variance": probability_variance,
}

metric_types = ["rejection-curve-auc", "rcc-auc", 'rpp']
methods = ['mahalanobis']
regs = ['raw']
dataset_names = ['MRPC', 'CoLA', 'SST-2']
dataset_fnames = ['mrpc', 'cola', 'sst2']
names = []
tables = []

# One single-row ('max_prob') frame per dataset, with one column per metric:
# the run-averaged score of the max_prob baseline.
raw_baselines = {}
for ds_fname, ds_name in zip(dataset_fnames, dataset_names):
    model_series_dir = f'../workdir/final_res/run_glue_for_model_series/electra-raw/{ds_fname}/0.0/ddpp_dpp/'
    table = [
        aggregate_runs(
            model_series_dir,
            methods=default_methods,
            metric=choose_metric(metric_type=metric_type),
        ).mean(axis=0).T.loc[['max_prob']]
        for metric_type in metric_types
    ]
    res_table = pd.concat(table, axis=1)
    res_table.columns = metric_types
    raw_baselines[ds_name] = res_table

In [126]:
raw_baselines

{'MRPC':           rejection-curve-auc    rcc-auc       rpp
 max_prob             0.926328  17.186488  0.021353,
 'CoLA':           rejection-curve-auc    rcc-auc       rpp
 max_prob             0.919459  61.845381  0.027353,
 'SST-2':           rejection-curve-auc    rcc-auc       rpp
 max_prob             0.937949  18.193756  0.012302}

In [127]:
import json

# Load the stored baseline scores (nested dict: outer key -> inner key -> value).
baselines_path = '../../glue_new_baselines.json'
with open(baselines_path) as baselines_file:
    raw_baselines_ = json.load(baselines_file)

In [128]:
# Wrap every stored value as a one-element list holding a float.
# Values that are already lists are kept as they are, so re-running this cell
# no longer fails with float() being called on a list.
raw_baselines_ = {
    dataset: {
        metric: value if isinstance(value, list) else [float(value)]
        for metric, value in scores.items()
    }
    for dataset, scores in raw_baselines_.items()
}

In [129]:
# Rebuild raw_baselines from the JSON scores: dataset label used in the tables
# -> key under which that dataset is stored in raw_baselines_.
raw_baselines = {
    table_label: pd.DataFrame.from_dict(raw_baselines_[json_key])
    for table_label, json_key in (
        ('MRPC', 'MRPC'),
        ('CoLA', 'CoLA'),
        ('SST-2', 'SST2 (10%)'),
    )
}

# MC-DPP all

In [130]:
import os 

# Metrics reported in this section; the variant with "rejection-curve-auc" is kept for reference.
#metric_types=["rejection-curve-auc", "rcc-auc", 'rpp']
metric_types=["rcc-auc", 'rpp']

methods = ['ddpp_dpp', 'ddpp_ood']
regs = ['raw', 'reg']

dataset_names = ['MRPC', 'CoLA', 'SST-2']
dataset_fnames = ['mrpc', 'cola', 'sst2']
names = []
tables = []
baselines = []
for method in methods:
    for reg in regs:
        run_dirs = []
        names = [f'{method}|{reg}']
        for name in dataset_fnames:
            model_series_dir = f'../workdir/final_res/run_glue_for_model_series/electra-{reg}/{name}/0.0/{method}'
            #print(model_series_dir)
            run_dirs.append([model_series_dir])
        try:
            res_df = collect_datasets(run_dirs, names, dataset_names, metric_types=metric_types, baselines=raw_baselines)
        except Exception as err:
            # Skip this method/regularization pair but report the actual error;
            # KeyboardInterrupt is deliberately not caught.
            print(f'Not exists one of this dirs: {run_dirs} ({err!r})')
            continue
        # The last row of res_df is kept apart as the baseline; the rest are scores.
        baselines.append(res_df.iloc[-1:])
        tables.append(res_df.iloc[:-1])

In [133]:
def preproc_regs(x):
    """Return '-' when the last '|'-separated part of ``x`` is 'raw', else 'CER'."""
    reg_tag = x.rsplit('|', 1)[-1]
    if reg_tag == 'raw':
        return '-'
    return 'CER'

def preproc_methods(x):
    """Translate the first '|'-separated part of ``x`` into its display name.

    Anything other than 'ddpp_ood' and 'ddpp_dpp' is shown as 'SR'.
    """
    display_names = {
        'ddpp_ood': 'DDPP (+OOD) (Ours)',
        'ddpp_dpp': 'DDPP (+DPP) (Ours)',
    }
    return display_names.get(x.partition('|')[0], 'SR')

def preproc_ues(x):
    """Translate a UE score key into its short table label ('MP' if unknown)."""
    short_labels = (
        ('bald', 'BALD'),
        ('sampled_max_prob', 'SMP'),
        ('variance', 'PV'),
        ('var.ratio', 'VR'),
    )
    for key, label in short_labels:
        if x == key:
            return label
    return 'MP'

# Flatten the (Method, UE Score) index into columns and make the labels readable.
table_dpp_reg = pd.concat(tables).reset_index()
# 'Reg. Type' must be derived before 'Method' is overwritten with display names.
table_dpp_reg['Reg. Type'] = table_dpp_reg['Method'].apply(preproc_regs)
table_dpp_reg['Method'] = table_dpp_reg['Method'].apply(preproc_methods)
table_dpp_reg['UE Score'] = table_dpp_reg['UE Score'].apply(preproc_ues)
# Move the last column ('Reg. Type') to the second position.
column_order = list(table_dpp_reg.columns)
table_dpp_reg = table_dpp_reg[column_order[:1] + column_order[-1:] + column_order[1:-1]]

In [135]:
table_dpp_reg

Unnamed: 0_level_0,Method,Reg. Type,UE Score,MRPC,MRPC,CoLA,CoLA,SST-2,SST-2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,rcc-auc,rpp,rcc-auc,rpp,rcc-auc,rpp
0,DDPP (+DPP) (Ours),-,BALD,19.44±4.31,2.28±0.38,61.24±9.04,2.85±0.19,17.27±3.39,1.05±0.17
1,DDPP (+DPP) (Ours),-,SMP,17.84±4.71,2.17±0.49,60.16±10.10,2.78±0.24,18.40±3.09,1.24±0.23
2,DDPP (+DPP) (Ours),-,PV,18.77±4.90,2.23±0.44,62.11±11.04,2.81±0.23,17.04±2.72,1.14±0.21
3,DDPP (+DPP) (Ours),-,VR,41.84±5.68,5.39±0.50,145.17±6.56,5.27±0.29,49.03±5.28,3.28±0.36
4,DDPP (+DPP) (Ours),CER,BALD,20.61±5.78,2.37±0.62,54.93±6.30,2.72±0.30,18.22±3.60,1.16±0.31
5,DDPP (+DPP) (Ours),CER,SMP,19.69±4.13,2.42±0.38,52.62±5.76,2.60±0.30,17.35±3.15,1.16±0.26
6,DDPP (+DPP) (Ours),CER,PV,18.27±5.25,2.24±0.54,54.13±5.82,2.68±0.30,17.59±3.80,1.14±0.31
7,DDPP (+DPP) (Ours),CER,VR,43.19±5.45,5.68±0.34,138.98±10.07,5.47±0.30,47.68±3.53,3.60±0.24
8,DDPP (+OOD) (Ours),-,BALD,21.92±4.78,2.50±0.58,82.59±5.64,2.96±0.15,25.01±6.83,1.44±0.35
9,DDPP (+OOD) (Ours),-,SMP,19.11±6.58,2.24±0.29,78.49±9.84,2.87±0.24,17.14±4.61,1.06±0.22


In [136]:
# Raw string: '\p' is not a valid escape sequence in a normal string literal.
print(table_dpp_reg.to_latex(index=False).replace('±', r'$\pm$'))

\begin{tabular}{lllllllll}
\toprule
            Method & Reg. Type & UE Score & \multicolumn{2}{l}{MRPC} & \multicolumn{2}{l}{CoLA} & \multicolumn{2}{l}{SST-2} \\
                   &    rcc-auc &       rpp &      rcc-auc &       rpp &    rcc-auc &       rpp \\
\midrule
DDPP (+DPP) (Ours) &         - &     BALD & 19.44$\pm$4.31 & 2.28$\pm$0.38 &   61.24$\pm$9.04 & 2.85$\pm$0.19 & 17.27$\pm$3.39 & 1.05$\pm$0.17 \\
DDPP (+DPP) (Ours) &         - &      SMP & 17.84$\pm$4.71 & 2.17$\pm$0.49 &  60.16$\pm$10.10 & 2.78$\pm$0.24 & 18.40$\pm$3.09 & 1.24$\pm$0.23 \\
DDPP (+DPP) (Ours) &         - &       PV & 18.77$\pm$4.90 & 2.23$\pm$0.44 &  62.11$\pm$11.04 & 2.81$\pm$0.23 & 17.04$\pm$2.72 & 1.14$\pm$0.21 \\
DDPP (+DPP) (Ours) &         - &       VR & 41.84$\pm$5.68 & 5.39$\pm$0.50 &  145.17$\pm$6.56 & 5.27$\pm$0.29 & 49.03$\pm$5.28 & 3.28$\pm$0.36 \\
DDPP (+DPP) (Ours) &       CER &     BALD & 20.61$\pm$5.78 & 2.37$\pm$0.62 &   54.93$\pm$6.30 & 2.72$\pm$0.30 & 18.22$\pm$3.60 & 1.16$\pm$0.31 \\

# DPP calibration

In [109]:
import os 

# Metrics reported in this section; the variant with "rejection-curve-auc" is kept for reference.
#metric_types=["rejection-curve-auc", "rcc-auc", 'rpp']
metric_types=["rcc-auc", 'rpp']
methods = ['ddpp_dpp', 'ddpp_ood']
regs = ['raw']#, 'reg']

dataset_names = ['MRPC', 'CoLA', 'SST-2']
dataset_fnames = ['mrpc', 'cola', 'sst2']
names = []
tables = []
baselines = []
for method in methods:
    for reg in regs:
        run_dirs = []
        names = [f'{method}|{reg}']
        for name in dataset_fnames:
            # Same layout as the other sections, but under the '0.1' sub-directory.
            model_series_dir = f'../workdir/final_res/run_glue_for_model_series/electra-{reg}/{name}/0.1/{method}'
            #print(model_series_dir)
            run_dirs.append([model_series_dir])
        try:
            res_df = collect_datasets(run_dirs, names, dataset_names, metric_types=metric_types, baselines=raw_baselines)
        except Exception as err:
            # Skip this method/regularization pair but report the actual error;
            # KeyboardInterrupt is deliberately not caught.
            print(f'Not exists one of this dirs: {run_dirs} ({err!r})')
            continue
        # The last row of res_df is kept apart as the baseline; the rest are scores.
        baselines.append(res_df.iloc[-1:])
        tables.append(res_df.iloc[:-1])

In [110]:
def preproc_regs(x):
    """Return '-' when the last '|'-separated part of ``x`` is 'raw', else 'CER'."""
    reg_tag = x.rsplit('|', 1)[-1]
    if reg_tag == 'raw':
        return '-'
    return 'CER'

def preproc_methods(x):
    """Translate the first '|'-separated part of ``x`` into its display name.

    Anything other than 'ddpp_ood' and 'ddpp_dpp' is shown as 'SR'.
    """
    display_names = {
        'ddpp_ood': 'DDPP (+OOD) (Ours)',
        'ddpp_dpp': 'DDPP (+DPP) (Ours)',
    }
    return display_names.get(x.partition('|')[0], 'SR')

def preproc_ues(x):
    """Translate a UE score key into its short table label ('MP' if unknown).

    'var.ratio' is mapped to 'VR': get_one_table evaluates that score as well,
    and without the mapping its rows would be mislabelled as 'MP'.
    """
    short_labels = (
        ('bald', 'BALD'),
        ('sampled_max_prob', 'SMP'),
        ('variance', 'PV'),
        ('var.ratio', 'VR'),
    )
    for key, label in short_labels:
        if x == key:
            return label
    return 'MP'

# Flatten the (Method, UE Score) index into columns and make the labels readable.
table_dpp_cal = pd.concat(tables).reset_index()
# Every row of this table gets the same calibration label.
table_dpp_cal['Calibr. Dataset'] = 'val.'
table_dpp_cal['Method'] = table_dpp_cal['Method'].apply(preproc_methods)
table_dpp_cal['UE Score'] = table_dpp_cal['UE Score'].apply(preproc_ues)
# Move the last column ('Calibr. Dataset') to the second position.
column_order = list(table_dpp_cal.columns)
table_dpp_cal_val = table_dpp_cal[column_order[:1] + column_order[-1:] + column_order[1:-1]]

In [111]:
table_dpp_cal_val

Unnamed: 0_level_0,Method,Calibr. Dataset,UE Score,MRPC,MRPC,CoLA,CoLA,SST-2,SST-2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,rcc-auc,rpp,rcc-auc,rpp,rcc-auc,rpp
0,DDPP (+DPP) (Ours),val.,BALD,29.07±5.64,3.14±0.71,65.06±12.18,3.15±0.22,21.78±8.36,1.26±0.36
1,DDPP (+DPP) (Ours),val.,SMP,25.85±6.97,2.89±0.50,61.67±10.68,2.78±0.20,19.76±6.17,1.28±0.35
2,DDPP (+DPP) (Ours),val.,PV,26.50±6.11,2.91±0.47,61.79±10.76,2.95±0.19,19.28±7.31,1.13±0.27
3,DDPP (+OOD) (Ours),val.,BALD,36.18±6.91,3.70±0.64,83.14±11.21,3.03±0.46,26.80±6.62,1.56±0.29
4,DDPP (+OOD) (Ours),val.,SMP,24.76±5.21,2.71±0.32,76.92±10.19,2.80±0.30,18.65±6.61,1.17±0.32
5,DDPP (+OOD) (Ours),val.,PV,27.74±6.86,2.94±0.57,78.99±9.69,2.86±0.33,22.99±5.80,1.37±0.28


In [112]:
# Work on an explicit copy of the unregularized rows: editing the filtered
# slice of table_dpp_reg in place triggers SettingWithCopyWarning.
table_dpp_cal_train = table_dpp_reg[table_dpp_reg['Reg. Type'] == '-'].copy()
table_dpp_cal_train = table_dpp_cal_train.drop(columns=['Reg. Type'])
table_dpp_cal_train['Calibr. Dataset'] = 'train'
# Same column order as the validation table so the two can be concatenated.
table_dpp_cal_train = table_dpp_cal_train[table_dpp_cal_val.columns]

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [113]:
# For each method (in the order of the validation table): its 'train' rows
# followed by its 'val.' rows. Selecting by method instead of by fixed row
# positions keeps the grouping right for any number of UE scores per method.
table_dpp_cal_full = pd.concat([
    part[part['Method'] == method]
    for method in table_dpp_cal_val['Method'].unique()
    for part in (table_dpp_cal_train, table_dpp_cal_val)
])

In [114]:
table_dpp_cal_full

Unnamed: 0_level_0,Method,Calibr. Dataset,UE Score,MRPC,MRPC,CoLA,CoLA,SST-2,SST-2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,rcc-auc,rpp,rcc-auc,rpp,rcc-auc,rpp
0,DDPP (+DPP) (Ours),train,BALD,19.44±4.31,2.28±0.38,61.24±9.04,2.85±0.19,17.27±3.39,1.05±0.17
1,DDPP (+DPP) (Ours),train,SMP,17.84±4.71,2.17±0.49,60.16±10.10,2.78±0.24,18.40±3.09,1.24±0.23
2,DDPP (+DPP) (Ours),train,PV,18.77±4.90,2.23±0.44,62.11±11.04,2.81±0.23,17.04±2.72,1.14±0.21
0,DDPP (+DPP) (Ours),val.,BALD,29.07±5.64,3.14±0.71,65.06±12.18,3.15±0.22,21.78±8.36,1.26±0.36
1,DDPP (+DPP) (Ours),val.,SMP,25.85±6.97,2.89±0.50,61.67±10.68,2.78±0.20,19.76±6.17,1.28±0.35
2,DDPP (+DPP) (Ours),val.,PV,26.50±6.11,2.91±0.47,61.79±10.76,2.95±0.19,19.28±7.31,1.13±0.27
6,DDPP (+OOD) (Ours),train,BALD,21.92±4.78,2.50±0.58,82.59±5.64,2.96±0.15,25.01±6.83,1.44±0.35
7,DDPP (+OOD) (Ours),train,SMP,19.11±6.58,2.24±0.29,78.49±9.84,2.87±0.24,17.14±4.61,1.06±0.22
8,DDPP (+OOD) (Ours),train,PV,19.79±5.55,2.35±0.53,80.78±6.16,2.92±0.18,23.80±6.49,1.36±0.33
3,DDPP (+OOD) (Ours),val.,BALD,36.18±6.91,3.70±0.64,83.14±11.21,3.03±0.46,26.80±6.62,1.56±0.29


In [115]:
# Raw string: '\p' is not a valid escape sequence in a normal string literal.
print(table_dpp_cal_full.to_latex(index=False).replace('±', r'$\pm$'))

\begin{tabular}{lllllllll}
\toprule
            Method & Calibr. Dataset & UE Score & \multicolumn{2}{l}{MRPC} & \multicolumn{2}{l}{CoLA} & \multicolumn{2}{l}{SST-2} \\
                   &    rcc-auc &       rpp &     rcc-auc &       rpp &    rcc-auc &       rpp \\
\midrule
DDPP (+DPP) (Ours) &           train &     BALD & 19.44$\pm$4.31 & 2.28$\pm$0.38 &  61.24$\pm$9.04 & 2.85$\pm$0.19 & 17.27$\pm$3.39 & 1.05$\pm$0.17 \\
DDPP (+DPP) (Ours) &           train &      SMP & 17.84$\pm$4.71 & 2.17$\pm$0.49 & 60.16$\pm$10.10 & 2.78$\pm$0.24 & 18.40$\pm$3.09 & 1.24$\pm$0.23 \\
DDPP (+DPP) (Ours) &           train &       PV & 18.77$\pm$4.90 & 2.23$\pm$0.44 & 62.11$\pm$11.04 & 2.81$\pm$0.23 & 17.04$\pm$2.72 & 1.14$\pm$0.21 \\
DDPP (+DPP) (Ours) &            val. &     BALD & 29.07$\pm$5.64 & 3.14$\pm$0.71 & 65.06$\pm$12.18 & 3.15$\pm$0.22 & 21.78$\pm$8.36 & 1.26$\pm$0.36 \\
DDPP (+DPP) (Ours) &            val. &      SMP & 25.85$\pm$6.97 & 2.89$\pm$0.50 & 61.67$\pm$10.68 & 2.78$\pm$0.20 & 19.

# DPP Raw

In [116]:
import os 

# Metrics reported in this section; the variant with "rejection-curve-auc" is kept for reference.
#metric_types=["rejection-curve-auc", "rcc-auc", 'rpp']
metric_types=["rcc-auc", 'rpp']
methods = ['ddpp_dpp']
regs = ['raw']#, 'reg']

dataset_names = ['MRPC', 'CoLA', 'SST-2']
dataset_fnames = ['mrpc', 'cola', 'sst2']
names = []
tables = []
baselines = []
for method in methods:
    for reg in regs:
        run_dirs = []
        # Both the run name and the result directory carry a 'raw_' prefix here.
        names = [f'raw_{method}|{reg}']
        for name in dataset_fnames:
            model_series_dir = f'../workdir/final_res/run_glue_for_model_series/electra-{reg}/{name}/0.0/raw_{method}'
            #print(model_series_dir)
            run_dirs.append([model_series_dir])
        try:
            res_df = collect_datasets(run_dirs, names, dataset_names, metric_types=metric_types, baselines=raw_baselines)
        except Exception as err:
            # Skip this method/regularization pair but report the actual error;
            # KeyboardInterrupt is deliberately not caught.
            print(f'Not exists one of this dirs: {run_dirs} ({err!r})')
            continue
        # The last row of res_df is kept apart as the baseline; the rest are scores.
        baselines.append(res_df.iloc[-1:])
        tables.append(res_df.iloc[:-1])

In [117]:
def preproc_regs(x):
    """Return '-' when the last '|'-separated part of ``x`` is 'raw', else 'CER'."""
    reg_tag = x.rsplit('|', 1)[-1]
    if reg_tag == 'raw':
        return '-'
    return 'CER'

def preproc_methods(x):
    """Show runs whose first '|'-separated part is 'raw_ddpp_dpp' as 'DPP', all others as 'SR'."""
    return 'DPP' if x.partition('|')[0] == 'raw_ddpp_dpp' else 'SR'

def preproc_ues(x):
    """Translate a UE score key into its short table label ('MP' if unknown).

    'var.ratio' is mapped to 'VR': get_one_table evaluates that score as well,
    and without the mapping its rows would be mislabelled as 'MP'.
    """
    short_labels = (
        ('bald', 'BALD'),
        ('sampled_max_prob', 'SMP'),
        ('variance', 'PV'),
        ('var.ratio', 'VR'),
    )
    for key, label in short_labels:
        if x == key:
            return label
    return 'MP'

# Flatten the (Method, UE Score) index into columns and make the labels readable.
table_dpp_raw = pd.concat(tables).reset_index()
table_dpp_raw['Method'] = table_dpp_raw['Method'].apply(preproc_methods)
table_dpp_raw['UE Score'] = table_dpp_raw['UE Score'].apply(preproc_ues)

In [118]:
table_dpp_raw

Unnamed: 0_level_0,Method,UE Score,MRPC,MRPC,CoLA,CoLA,SST-2,SST-2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,rcc-auc,rpp,rcc-auc,rpp,rcc-auc,rpp
0,DPP,BALD,22.16±4.42,2.51±0.51,62.85±7.80,3.06±0.26,19.10±2.85,1.19±0.22
1,DPP,SMP,18.04±4.85,2.18±0.49,57.92±6.33,2.84±0.24,18.37±2.72,1.24±0.23
2,DPP,PV,20.40±5.07,2.36±0.53,61.18±7.12,2.99±0.26,18.36±1.68,1.22±0.20


In [119]:
# Append the unregularized ('-') rows of table_dpp_reg to the raw DPP rows.
# Selecting by 'Reg. Type' instead of fixed row positions keeps the selection
# right when the number of UE scores per method changes.
is_unregularized = table_dpp_reg['Reg. Type'] == '-'
table_dpp_raw_full = pd.concat([table_dpp_raw, table_dpp_reg[table_dpp_raw.columns][is_unregularized]])

In [120]:
table_dpp_raw_full

Unnamed: 0_level_0,Method,UE Score,MRPC,MRPC,CoLA,CoLA,SST-2,SST-2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,rcc-auc,rpp,rcc-auc,rpp,rcc-auc,rpp
0,DPP,BALD,22.16±4.42,2.51±0.51,62.85±7.80,3.06±0.26,19.10±2.85,1.19±0.22
1,DPP,SMP,18.04±4.85,2.18±0.49,57.92±6.33,2.84±0.24,18.37±2.72,1.24±0.23
2,DPP,PV,20.40±5.07,2.36±0.53,61.18±7.12,2.99±0.26,18.36±1.68,1.22±0.20
0,DDPP (+DPP) (Ours),BALD,19.44±4.31,2.28±0.38,61.24±9.04,2.85±0.19,17.27±3.39,1.05±0.17
1,DDPP (+DPP) (Ours),SMP,17.84±4.71,2.17±0.49,60.16±10.10,2.78±0.24,18.40±3.09,1.24±0.23
2,DDPP (+DPP) (Ours),PV,18.77±4.90,2.23±0.44,62.11±11.04,2.81±0.23,17.04±2.72,1.14±0.21
6,DDPP (+OOD) (Ours),BALD,21.92±4.78,2.50±0.58,82.59±5.64,2.96±0.15,25.01±6.83,1.44±0.35
7,DDPP (+OOD) (Ours),SMP,19.11±6.58,2.24±0.29,78.49±9.84,2.87±0.24,17.14±4.61,1.06±0.22
8,DDPP (+OOD) (Ours),PV,19.79±5.55,2.35±0.53,80.78±6.16,2.92±0.18,23.80±6.49,1.36±0.33


In [122]:
# Raw string: '\p' is not a valid escape sequence in a normal string literal.
print(table_dpp_raw_full.to_latex(index=False).replace('±', r'$\pm$'))

\begin{tabular}{llllllll}
\toprule
            Method & UE Score & \multicolumn{2}{l}{MRPC} & \multicolumn{2}{l}{CoLA} & \multicolumn{2}{l}{SST-2} \\
                   &    rcc-auc &       rpp &     rcc-auc &       rpp &    rcc-auc &       rpp \\
\midrule
               DPP &     BALD & 22.16$\pm$4.42 & 2.51$\pm$0.51 &  62.85$\pm$7.80 & 3.06$\pm$0.26 & 19.10$\pm$2.85 & 1.19$\pm$0.22 \\
               DPP &      SMP & 18.04$\pm$4.85 & 2.18$\pm$0.49 &  57.92$\pm$6.33 & 2.84$\pm$0.24 & 18.37$\pm$2.72 & 1.24$\pm$0.23 \\
               DPP &       PV & 20.40$\pm$5.07 & 2.36$\pm$0.53 &  61.18$\pm$7.12 & 2.99$\pm$0.26 & 18.36$\pm$1.68 & 1.22$\pm$0.20 \\
DDPP (+DPP) (Ours) &     BALD & 19.44$\pm$4.31 & 2.28$\pm$0.38 &  61.24$\pm$9.04 & 2.85$\pm$0.19 & 17.27$\pm$3.39 & 1.05$\pm$0.17 \\
DDPP (+DPP) (Ours) &      SMP & 17.84$\pm$4.71 & 2.17$\pm$0.49 & 60.16$\pm$10.10 & 2.78$\pm$0.24 & 18.40$\pm$3.09 & 1.24$\pm$0.23 \\
DDPP (+DPP) (Ours) &       PV & 18.77$\pm$4.90 & 2.23$\pm$0.44 & 62.11$\pm$11.