In [1]:
from myst_nb import glue

# Results
The evaluation of automatic predictions had two different scenarios or sub-tracks:

1.  **NER offset and entity type classification**: the first sub-track was focused
on the identification and classification of sensitive information (e.g., patient
names, telephones, addresses, etc.).  

2.  **Sensitive span detection**: the second sub-track was focused on the detection
of sensitive text more specific to the practical scenario necessary for the
release of de-identified clinical documents, where the objective is to identify
and to mask confidential data, regardless of the real type of entity or the
correct identification of PHI type.

We evaluate our models using the various evaluation scripts and report the F1-score averaged over three runs.

First create some code to automatically extract the evaluation results.

In [2]:
import os
from collections import defaultdict
from pathlib import Path
from typing import Callable, DefaultDict, List, NamedTuple

import pandas as pd


class SubtrackScores(NamedTuple):
    """Precision/recall pair read from one evaluation file."""

    # Precision value parsed from the evaluation file.
    precision: float
    # Recall value parsed from the evaluation file.
    recall: float

def _get_scores(folder_path: Path, filename: str, precision_line: int, recall_line: int) -> SubtrackScores:
    """Read a precision/recall pair from ``folder_path / filename``.

    The file content is split on ``"\\n"`` and the two requested lines are
    looked up by index (negative indices count from the end; note that a
    trailing newline produces a final empty element that is counted too).
    On each selected line, the text after the last ``=`` is parsed as a float.

    Args:
        folder_path: Folder that contains the evaluation file.
        filename: Name of the evaluation file inside ``folder_path``.
        precision_line: Index of the line holding the precision value.
        recall_line: Index of the line holding the recall value.

    Returns:
        A ``SubtrackScores`` with the parsed precision and recall.

    Raises:
        FileNotFoundError: If the evaluation file does not exist.
        IndexError: If a requested line index is out of range.
        ValueError: If the text after ``=`` is not a valid float.
    """
    score_file = Path(folder_path / filename)
    if not score_file.exists():
        raise FileNotFoundError(f"{score_file} not found!")

    rows = score_file.read_text().split("\n")

    def _value_at(line_no: int) -> float:
        # Keep only what follows the last "=" on the line.
        return float(rows[line_no].rsplit("=", 1)[-1])

    precision = _value_at(precision_line)
    recall = _value_at(recall_line)
    return SubtrackScores(precision, recall)

def get_subtrack1_scores(folder_path: Path) -> SubtrackScores:
    """Return the scores stored in the ``ner`` file of ``folder_path``.

    Precision is read from the third-to-last line and recall from the
    second-to-last line of the file.
    """
    return _get_scores(folder_path, "ner", precision_line=-3, recall_line=-2)

def get_subtrack2_strict_scores(folder_path: Path) -> SubtrackScores:
    """Return the first score pair stored in the ``spans`` file of ``folder_path``.

    Precision is read from the sixth-to-last line and recall from the
    fifth-to-last line of the file.
    """
    return _get_scores(folder_path, "spans", precision_line=-6, recall_line=-5)

def get_subtrack2_merged_scores(folder_path: Path) -> SubtrackScores:
    """Return the last score pair stored in the ``spans`` file of ``folder_path``.

    Precision is read from the third-to-last line and recall from the
    second-to-last line of the file.
    """
    return _get_scores(folder_path, "spans", precision_line=-3, recall_line=-2)

def get_scores_as_df(seeds: List[int], get_folder: Callable[[int], Path]) -> pd.DataFrame:
    """Collect precision, recall and F1 of every sub-track for each seed.

    Args:
        seeds: Seeds of the runs to load; must contain at least one element.
        get_folder: Maps a seed to the folder holding that run's evaluation
            files (the ``ner`` and ``spans`` files read by the
            ``get_subtrack*_scores`` helpers).

    Returns:
        A DataFrame whose rows are indexed by a ``(Track, Scores)`` MultiIndex
        (three tracks times ``precision``/``recall``/``f1``) and whose columns
        hold one run each, in the order of ``seeds`` (default integer labels).

    Raises:
        ValueError: If ``seeds`` is empty.
        FileNotFoundError: If an evaluation file is missing for some seed.
    """
    seeds = list(seeds)
    if not seeds:
        # Without this guard an empty input fails later with an opaque KeyError: '1_p'.
        raise ValueError("seeds must contain at least one seed")

    # Column prefix -> function extracting (precision, recall) for that track.
    extractors = (
        ("1", get_subtrack1_scores),
        ("2_1", get_subtrack2_strict_scores),
        ("2_2", get_subtrack2_merged_scores),
    )

    subtracks_scores: DefaultDict[str, List[float]] = defaultdict(list)
    for seed in seeds:
        fpth = get_folder(seed)
        for prefix, extract in extractors:
            p, r = extract(fpth)
            subtracks_scores[f"{prefix}_p"].append(p)
            subtracks_scores[f"{prefix}_r"].append(r)

    df = pd.DataFrame.from_dict(subtracks_scores)
    for prefix, _ in extractors:
        p_col, r_col = df[f"{prefix}_p"], df[f"{prefix}_r"]
        # Harmonic mean of precision and recall (NaN when both are zero).
        df[f"{prefix}_f1"] = 2 * p_col * r_col / (p_col + r_col)

    # Reorder columns so they line up with the MultiIndex product below.
    new_columns = ["1_p", "1_r", "1_f1", "2_1_p", "2_1_r", "2_1_f1", "2_2_p", "2_2_r", "2_2_f1"]
    df = df[new_columns]

    # Prepare multi index names
    multi_index = pd.MultiIndex.from_product(
        [
            ["Subtrack 1", "Subtrack 2 [Strict]", "Subtrack 2 [Merged]"],
            ["precision", "recall", "f1"]
        ],
        names=["Track", "Scores"]
    )
    # Transpose so that metrics become rows (indexed by the MultiIndex) and runs become columns.
    return pd.DataFrame(df.to_numpy().T, index=multi_index)

Next, define code to visualize the results in a convenient way.

In [3]:
import pandas as pd
import matplotlib.pyplot as plt  
from matplotlib import colors

def make_pretty(styler):
    """Apply the table layout used in this notebook to ``styler`` and return it.

    Adds CSS rules for headers and cells, hides index level 2 and column
    level 1, and formats values with a precision of 2.

    Note: later cells of this notebook define ``make_pretty`` again; each
    definition replaces the previous one when its cell is run.
    """
    table_styles = [
        {'selector': '.index_name', 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'},
        {'selector': 'th.level1', 'props': 'text-align: left;'},
        {'selector': 'th.level0', 'props': 'text-align: center;'},
        {'selector': 'th.col_heading', 'props': 'text-align: center;'},
        {'selector': 'th.col_heading.level0', 'props': 'font-size: 1.5em;'},
        {'selector': 'td', 'props': 'text-align: center; font-weight: bold;'},
    ]
    # Each Styler method returns the styler itself, so the calls can be chained.
    return (
        styler
        .set_table_styles(table_styles, overwrite=False)
        .hide(axis="index", level=2)
        .hide(axis="columns", level=1)
        .format(precision=2)
    )

def visualize_df(df: pd.DataFrame):
    """Render ``df`` as a styled table of ``mean ± std`` strings.

    Rows at even positions of ``df`` are treated as means and rows at odd
    positions as the matching standard deviations. Values are multiplied by
    100 and rounded to two decimals before being joined as text. Index level
    2 is dropped from both halves so they align on the remaining levels.

    Note: a later cell of this notebook defines ``visualize_df`` again; that
    definition replaces this one when its cell is run.

    Args:
        df: Frame with alternating mean/std rows and at least three index levels.

    Returns:
        A pandas Styler over the text frame, with a background colour per
        cell derived from the mean values and the ``make_pretty`` layout applied.
    """
    # Get the text that will be displayed in the form mean plus minus std
    scaled = df * 100
    std = scaled.iloc[1::2, ::].round(2).astype(str).droplevel(2)
    mean = scaled.iloc[::2, ::].round(2).astype(str).droplevel(2)
    df_txt = (mean + " \u00b1 " + std)

    # Extract the mean value that will serve to create the gradient map
    background_df = df.iloc[::2, ::]

    def b_g(s, cmap='PuBu', low=0, high=0):
        # Taken from https://stackoverflow.com/questions/47391948/pandas-style-background-gradient-using-other-dataframe
        # Colour the column from the numeric means, not from the displayed text.
        a = background_df.loc[:, s.name].copy()
        rng = a.max() - a.min()
        norm = colors.Normalize(a.min() - (rng * low), a.max() + (rng * high))
        normed = norm(a.values)
        # plt.get_cmap works across matplotlib versions; plt.cm.get_cmap was removed in 3.9.
        # Scaling by 0.9 keeps the darkest end of the colormap out of the table.
        c = [colors.rgb2hex(x) for x in plt.get_cmap(cmap)(normed*0.9)]
        return ['background-color: %s' % color for color in c]

    return df_txt.style.apply(b_g, cmap='PuBu').pipe(make_pretty)

Define the root folder where all the results and the trained models are stored

In [4]:
# Root folder of the results and trained models. The default is the original
# machine-specific location; set the MEDDOCAN_BASE_FOLDER environment variable
# to point the notebook at another checkout without editing this cell.
base_folder = Path(os.environ.get("MEDDOCAN_BASE_FOLDER", "/home/wave/Project/MedDocAn"))

Store the evaluation for each model in a ``pandas.DataFrame``.

## LSTM CRF + FLAIR + WE

In [5]:
seeds = [1, 10, 25, 33, 42]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_flair_we_lstm_crf/results_seed_{seed}/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
LSTM_CRF_FLAIR_WE = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
LSTM_CRF_FLAIR_WE

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.968385,0.001274
Subtrack 2 [Strict],f1,0.973075,0.001142
Subtrack 2 [Merged],f1,0.984384,0.000787


## LSTM CRF + FLAIR

In [6]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_flair_lstm_crf/an_wh_rs_True_dpt_0.08716810045694838_emb_seed_{seed}_Stack(0_lm-es-forward.pt, 1_lm-es-backward.pt)_hdn_sz_256_lr_0.1_it_150_bs_4_opti_SGD_pjct_emb_True_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
LSTM_CRF_FLAIR = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
LSTM_CRF_FLAIR

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.968768,0.001296
Subtrack 2 [Strict],f1,0.976428,0.001584
Subtrack 2 [Merged],f1,0.983721,0.001211


## FINETUNE + BETO + CONTEXT

In [7]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_context_finetune/an_wh_rs_False_dpt_0_emb_beto-cased-context_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_150_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
FINE_TUNE_BETO_CONTEXT = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
FINE_TUNE_BETO_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.973673,0.001587
Subtrack 2 [Strict],f1,0.97966,0.001506
Subtrack 2 [Merged],f1,0.985812,0.000989


## FINETUNE + BETO + CONTEXT + WE

In [8]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_context_we_finetune_it_40/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto-cased_FT_True_Ly_-1_seed_{seed})_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.05/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
FINE_TUNE_BETO_WE_CONTEXT = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
FINE_TUNE_BETO_WE_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.972428,0.000332
Subtrack 2 [Strict],f1,0.977995,0.000407
Subtrack 2 [Merged],f1,0.985487,0.000467


In [9]:
# Only two seeds are loaded here; seed 33 is left out of the list.
seeds = [1, 12]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_context_we_finetune_it_150/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto-cased_FT_True_Ly_-1_seed_{seed})_lr_5e-06_it_150_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# NOTE: this rebinds FINE_TUNE_BETO_WE_CONTEXT, replacing the value assigned by the
# preceding "it_40" cell; the summary tables further down therefore use this run.
FINE_TUNE_BETO_WE_CONTEXT = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
FINE_TUNE_BETO_WE_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.972346,0.001297
Subtrack 2 [Strict],f1,0.978853,0.000304
Subtrack 2 [Merged],f1,0.985047,0.001184


## FINETUNE + BETO

In [10]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_finetune_it_40/an_wh_rs_False_dpt_0_emb_beto-cased_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.05/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
FINE_TUNE_BETO = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
FINE_TUNE_BETO

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.972018,0.001341
Subtrack 2 [Strict],f1,0.977234,0.001437
Subtrack 2 [Merged],f1,0.984776,0.000964


## FINETUNE + BETO + WE

In [11]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_we_finetune_it_150/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto-cased_FT_True_Ly_-1_seed_{seed})_lr_5e-06_it_150_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
FINE_TUNE_BETO_WE = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
FINE_TUNE_BETO_WE

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.973058,0.000866
Subtrack 2 [Strict],f1,0.978869,0.000248
Subtrack 2 [Merged],f1,0.98628,0.000735


## LSTM CRF + BETO

In [12]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_lstm_crf/an_wh_rs_False_dpt_0_emb_beto_Ly_all_mean_seed_{seed}_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
LSTM_CRF_BETO = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
LSTM_CRF_BETO

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.971994,0.000463
Subtrack 2 [Strict],f1,0.978217,0.000734
Subtrack 2 [Merged],f1,0.985028,0.000503


## LSTM CRF + BETO + WE

In [13]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_we_lstm_crf/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto_Ly_all_mean_seed_{seed})_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
LSTM_CRF_BETO_WE = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
LSTM_CRF_BETO_WE

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.973711,0.000714
Subtrack 2 [Strict],f1,0.979351,0.000444
Subtrack 2 [Merged],f1,0.986297,0.00063


## LSTM CRF + BETO + WE + CONTEXT

In [14]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_context_we_lstm_crf/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto_Ly_all_mean_context_seed_{seed})_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
LSTM_CRF_BETO_WE_CONTEXT = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
LSTM_CRF_BETO_WE_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.974963,0.000642
Subtrack 2 [Strict],f1,0.980899,0.000815
Subtrack 2 [Merged],f1,0.986733,0.000919


## LSTM CRF + BETO + CONTEXT

In [15]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_bert_context_lstm_crf/an_wh_rs_False_dpt_0_emb_beto_Ly_all_mean_context_seed_{seed}_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
LSTM_CRF_BETO_CONTEXT = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
LSTM_CRF_BETO_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.970914,0.000867
Subtrack 2 [Strict],f1,0.977782,0.000641
Subtrack 2 [Merged],f1,0.984388,0.001002


## FINETUNE + XLMR LARGE + CONTEXT

In [17]:
seeds = [1, 12, 33]


def get_folders(seed):
    """Return the ``evals/test`` folder of the run identified by ``seed``."""
    run_dir = f"experiments/corpus_sentence_xlmrl_context_finetune/an_wh_rs_False_dpt_0_emb_xlm-roberta-large-cased-context_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/test"
    return base_folder / run_dir


df = get_scores_as_df(seeds, get_folders)
# Summarise across runs (one column per seed) and keep mean/std of the F1 rows only.
FINE_TUNE_XLMR_LARGE_CONTEXT = (
    df.T.describe().T[["mean", "std"]]
    .loc[pd.IndexSlice[:, ['f1']], :]
)
FINE_TUNE_XLMR_LARGE_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.974911,0.000691
Subtrack 2 [Strict],f1,0.980135,0.000723
Subtrack 2 [Merged],f1,0.986529,0.000465


Group the evaluation for all the models in a ``pandas.DataFrame``.

In [None]:
# Per-model F1 summaries keyed by (strategy, language model, variant).
# FINE_TUNE_XLMR_LARGE_CONTEXT is not part of this table.
data = {
    # Fine-tuned BETO variants
    ("FINETUNE", "BETO", "LINEAR"): FINE_TUNE_BETO,
    ("FINETUNE", "BETO", "+ CONTEXT"): FINE_TUNE_BETO_CONTEXT,
    ("FINETUNE", "BETO", "+ WE"): FINE_TUNE_BETO_WE,
    ("FINETUNE", "BETO", "+ WE + CONTEXT"): FINE_TUNE_BETO_WE_CONTEXT,
    # Feature-based BETO variants
    ("FEATURE_BASED", "BETO", "LSTM CRF"): LSTM_CRF_BETO,
    ("FEATURE_BASED", "BETO", "+ CONTEXT"): LSTM_CRF_BETO_CONTEXT,
    ("FEATURE_BASED", "BETO", "+ WE"): LSTM_CRF_BETO_WE,
    ("FEATURE_BASED", "BETO", "+ WE + CONTEXT"): LSTM_CRF_BETO_WE_CONTEXT,
    # Feature-based FLAIR variants
    ("FEATURE_BASED", "FLAIR", "LSTM CRF"): LSTM_CRF_FLAIR,
    ("FEATURE_BASED", "FLAIR", " + WE"): LSTM_CRF_FLAIR_WE,
}

# Concatenate side by side under the tuple keys, then transpose so that every
# model contributes one "mean" row followed by one "std" row.
result_metrics = pd.concat(
    data.values(),
    axis=1,
    keys=data.keys(),
    names=["Estrategia", "language model", "Modalidad"],
).transpose()

def make_pretty(styler):
    """Apply the table layout used in this notebook to ``styler`` and return it.

    Adds CSS rules for headers and cells, hides column level 1 and formats
    values with a precision of 2. No index level is hidden here, unlike the
    other ``make_pretty`` definitions in this notebook, which hide index level 2.

    Note: this notebook defines ``make_pretty`` several times; each
    definition replaces the previous one when its cell is run.
    """
    table_styles = [
        {'selector': '.index_name', 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'},
        {'selector': 'th.level1', 'props': 'text-align: left;'},
        {'selector': 'th.level0', 'props': 'text-align: center;'},
        {'selector': 'th.col_heading', 'props': 'text-align: center;'},
        {'selector': 'th.col_heading.level0', 'props': 'font-size: 1.5em;'},
        {'selector': 'td', 'props': 'text-align: center; font-weight: bold;'},
    ]
    # Each Styler method returns the styler itself, so the calls can be chained.
    return (
        styler
        .set_table_styles(table_styles, overwrite=False)
        .hide(axis="columns", level=1)
        .format(precision=2)
    )

def visualize_df(df: pd.DataFrame):
    """Render ``df`` as a styled table of ``mean ± std`` strings.

    Rows at even positions of ``df`` are treated as means and rows at odd
    positions as the matching standard deviations. Values are multiplied by
    100 and rounded to two decimals before being joined as text. The last
    index level is dropped from both halves so they align on the remaining
    levels.

    Note: an earlier cell of this notebook also defines ``visualize_df``;
    this definition replaces it when this cell is run.

    Args:
        df: Frame with alternating mean/std rows and a multi-level index.

    Returns:
        A pandas Styler over the text frame, with a background colour per
        cell derived from the mean values and the ``make_pretty`` layout applied.
    """
    # Get the text that will be displayed in the form mean plus minus std.
    # nlevels is read from the index itself, so this also works for an empty
    # frame (df.index[0] would raise there).
    level_to_drop = df.index.nlevels - 1
    scaled = df * 100
    std = scaled.iloc[1::2, ::].round(2).astype(str).droplevel(level_to_drop)
    mean = scaled.iloc[::2, ::].round(2).astype(str).droplevel(level_to_drop)
    df_txt = (mean + " \u00b1 " + std)

    # Extract the mean value that will serve to create the gradient map
    background_df = df.iloc[::2, ::]

    def b_g(s, cmap='PuBu', low=0, high=0):
        # Taken from https://stackoverflow.com/questions/47391948/pandas-style-background-gradient-using-other-dataframe
        # Colour the column from the numeric means, not from the displayed text.
        a = background_df.loc[:, s.name].copy()
        rng = a.max() - a.min()
        norm = colors.Normalize(a.min() - (rng * low), a.max() + (rng * high))
        normed = norm(a.values)
        # plt.get_cmap works across matplotlib versions; plt.cm.get_cmap was removed in 3.9.
        # Scaling by 0.9 keeps the darkest end of the colormap out of the table.
        c = [colors.rgb2hex(x) for x in plt.get_cmap(cmap)(normed*0.9)]
        return ['background-color: %s' % color for color in c]

    return df_txt.style.apply(b_g, cmap='PuBu').pipe(make_pretty)

# Pass the styled summary of all models to myst_nb's glue under the name "table_test".
glue("table_test", visualize_df(result_metrics))


Unnamed: 0_level_0,Unnamed: 1_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Estrategia,language model,Modalidad,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FINETUNE,BETO,LINEAR,97.2 ± 0.13,97.72 ± 0.14,98.48 ± 0.1
FINETUNE,BETO,+ CONTEXT,97.37 ± 0.16,97.97 ± 0.15,98.58 ± 0.1
FINETUNE,BETO,+ WE,97.31 ± 0.09,97.89 ± 0.02,98.63 ± 0.07
FINETUNE,BETO,+ WE + CONTEXT,97.23 ± 0.13,97.89 ± 0.03,98.5 ± 0.12
FEATURE_BASED,BETO,LSTM CRF,97.2 ± 0.05,97.82 ± 0.07,98.5 ± 0.05
FEATURE_BASED,BETO,+ CONTEXT,97.09 ± 0.09,97.78 ± 0.06,98.44 ± 0.1
FEATURE_BASED,BETO,+ WE,97.37 ± 0.07,97.94 ± 0.04,98.63 ± 0.06
FEATURE_BASED,BETO,+ WE + CONTEXT,97.5 ± 0.06,98.09 ± 0.08,98.67 ± 0.09
FEATURE_BASED,FLAIR,LSTM CRF,96.88 ± 0.13,97.64 ± 0.16,98.37 ± 0.12
FEATURE_BASED,FLAIR,+ WE,96.84 ± 0.13,97.31 ± 0.11,98.44 ± 0.08


In [None]:
# Difference, in percentage points, between each model's mean scores and the
# mean scores of the ("FEATURE_BASED", "FLAIR", "LSTM CRF") baseline.
# Even-positioned rows of result_metrics are the "mean" rows; element 0 of the
# baseline block is its "mean" row as well.
diff = (
    result_metrics.iloc[::2]
    - result_metrics.loc[("FEATURE_BASED", "FLAIR", "LSTM CRF")].to_numpy().squeeze()[0]
) * 100
# The FLAIR rows themselves are removed from the comparison.
diff = diff.drop([("FEATURE_BASED", "FLAIR")])
glue(
    "compare_with_flair",
    diff.style.pipe(make_pretty).hide(axis="index", level=[1, 3]).background_gradient(),
)

  diff = (result_metrics[::2] - result_metrics.loc[("FEATURE_BASED", "FLAIR", "LSTM CRF")].values.squeeze()[0])*100
  diff = diff.drop([("FEATURE_BASED", "FLAIR")])


Unnamed: 0_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Estrategia,Modalidad,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FINETUNE,LINEAR,0.32,0.08,0.11
FINETUNE,+ CONTEXT,0.49,0.32,0.21
FINETUNE,+ WE,0.43,0.24,0.26
FINETUNE,+ WE + CONTEXT,0.36,0.24,0.13
FEATURE_BASED,LSTM CRF,0.32,0.18,0.13
FEATURE_BASED,+ CONTEXT,0.21,0.14,0.07
FEATURE_BASED,+ WE,0.49,0.29,0.26
FEATURE_BASED,+ WE + CONTEXT,0.62,0.45,0.3


In [None]:
def make_pretty(styler):
    """Apply a background gradient plus the notebook's table layout to ``styler``.

    Adds CSS rules for headers and cells, hides index level 2 and column
    level 1, and formats values with a precision of 2.

    Note: this notebook defines ``make_pretty`` several times; this
    definition replaces the earlier ones when its cell is run.
    NOTE(review): hiding index level 2 presumably requires the styled frame
    to have at least three index levels; confirm for the frames passed in
    by ``visualize_df``.
    """
    table_styles = [
        {'selector': '.index_name', 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'},
        {'selector': 'th.level1', 'props': 'text-align: left;'},
        {'selector': 'th.level0', 'props': 'text-align: center;'},
        {'selector': 'th.col_heading', 'props': 'text-align: center;'},
        {'selector': 'th.col_heading.level0', 'props': 'font-size: 1.5em;'},
        {'selector': 'td', 'props': 'text-align: center; font-weight: bold;'},
    ]
    # Each Styler method returns the styler itself, so the calls can be chained.
    return (
        styler
        .background_gradient()
        .set_table_styles(table_styles, overwrite=False)
        .hide(axis="index", level=2)
        .hide(axis="columns", level=1)
        .format(precision=2)
    )

Do the same just for the fine-tuning approach

In [None]:
# Fine-tuning results only, keyed by (language model, variant).
data = {
    ("XLMR LARGE", "+ context"): FINE_TUNE_XLMR_LARGE_CONTEXT,
    ("BETO", "transformador lineal"): FINE_TUNE_BETO,
    ("BETO", "+ Context"): FINE_TUNE_BETO_CONTEXT,
    ("BETO", "+ WE"): FINE_TUNE_BETO_WE,
    ("BETO", "+ WE + Context"): FINE_TUNE_BETO_WE_CONTEXT,
}
# Concatenate side by side under the tuple keys, then transpose so that every
# model contributes one "mean" row followed by one "std" row.
df = pd.concat(
    data.values(),
    axis=1,
    keys=data.keys(),
    names=["Estrategia", "computation"],
).transpose()
glue("table_finetune_test", visualize_df(df))

Unnamed: 0_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Estrategia,computation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
XLMR LARGE,+ context,97.49 ± 0.07,98.01 ± 0.07,98.65 ± 0.05
BETO,transformador lineal,97.2 ± 0.13,97.72 ± 0.14,98.48 ± 0.1
BETO,+ Context,97.37 ± 0.16,97.97 ± 0.15,98.58 ± 0.1
BETO,+ WE,97.31 ± 0.09,97.89 ± 0.02,98.63 ± 0.07
BETO,+ WE + Context,97.23 ± 0.13,97.89 ± 0.03,98.5 ± 0.12


Now produce results just for the feature-based approach

In [None]:
# Feature-based results only, keyed by (language model, variant).
data = {
    ("BETO", "LSTM-CRF (ultimas 4 capas)"): LSTM_CRF_BETO,
    ("BETO", "+ Context"): LSTM_CRF_BETO_CONTEXT,
    ("BETO", "+ WE"): LSTM_CRF_BETO_WE,
    ("BETO", "+ WE + Context"): LSTM_CRF_BETO_WE_CONTEXT,
}
# Concatenate side by side under the tuple keys, then transpose so that every
# model contributes one "mean" row followed by one "std" row.
df = pd.concat(
    data.values(),
    axis=1,
    keys=data.keys(),
    names=["Estrategia", "computation"],
).transpose()
glue("table_feature_based_test", visualize_df(df))

Unnamed: 0_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Estrategia,computation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BETO,LSTM-CRF (ultimas 4 capas),97.2 ± 0.05,97.82 ± 0.07,98.5 ± 0.05
BETO,+ Context,97.09 ± 0.09,97.78 ± 0.06,98.44 ± 0.1
BETO,+ WE,97.37 ± 0.07,97.94 ± 0.04,98.63 ± 0.06
BETO,+ WE + Context,97.5 ± 0.06,98.09 ± 0.08,98.67 ± 0.09
