In [1]:
from myst_nb import glue

# Results on dev set
The evaluation of automatic predictions had two different scenarios or sub-tracks:

1.  **NER offset and entity type classification**: the first sub-track was focused
on the identification and classification of sensitive information (e.g., patient
names, telephones, addresses, etc.).  

2.  **Sensitive span detection**: the second sub-track was focused on the detection
of sensitive text more specific to the practical scenario necessary for the
release of de-identified clinical documents, where the objective is to identify
and to mask confidential data, regardless of the real type of entity or the
correct identification of PHI type.

We evaluate our models with the corresponding evaluation scripts and report the F1-score averaged over three runs.

First, we define some helper code to automatically extract the evaluation results.

In [2]:
import pandas as pd
from collections import defaultdict
from pathlib import Path
from typing import Callable, DefaultDict, List, NamedTuple


class SubtrackScores(NamedTuple):
    """Precision/recall pair parsed from one evaluation result file."""
    # Precision value read from the evaluation output.
    precision: float
    # Recall value read from the evaluation output.
    recall: float

def _get_scores(folder_path: Path, filename: str, precision_line: int, recall_line: int) -> SubtrackScores:
    """Read precision and recall from an evaluation output file.

    The file is split on ``"\\n"`` and the two requested lines are parsed by
    taking whatever follows the last ``=`` sign and converting it to ``float``.

    Args:
        folder_path: Folder that contains the evaluation file.
        filename: Name of the evaluation file inside ``folder_path``.
        precision_line: Index (negative values count from the end) of the line
            holding the precision.
        recall_line: Index of the line holding the recall.

    Returns:
        The parsed ``SubtrackScores(precision, recall)``.

    Raises:
        FileNotFoundError: If the evaluation file does not exist.
    """
    score_file = Path(folder_path / filename)
    if score_file.exists():
        rows = score_file.read_text().split("\n")
        # Precision is parsed first, then recall; the value is the text after the last "=".
        parsed = [float(rows[position].rpartition("=")[2]) for position in (precision_line, recall_line)]
        return SubtrackScores(*parsed)

    raise FileNotFoundError(f"{score_file} not found!")

def get_subtrack1_scores(folder_path: Path) -> SubtrackScores:
    """Return the sub-track 1 scores stored in the ``ner`` file of ``folder_path``.

    Precision is read from the third-to-last line and recall from the
    second-to-last line of the file.
    """
    return _get_scores(folder_path, filename="ner", precision_line=-3, recall_line=-2)

def get_subtrack2_strict_scores(folder_path: Path) -> SubtrackScores:
    """Return the strict sub-track 2 scores stored in the ``spans`` file of ``folder_path``.

    Precision is read from the sixth-to-last line and recall from the
    fifth-to-last line of the file.
    """
    return _get_scores(folder_path, filename="spans", precision_line=-6, recall_line=-5)

def get_subtrack2_merged_scores(folder_path: Path) -> SubtrackScores:
    """Return the merged sub-track 2 scores stored in the ``spans`` file of ``folder_path``.

    Precision is read from the third-to-last line and recall from the
    second-to-last line of the file.
    """
    return _get_scores(folder_path, filename="spans", precision_line=-3, recall_line=-2)

def get_scores_as_df(seeds: List[int], get_folder: Callable[[int], Path]) -> pd.DataFrame:
    """Collect precision, recall and F1 of every sub-track for each seed.

    Args:
        seeds: Seeds of the runs to read; one result column is produced per seed.
        get_folder: Maps a seed to the folder containing that run's ``ner`` and
            ``spans`` evaluation files.

    Returns:
        A DataFrame indexed by a ("Track", "Scores") MultiIndex (three tracks
        times precision/recall/f1) with one positional column per seed, in the
        order of ``seeds``.

    Raises:
        ValueError: If ``seeds`` is empty.
        FileNotFoundError: If an evaluation file is missing for one of the seeds.
    """
    # Single source of truth for column prefixes, row labels and readers, so the
    # column order and the MultiIndex labels cannot drift apart.
    subtracks = (
        ("1", "Subtrack 1", get_subtrack1_scores),
        ("2_1", "Subtrack 2 [Strict]", get_subtrack2_strict_scores),
        ("2_2", "Subtrack 2 [Merged]", get_subtrack2_merged_scores),
    )
    subtracks_scores: DefaultDict[str, List[float]] = defaultdict(list)

    for seed in seeds:
        fpth = get_folder(seed)
        for prefix, _, read_scores in subtracks:
            p, r = read_scores(fpth)
            subtracks_scores[f"{prefix}_p"].append(p)
            subtracks_scores[f"{prefix}_r"].append(r)

    if not subtracks_scores:
        raise ValueError("seeds must contain at least one seed")

    df = pd.DataFrame.from_dict(subtracks_scores)
    for prefix, _, _ in subtracks:
        p, r = df[f"{prefix}_p"], df[f"{prefix}_r"]
        # Harmonic mean of precision and recall (NaN when both are 0).
        df[f"{prefix}_f1"] = 2 * p * r / (p + r)

    # Reorder columns as precision, recall, f1 within each sub-track.
    new_columns = [f"{prefix}_{score}" for prefix, _, _ in subtracks for score in ("p", "r", "f1")]
    df = df[new_columns]

    # Prepare multi index names; the product order matches ``new_columns``.
    multi_index = pd.MultiIndex.from_product(
        [
            [label for _, label, _ in subtracks],
            ["precision", "recall", "f1"],
        ],
        names=["Track", "Scores"],
    )
    # Transpose so that scores are rows and seeds are columns.
    return pd.DataFrame(df.to_numpy().T, index=multi_index)

Next, define code to visualize the results in a convenient way.

In [33]:
import pandas as pd
import matplotlib.pyplot as plt  
from matplotlib import colors

def make_pretty(styler):
    """Apply the shared table look to a pandas ``Styler`` and return it.

    Adds CSS rules for headers and cells (without overwriting styles already
    set), hides index level 2 and column level 1, and sets the display
    precision to 2.

    Args:
        styler: The ``Styler`` to configure; it is modified in place.

    Returns:
        The same ``styler`` object, so the function can be used with ``.pipe``.
    """
    # (CSS selector, CSS declarations) pairs.
    css_rules = (
        ('.index_name', 'font-style: italic; color: darkgrey; font-weight:normal;'),
        ('th.level1', 'text-align: left;'),
        ('th.level0', 'text-align: center;'),
        ('th.col_heading', 'text-align: center;'),
        ('th.col_heading.level0', 'font-size: 1.5em;'),
        ('td', 'text-align: center; font-weight: bold;'),
    )
    table_styles = [{'selector': selector, 'props': props} for selector, props in css_rules]
    styler.set_table_styles(table_styles, overwrite=False)

    # NOTE(review): visualize_df passes a frame whose index was reduced with
    # droplevel(2); confirm that hiding index level 2 is still needed.
    styler.hide(axis="index", level=2)
    styler.hide(axis="columns", level=1)
    styler.format(precision=2)
    return styler

def visualize_df(df: pd.DataFrame):
    """Render a mean/std summary as a styled "mean ± std" table.

    Args:
        df: Frame whose rows alternate mean, std, mean, std, ... and whose row
            index has (at least) three levels, the third one (level 2)
            distinguishing the mean row from the std row. Values are multiplied
            by 100 for display.

    Returns:
        A pandas ``Styler`` showing ``"mean ± std"`` with two decimals per cell,
        coloured column by column with a gradient based on the mean values and
        finished with ``make_pretty``.
    """
    def _as_text(frame: pd.DataFrame) -> pd.DataFrame:
        # Fixed two-decimal formatting so every cell has the same width
        # ("97.30" rather than "97.3").
        return frame.apply(lambda column: column.map("{:.2f}".format))

    # Get the text that will be displayed in the form mean plus-minus std.
    percent = df * 100
    mean = _as_text(percent.iloc[::2, :]).droplevel(2)
    std = _as_text(percent.iloc[1::2, :]).droplevel(2)
    df_txt = mean + " \u00b1 " + std

    # Extract the mean values that will serve to create the gradient map.
    background_df = df.iloc[::2, :]

    def b_g(s, cmap='PuBu', low=0, high=0):
        # Taken from https://stackoverflow.com/questions/47391948/pandas-style-background-gradient-using-other-dataframe
        # Colour the cells of column ``s`` from the matching column of background_df.
        a = background_df.loc[:, s.name]
        rng = a.max() - a.min()
        norm = colors.Normalize(a.min() - (rng * low), a.max() + (rng * high))
        normed = norm(a.values)
        # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib no longer provides.
        # The 0.9 factor keeps the colours away from the darkest end of the map.
        c = [colors.rgb2hex(x) for x in plt.get_cmap(cmap)(normed * 0.9)]
        return [f'background-color: {color}' for color in c]

    return df_txt.style.apply(b_g, cmap='PuBu').pipe(make_pretty)

Define the root folder where all the results and the trained models are stored.

In [47]:
# Two directory levels above the current working directory; the experiment paths in the cells below are built from it.
base_folder = Path.cwd().parents[1]

Store the evaluation for each model in a ``pandas.DataFrame``.

In [58]:
from functools import partial

seeds = [1, 33]
metric = "f1"
dataset = "dev"

# Bind the seed list once so that only the folder resolver varies per model.
get_scores_as_df_with_seed = partial(get_scores_as_df, seeds)
# Model name -> function mapping a seed to that run's evaluation folder for `dataset`.
get_folders = {
    "LSTM_CRF_FLAIR_WE": lambda seed: base_folder / f"experiments/corpus_sentence_flair_we_lstm_crf/results_seed_{seed}/evals/{dataset}",
    "LSTM_CRF_FLAIR": lambda seed: base_folder / f"experiments/corpus_sentence_flair_lstm_crf/an_wh_rs_True_dpt_0.08716810045694838_emb_seed_{seed}_Stack(0_lm-es-forward.pt, 1_lm-es-backward.pt)_hdn_sz_256_lr_0.1_it_150_bs_4_opti_SGD_pjct_emb_True_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/{dataset}",
    "FINE_TUNE_BETO_CONTEXT": lambda seed: base_folder / f"experiments/corpus_sentence_bert_context_finetune/an_wh_rs_False_dpt_0_emb_beto-cased-context_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_150_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/{dataset}",
}
# For every model: mean and std over the seeds, restricted to the rows of the chosen metric.
dfs = {
    k: get_scores_as_df_with_seed(v).T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, [metric]], :]
    for k, v in get_folders.items()
}


## LSTM CRF + FLAIR + WE

In [48]:
# NOTE(review): only two seeds are aggregated here, while the other model cells use three;
# the trailing comment lists a larger seed set that is not used - confirm which is intended.
seeds = [1, 33]  # [1, 10, 25, 33, 42]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_flair_we_lstm_crf/results_seed_{seed}/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
LSTM_CRF_FLAIR_WE = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
LSTM_CRF_FLAIR_WE

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.967403,0.000858
Subtrack 2 [Strict],f1,0.972404,0.000613
Subtrack 2 [Merged],f1,0.982069,0.000291


## LSTM CRF + FLAIR

In [6]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_flair_lstm_crf/an_wh_rs_True_dpt_0.08716810045694838_emb_seed_{seed}_Stack(0_lm-es-forward.pt, 1_lm-es-backward.pt)_hdn_sz_256_lr_0.1_it_150_bs_4_opti_SGD_pjct_emb_True_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
LSTM_CRF_FLAIR = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
LSTM_CRF_FLAIR

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.973216,0.000767
Subtrack 2 [Strict],f1,0.979308,0.000644
Subtrack 2 [Merged],f1,0.985303,0.000528


## FINETUNE + BETO + CONTEXT

In [7]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_context_finetune/an_wh_rs_False_dpt_0_emb_beto-cased-context_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_150_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
FINE_TUNE_BETO_CONTEXT = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
FINE_TUNE_BETO_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.973197,0.001007
Subtrack 2 [Strict],f1,0.978363,0.000994
Subtrack 2 [Merged],f1,0.985226,0.000324


## FINETUNE + BETO + CONTEXT + WE

In [8]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the 40-iteration run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_context_we_finetune_it_40/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto-cased_FT_True_Ly_-1_seed_{seed})_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.05/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows. Stored under its own
# name so this 40-iteration result is not silently replaced by the 150-iteration
# run, which is assigned to FINE_TUNE_BETO_WE_CONTEXT in the following cell.
FINE_TUNE_BETO_WE_CONTEXT_IT_40 = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
FINE_TUNE_BETO_WE_CONTEXT_IT_40

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.971168,0.00103
Subtrack 2 [Strict],f1,0.97604,0.000922
Subtrack 2 [Merged],f1,0.985143,0.000463


In [21]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the 150-iteration run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_context_we_finetune_it_150/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto-cased_FT_True_Ly_-1_seed_{seed})_lr_5e-06_it_150_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows; this is the value the
# grouping cells further down read when the notebook is run top to bottom.
FINE_TUNE_BETO_WE_CONTEXT = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
FINE_TUNE_BETO_WE_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.974008,0.001621
Subtrack 2 [Strict],f1,0.979574,0.001805
Subtrack 2 [Merged],f1,0.986593,0.000603


## FINETUNE + BETO

In [22]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_finetune_it_40/an_wh_rs_False_dpt_0_emb_beto-cased_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.05/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
FINE_TUNE_BETO = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
FINE_TUNE_BETO

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.971867,0.000628
Subtrack 2 [Strict],f1,0.976733,0.001166
Subtrack 2 [Merged],f1,0.984033,0.000884


## FINETUNE + BETO + WE

In [23]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_we_finetune_it_150/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto-cased_FT_True_Ly_-1_seed_{seed})_lr_5e-06_it_150_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
FINE_TUNE_BETO_WE = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
FINE_TUNE_BETO_WE

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.973038,0.000757
Subtrack 2 [Strict],f1,0.978029,0.001002
Subtrack 2 [Merged],f1,0.985476,0.000955


## LSTM CRF + BETO

In [24]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_lstm_crf/an_wh_rs_False_dpt_0_emb_beto_Ly_all_mean_seed_{seed}_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
LSTM_CRF_BETO = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
LSTM_CRF_BETO

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.972545,0.000662
Subtrack 2 [Strict],f1,0.978971,0.000171
Subtrack 2 [Merged],f1,0.984741,0.000614


## LSTM CRF + BETO + WE

In [25]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_we_lstm_crf/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto_Ly_all_mean_seed_{seed})_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
LSTM_CRF_BETO_WE = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
LSTM_CRF_BETO_WE

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.973502,0.001801
Subtrack 2 [Strict],f1,0.97941,0.001723
Subtrack 2 [Merged],f1,0.985644,0.001448


## LSTM CRF + BETO + WE + CONTEXT

In [26]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_context_we_lstm_crf/an_wh_rs_False_dpt_0_emb_Stack(0_es-wiki-fasttext-300d-1M, 1_1-beto_Ly_all_mean_context_seed_{seed})_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
LSTM_CRF_BETO_WE_CONTEXT = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
LSTM_CRF_BETO_WE_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.974593,0.001014
Subtrack 2 [Strict],f1,0.980214,0.001126
Subtrack 2 [Merged],f1,0.985926,0.000382


## LSTM CRF + BETO + CONTEXT

In [27]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_bert_context_lstm_crf/an_wh_rs_False_dpt_0_emb_beto_Ly_all_mean_context_seed_{seed}_hdn_sz_256_lr_0.1_it_500_bs_4_opti_SGD_pjct_emb_False_rnn_ly_2_sdl_AnnealOnPlateau_use_crf_True_use_rnn_True/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
LSTM_CRF_BETO_CONTEXT = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
LSTM_CRF_BETO_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.97036,0.000835
Subtrack 2 [Strict],f1,0.976041,0.000999
Subtrack 2 [Merged],f1,0.983628,0.001307


## FINETUNE + XLMR LARGE + CONTEXT

In [32]:
# Seeds of the runs to aggregate.
seeds = [1, 12, 33]
# Maps a seed to the `evals/dev` folder of the run for that seed.
get_folders = lambda seed: base_folder / f"experiments/corpus_sentence_xlmrl_context_finetune/an_wh_rs_False_dpt_0_emb_xlm-roberta-large-cased-context_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/dev"
# One column per seed, one row per (track, score) pair.
df = get_scores_as_df(seeds, get_folders)
# Mean and std over the seeds, restricted to the f1 rows.
FINE_TUNE_XLMR_LARGE_CONTEXT = df.T.describe().T[["mean", "std"]].loc[pd.IndexSlice[:, ['f1']], :]
FINE_TUNE_XLMR_LARGE_CONTEXT

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.975062,0.000886
Subtrack 2 [Strict],f1,0.979414,0.000948
Subtrack 2 [Merged],f1,0.985318,0.000318


Group the evaluation for all the models in a ``pandas.DataFrame``.

In [17]:
# Per-model f1 summaries keyed by (training approach, embedding configuration);
# the two key components are named "Model" and "computation" in the concat below.
data = {
    ("FINETUNE", "XLMR LARGE + CONTEXT"): FINE_TUNE_XLMR_LARGE_CONTEXT,
    ("FINETUNE", "BETO + CONTEXT"): FINE_TUNE_BETO_CONTEXT,
    ("FINETUNE", "BETO"): FINE_TUNE_BETO,
    ("FINETUNE", "BETO + WE"): FINE_TUNE_BETO_WE,
    ("FINETUNE", "BETO + WE + CONTEXT"): FINE_TUNE_BETO_WE_CONTEXT,
    ("LSTM CRF", "BETO + CONTEXT"): LSTM_CRF_BETO_CONTEXT,
    ("LSTM CRF", "BETO"): LSTM_CRF_BETO,
    ("LSTM CRF", "BETO + WE + CONTEXT"): LSTM_CRF_BETO_WE_CONTEXT,
    ("LSTM CRF", "BETO + WE"): LSTM_CRF_BETO_WE,
    ("LSTM CRF", "FLAIR + WE"): LSTM_CRF_FLAIR_WE,
    ("LSTM CRF", "FLAIR"): LSTM_CRF_FLAIR,

}
# Concatenate the summaries side by side, then transpose so that every model
# contributes a "mean" row followed by a "std" row (the layout visualize_df slices).
result_metrics = pd.concat(data.values(), axis=1, keys=data.keys(), names=["Model", "computation"]).T
visualize_df(result_metrics)

Unnamed: 0_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Model,computation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FINETUNE,XLMR LARGE + CONTEXT,97.51 ± 0.09,97.94 ± 0.09,98.53 ± 0.03
FINETUNE,BETO + CONTEXT,97.32 ± 0.1,97.84 ± 0.1,98.52 ± 0.03
FINETUNE,BETO,97.19 ± 0.06,97.67 ± 0.12,98.4 ± 0.09
FINETUNE,BETO + WE,97.3 ± 0.08,97.8 ± 0.1,98.55 ± 0.1
FINETUNE,BETO + WE + CONTEXT,97.49 ± 0.09,98.06 ± 0.09,98.67 ± 0.08
LSTM CRF,BETO + CONTEXT,97.04 ± 0.08,97.6 ± 0.1,98.36 ± 0.13
LSTM CRF,BETO,97.25 ± 0.07,97.9 ± 0.02,98.47 ± 0.06
LSTM CRF,BETO + WE + CONTEXT,97.46 ± 0.1,98.02 ± 0.11,98.59 ± 0.04
LSTM CRF,BETO + WE,97.35 ± 0.18,97.94 ± 0.17,98.56 ± 0.14
LSTM CRF,FLAIR + WE,96.75 ± 0.07,97.24 ± 0.1,98.27 ± 0.06


## FINE-TUNING approach

In [18]:
# Fine-tuned models only, keyed by (base model, variant label).
data = {
        ("XLMR LARGE", "+ context"): FINE_TUNE_XLMR_LARGE_CONTEXT,
        ("BETO", "transformador lineal"): FINE_TUNE_BETO,
        ("BETO", "+ Context"): FINE_TUNE_BETO_CONTEXT,
        ("BETO", "+ WE"): FINE_TUNE_BETO_WE,
        ("BETO", "+ WE + Context"): FINE_TUNE_BETO_WE_CONTEXT,
    }
# Side-by-side concat then transpose: one "mean" row followed by one "std" row per model.
df = pd.concat(data.values(), axis=1, keys=data.keys(), names=["Estrategia", "computation"]).T
# Register the styled table under the key "table_finetune_dev" via myst_nb's glue.
glue("table_finetune_dev", visualize_df(df))

Unnamed: 0_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Estrategia,computation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
XLMR LARGE,+ context,97.51 ± 0.09,97.94 ± 0.09,98.53 ± 0.03
BETO,transformador lineal,97.19 ± 0.06,97.67 ± 0.12,98.4 ± 0.09
BETO,+ Context,97.32 ± 0.1,97.84 ± 0.1,98.52 ± 0.03
BETO,+ WE,97.3 ± 0.08,97.8 ± 0.1,98.55 ± 0.1
BETO,+ WE + Context,97.49 ± 0.09,98.06 ± 0.09,98.67 ± 0.08


## Feature-Based approach

In [19]:
# LSTM-CRF models on BETO embeddings only, keyed by (base model, variant label).
data = {
        ("BETO", "LSTM-CRF (ultimas 4 capas)"): LSTM_CRF_BETO,
        ("BETO", "+ Context"): LSTM_CRF_BETO_CONTEXT,
        ("BETO", "+ WE"): LSTM_CRF_BETO_WE,
        ("BETO", "+ WE + Context"): LSTM_CRF_BETO_WE_CONTEXT,
    }
# Side-by-side concat then transpose: one "mean" row followed by one "std" row per model.
df = pd.concat(data.values(), axis=1, keys=data.keys(), names=["Estrategia", "computation"]).T
# Register the styled table under the key "table_feature_based_dev" via myst_nb's glue.
glue("table_feature_based_dev", visualize_df(df))

Unnamed: 0_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Estrategia,computation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BETO,LSTM-CRF (ultimas 4 capas),97.25 ± 0.07,97.9 ± 0.02,98.47 ± 0.06
BETO,+ Context,97.04 ± 0.08,97.6 ± 0.1,98.36 ± 0.13
BETO,+ WE,97.35 ± 0.18,97.94 ± 0.17,98.56 ± 0.14
BETO,+ WE + Context,97.46 ± 0.1,98.02 ± 0.11,98.59 ± 0.04


## Flair

In [20]:
# Flair-based LSTM-CRF models only, keyed by (architecture, variant label).
data = {
        ("LSTM-CRF", "FLAIR"): LSTM_CRF_FLAIR,
        ("LSTM-CRF", "+ WE"): LSTM_CRF_FLAIR_WE,
    }
# Side-by-side concat then transpose: one "mean" row followed by one "std" row per model.
df = pd.concat(data.values(), axis=1, keys=data.keys(), names=["Estrategia", "computation"]).T
# Register the styled table under the key "table_flair_dev" via myst_nb's glue.
glue("table_flair_dev", visualize_df(df))

Unnamed: 0_level_0,Track,Subtrack 1,Subtrack 2 [Strict],Subtrack 2 [Merged]
Estrategia,computation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LSTM-CRF,FLAIR,97.32 ± 0.08,97.93 ± 0.06,98.53 ± 0.05
LSTM-CRF,+ WE,96.75 ± 0.07,97.24 ± 0.1,98.27 ± 0.06
