# Retrieve the scores obtained for Subtask 1 and 2 with XLM-R embeddings

In [1]:
import pandas as pd
from collections import defaultdict
from pathlib import Path
from typing import Callable, DefaultDict, List, NamedTuple


class SubtrackScores(NamedTuple):
    """Precision/recall pair parsed from one evaluation file."""

    # Precision value read from the evaluation file.
    precision: float
    # Recall value read from the evaluation file.
    recall: float

def _get_scores(folder_path: Path, filename: str, precision_line: int, recall_line: int) -> SubtrackScores:
    """Parse a precision/recall pair out of an evaluation file.

    Args:
        folder_path: Folder containing the evaluation file.
        filename: Name of the evaluation file inside ``folder_path``.
        precision_line: Index (negative values count from the end) of the
            entry holding the precision, after splitting the file on newlines.
        recall_line: Same as ``precision_line`` but for the recall.

    Returns:
        The parsed values wrapped in a ``SubtrackScores``.

    Raises:
        FileNotFoundError: If the evaluation file does not exist.
    """
    score_file = Path(folder_path / filename)
    if not score_file.exists():
        raise FileNotFoundError(f"{score_file} not found!")

    # Split on "\n" (not splitlines) so that a trailing newline still yields
    # an empty last entry; the line indices used by callers rely on that.
    rows = score_file.read_text().split("\n")

    def value_at(index: int) -> float:
        # Keep only what follows the last "=" on the selected row.
        return float(rows[index].split("=")[-1])

    return SubtrackScores(precision=value_at(precision_line), recall=value_at(recall_line))

def get_subtrack1_scores(folder_path: Path) -> SubtrackScores:
    """Read the Subtrack 1 precision/recall from the ``ner`` file in ``folder_path``.

    After splitting the file on newlines, precision is taken from entry -3
    and recall from entry -2.
    """
    return _get_scores(folder_path, filename="ner", precision_line=-3, recall_line=-2)

def get_subtrack2_strict_scores(folder_path: Path) -> SubtrackScores:
    """Read the strict Subtrack 2 precision/recall from the ``spans`` file in ``folder_path``.

    After splitting the file on newlines, precision is taken from entry -6
    and recall from entry -5.
    """
    return _get_scores(folder_path, filename="spans", precision_line=-6, recall_line=-5)

def get_subtrack2_merged_scores(folder_path: Path) -> SubtrackScores:
    """Read the merged Subtrack 2 precision/recall from the ``spans`` file in ``folder_path``.

    After splitting the file on newlines, precision is taken from entry -3
    and recall from entry -2.
    """
    return _get_scores(folder_path, filename="spans", precision_line=-3, recall_line=-2)

def get_scores_as_df(seeds: List[int], get_folder: Callable[[int], Path]) -> pd.DataFrame:
    """Gather precision, recall and F1 of every subtrack for each seed.

    Args:
        seeds: Seeds of the runs to collect; must contain at least one seed.
        get_folder: Maps a seed to the folder holding that run's ``ner`` and
            ``spans`` evaluation files.

    Returns:
        A DataFrame whose rows are indexed by a ``(Track, Scores)`` MultiIndex
        (three tracks x precision/recall/f1) and whose columns are the runs,
        labelled 0..n-1 in the order of ``seeds``.

    Raises:
        ValueError: If ``seeds`` is empty.
        FileNotFoundError: If an evaluation file is missing for a seed.
    """
    # Column name -> one value per seed.
    scores: DefaultDict[str, List[float]] = defaultdict(list)

    for seed in seeds:
        folder = get_folder(seed)

        # (column prefix, reader) pairs, read in the same order for every seed.
        for prefix, read_scores in (
            ("1", get_subtrack1_scores),
            ("2_1", get_subtrack2_strict_scores),
            ("2_2", get_subtrack2_merged_scores),
        ):
            precision, recall = read_scores(folder)
            scores[f"{prefix}_p"].append(precision)
            scores[f"{prefix}_r"].append(recall)

    if not scores:
        # Without this guard the F1 computation below fails with an obscure
        # KeyError on the missing "1_p" column.
        raise ValueError("seeds is empty: at least one seed is required to build the score table")

    df = pd.DataFrame.from_dict(scores)
    for prefix in ("1", "2_1", "2_2"):
        p, r = df[f"{prefix}_p"], df[f"{prefix}_r"]
        # Harmonic mean of precision and recall (NaN when both are 0).
        df[f"{prefix}_f1"] = 2 * p * r / (p + r)

    # Reorder columns so they line up with the MultiIndex built below.
    new_columns = ["1_p", "1_r", "1_f1", "2_1_p", "2_1_r", "2_1_f1", "2_2_p", "2_2_r", "2_2_f1"]
    df = df[new_columns]

    # Prepare multi index names; the product order must match new_columns.
    multi_index = pd.MultiIndex.from_product(
        [
            ["Subtrack 1", "Subtrack 2 [Strict]", "Subtrack 2 [Merged]"],
            ["precision", "recall", "f1"],
        ],
        names=["Track", "Scores"],
    )
    # Transpose so that each (track, score) pair is a row and each run a column.
    return pd.DataFrame(df.to_numpy().T, index=multi_index)

Results obtained for the different tracks on the **dev** set.

In [2]:
# Root folder of the experiment; it holds one run directory per seed.
base_folder = Path("/home/wave/Project/MedDocAn/experiments/corpus_sentence_grid_search_flert_xlm-roberta_docstart")

# Collect the dev-set scores of the three seeded runs.
df = get_scores_as_df(
    seeds=[1, 33, 12],
    get_folder=lambda seed: base_folder.joinpath(
        f"an_wh_rs_False_dpt_0_emb_xlm-roberta-large-cased-context_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/dev"
    ),
)
# Mean and standard deviation across runs for every (track, score) row.
df.T.describe().loc[["mean", "std"]].T

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,precision,0.971696,0.000807
Subtrack 1,recall,0.978452,0.00113
Subtrack 1,f1,0.975062,0.000886
Subtrack 2 [Strict],precision,0.976033,0.000948
Subtrack 2 [Strict],recall,0.982819,0.001122
Subtrack 2 [Strict],f1,0.979414,0.000948
Subtrack 2 [Merged],precision,0.982832,0.000605
Subtrack 2 [Merged],recall,0.987816,0.000247
Subtrack 2 [Merged],f1,0.985318,0.000318


The same for the **test** sets.

In [3]:
# Root folder of the experiment; it holds one run directory per seed.
base_folder = Path("/home/wave/Project/MedDocAn/experiments/corpus_sentence_grid_search_flert_xlm-roberta_docstart")

# Collect the test-set scores of the three seeded runs.
df = get_scores_as_df(
    seeds=[1, 33, 12],
    get_folder=lambda seed: base_folder.joinpath(
        f"an_wh_rs_False_dpt_0_emb_xlm-roberta-large-cased-context_FT_True_Ly_-1_seed_{seed}_lr_5e-06_it_40_bs_4_opti_AdamW_pjct_emb_False_sdl_LinearSchedulerWithWarmup_use_crf_False_use_rnn_False_wup_0.1/0/evals/test"
    ),
)
# Mean and standard deviation across runs for every (track, score) row.
df.T.describe().loc[["mean", "std"]].T

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,precision,0.97169,0.001203
Subtrack 1,recall,0.978155,0.000204
Subtrack 1,f1,0.974911,0.000691
Subtrack 2 [Strict],precision,0.976896,0.001209
Subtrack 2 [Strict],recall,0.983395,0.000353
Subtrack 2 [Strict],f1,0.980135,0.000723
Subtrack 2 [Merged],precision,0.984255,9.7e-05
Subtrack 2 [Merged],recall,0.988815,0.000937
Subtrack 2 [Merged],f1,0.986529,0.000465


If we only want to see the F1 scores (still the **test** set values, since `df` was last assigned in the previous cell):

In [4]:
# Keep only the "f1" row of every track, with the mean/std columns.
df.T.describe().T.loc[pd.IndexSlice[:, ['f1']], ["mean", "std"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.974911,0.000691
Subtrack 2 [Strict],f1,0.980135,0.000723
Subtrack 2 [Merged],f1,0.986529,0.000465


In [6]:
# Keep only the "f1" row of Subtrack 1, with the mean/std columns.
df.T.describe().T.loc[pd.IndexSlice['Subtrack 1', ['f1']], ["mean", "std"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
Track,Scores,Unnamed: 2_level_1,Unnamed: 3_level_1
Subtrack 1,f1,0.974911,0.000691
