In [1]:
from pathlib import Path
import pandas as pd
# Lift pandas' display truncation so frames render every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

from sdoh_nlp.evaluate import Corpus, Evaluation

# Input locations, all relative to the notebook's working directory.
data_dir = "../data/performance_measure"
human_annotation_dir = f"{data_dir}/human_annotation"
model_annotation_dir = f"{data_dir}/model_annotation"
dtd_path = "../models/MedSDoH/schema.dtd"

# These categories are not included in the evaluation.
excluded_categories = [
    "Sex_At_Birth",
    "Race_or_Ethnicity",
    "Sexual_Orientation",
    "Marital_Status",
]

In [4]:
# Measure performance
# Load the model output and the human annotations (the gold standard),
# both read with format="mae".
model_corpus = Corpus(model_annotation_dir, format="mae")
human_corpus = Corpus(human_annotation_dir, format="mae")  # gold standard

# NOTE(review): `eval` shadows the Python builtin of the same name. The name is
# kept because the error-listing cell further down refers to it.
eval = Evaluation(
    model_corpus.soup,
    human_corpus.soup,
    dtd_path,
    overlap_ratio=0.1,
)

# Macro-averaged scores per category, leaving out the excluded categories.
df = eval.get_macro_avg_per_category(excluded_categories=excluded_categories)
df

In [5]:
# Print errors per category
# Collect every error row, then drop the categories that are excluded from
# the evaluation.
df_errors = eval.get_errors_per_category_with_text()
df_errors = df_errors[~df_errors["concept_name"].isin(excluded_categories)]
df_errors = df_errors.sort_values(by=["file", "concept_name"])

# Display grouped by concept, with files ordered inside each concept.
# A single multi-key sort is used on purpose: re-sorting on "concept_name"
# alone goes through pandas' default quicksort, which is not stable, so the
# per-file order established above would not be guaranteed to survive.
df_errors.sort_values(by=["concept_name", "file"]).reset_index()