In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from constrerl.evaluate import (
    eval_submission_6_3_ternary_tag_RE,
    eval_submission_6_4_ternary_mention_RE,
    eval_submission_6_2_binary_tag_RE
)
from constrerl.erl_schema import convert_to_output, Article
import glob
from pathlib import Path
import json
import pandas as pd
from collections.abc import Callable, Awaitable


In [3]:
results_dir = "data/results_dev"
ground_truth_file = "data/annotations/dev/dev.json"

results_dir = Path(results_dir)
ground_truth_file = Path(ground_truth_file)

with open(ground_truth_file) as f:
    ground_truth = json.load(f)

In [39]:
eval_results: list[dict] = []


def scoring_to_dict(
    f: str | Path, eval_f: Callable[[str | Path, dict], tuple[float]]
) -> dict:
    precision, recall, f1, micro_precision, micro_recall, micro_f1 = eval_f(
        f, ground_truth
    )
    return {
        "$P$": precision,
        "$R$": recall,
        "$F_1$": f1,
        "$P_{micro}$": micro_precision,
        "$R_{micro}$": micro_recall,
        "$F_{1,micro}$": micro_f1,
    }


def scoring_to_df(eval_f: Callable[[str | Path, dict], tuple[float]]) -> pd.DataFrame:
    eval_results: list[dict] = []
    for result_file in results_dir.glob("*.json"):
        result_file = Path(result_file)
        eval_result = scoring_to_dict(result_file, eval_f)
        result_dict = {
            "Name": " ".join(result_file.name.rstrip(".json").split("-")[:2])
            if  "openai" not in result_file.name
            else " ".join(result_file.name.rstrip(".json").split("-")[:3]),
            "RAG": "\checkmark" if "rag" in result_file.name else "$\\times$",
            "LoRA": "\checkmark" if "lora" in result_file.name else "$\\times$",
            "Reorder": "\checkmark" if "reorder" in result_file.name else "$\\times$",
        }
        result_dict.update(eval_result)
        # result_dict.update({f"6_2_2_{k}": v for k, v in ternary_tag_score.items()})
        # result_dict.update({f"6_2_3_{k}": v for k, v in ternary_mention_score.items()})
        eval_results.append(result_dict)
    eval_df = pd.DataFrame(eval_results)
    if "$F_{1,micro}$" in eval_df.columns:
        eval_df = eval_df.sort_values("$F_{1,micro}$")
    return eval_df


task_6_2_1_df = scoring_to_df(eval_submission_6_2_binary_tag_RE)
task_6_2_2_df = scoring_to_df(eval_submission_6_3_ternary_tag_RE)
task_6_2_3_df = scoring_to_df(eval_submission_6_4_ternary_mention_RE)

=== Removed 299 duplicated binary tag-based relations from predictions ===
=== Removed 1082 duplicated binary tag-based relations from predictions ===
=== Removed 327 duplicated binary tag-based relations from predictions ===
=== Removed 1059 duplicated binary tag-based relations from predictions ===
=== Removed 744 duplicated binary tag-based relations from predictions ===
=== Removed 1105 duplicated binary tag-based relations from predictions ===
=== Removed 70 duplicated binary tag-based relations from predictions ===
=== Removed 447 duplicated binary tag-based relations from predictions ===
=== Removed 7 duplicated binary tag-based relations from predictions ===
=== Removed 225 duplicated binary tag-based relations from predictions ===
=== Removed 269 duplicated binary tag-based relations from predictions ===
=== Removed 1082 duplicated binary tag-based relations from predictions ===
=== Removed 1403 duplicated binary tag-based relations from predictions ===
=== Removed 630 duplica

  "RAG": "\checkmark" if "rag" in result_file.name else "$\\times$",
  "LoRA": "\checkmark" if "lora" in result_file.name else "$\\times$",
  "Reorder": "\checkmark" if "reorder" in result_file.name else "$\\times$",


In [None]:
task_6_2_1_df[:-10].to_latex(
    "report/task_6_2_1.tex",
    float_format="%.2f",
    caption="Dev Set Result for Task 6.2.1 for various models and approaches.",
    label="tab:task:6_2_1",
    index=False,
)

In [41]:
task_6_2_2_df.to_latex(
    "report/task_6_2_2.tex",
    float_format="%.2f",
    caption="Dev Set Result for Task 6.2.2 for various models and approaches.",
    label="tab:task:6_2_2",
    index=False,
)

In [42]:
task_6_2_3_df.to_latex(
    "report/task_6_2_3.tex",
    float_format="%.2f",
    caption="Dev Set Result for Task 6.2.3 for various models and approaches.",
    label="tab:task:6_2_3",
    index=False,
)