In [1]:
import shutil
import os

In [None]:
from zipfile import ZipFile
from pathlib import Path
import glob
import json


In [None]:
results_path = "data/results_test/*.json"
results = glob.glob(results_path)
results = [Path(p) for p in results]

In [4]:
results

[PosixPath('data/merge_dev/openai-4-1_union.json'),
 PosixPath('data/merge_dev/hermes-3b_intersection.json'),
 PosixPath('data/merge_dev/hermes-3b-rag_union.json'),
 PosixPath('data/merge_dev/hermes-3b-lora-rag_intersection.json'),
 PosixPath('data/merge_dev/hermes-3b-lora-reorder_union.json'),
 PosixPath('data/merge_dev/hermes-3b-lora_union.json'),
 PosixPath('data/merge_dev/hermes-3b-rag_intersection.json'),
 PosixPath('data/merge_dev/hermes-3b-lora_intersection.json'),
 PosixPath('data/merge_dev/openai-4-1-reorder_union.json'),
 PosixPath('data/merge_dev/openai-4o-mini_intersection.json'),
 PosixPath('data/merge_dev/openai-4o-mini-reorder_union.json'),
 PosixPath('data/merge_dev/hermes-3b_union.json'),
 PosixPath('data/merge_dev/openai-4-1-reorder_intersection.json'),
 PosixPath('data/merge_dev/hermes-3b-lora-rag_union.json'),
 PosixPath('data/merge_dev/hermes-8b-rag-reorder_union.json'),
 PosixPath('data/merge_dev/openai-4-1-rag_union.json'),
 PosixPath('data/merge_dev/hermes-8b-ra

In [5]:
task_ids = {
    "T621": "binary_tag_based_relations",
    "T622": "ternary_tag_based_relations",
    "T623": "ternary_mention_based_relations",
}
run_ids = [str(p.name).split(".")[0] for p in results]
system_id = "CLEANR"
team_id = "ToGS"
run_ids

['openai-4-1_union',
 'hermes-3b_intersection',
 'hermes-3b-rag_union',
 'hermes-3b-lora-rag_intersection',
 'hermes-3b-lora-reorder_union',
 'hermes-3b-lora_union',
 'hermes-3b-rag_intersection',
 'hermes-3b-lora_intersection',
 'openai-4-1-reorder_union',
 'openai-4o-mini_intersection',
 'openai-4o-mini-reorder_union',
 'hermes-3b_union',
 'openai-4-1-reorder_intersection',
 'hermes-3b-lora-rag_union',
 'hermes-8b-rag-reorder_union',
 'openai-4-1-rag_union',
 'hermes-8b-rag-reorder_intersection',
 'hermes-8b_intersection',
 'openai-4-1-rag-reorder_intersection',
 'hermes-3b-rag-reorder_intersection',
 'hermes-8b_union',
 'openai-4o-mini-rag_union',
 'openai-4o-mini-rag_intersection',
 'openai-4o-mini-rag-reorder_union',
 'hermes-3b-lora-reorder_intersection',
 'openai-4o-mini-reorder_intersection',
 'hermes-3b-rag-reorder_union',
 'openai-4o-mini-rag-reorder_intersection',
 'hermes-3b-lora-rag-reorder_intersection',
 'openai-4-1-rag_intersection',
 'hermes-8b-reorder_union',
 'hermes

In [6]:
import chevron
import json

In [None]:
staging_dir = Path("./staging")
for task_id, task_key in task_ids.items():
    for run_id, result_path in zip(run_ids, results):
        run_id_simples = run_id.replace("-", "")

        identifier = f"{team_id}_{task_id}_{run_id_simples}_{system_id}"
        identifier_dir = staging_dir / identifier
        if not identifier_dir.exists():
            os.mkdir(identifier_dir)
        desc_data = ""
        with open("description.md", "r") as f:
            desc_data = f.read(-1)
        flags = []
        if "rag" in run_id:
            flags.append("RAG")
        if "reorder" in run_id:
            flags.append("Reordered")
        if "lora" in run_id:
            flags.append("Finetuned using LoRA")
        rendered_desc = chevron.render(
            desc_data,
            {
                "task_id": task_id,
                "run_id": run_id,
                "system_id": system_id,
                "team_id": team_id,
                "flags": flags,
            },
        )
        desc_file = identifier_dir / f"{identifier}.meta"
        out_file = identifier_dir / f"{identifier}.json"
        with open(desc_file, "w") as f:
            f.write(rendered_desc)
        run_data: dict[str, dict[str, any]] = None
        with open(result_path, "r") as rf:
            run_data = json.load(rf)
        stratified_res = {}
        for k, res in run_data.items():
            stratified_res[k] = {task_key: res[task_key]}
        with open(out_file, "w") as rf:
            json.dump(stratified_res, rf)

        # zip_path = staging_dir / f"{identifier}.zip"
        # zf = ZipFile(zip_path, "w")
        # zf.write(desc_file, f"{identifier}.md")
        # zf.write(out_file, f"{identifier}.json")

In [8]:
rendered_desc

'# GutBrain IE Challenge @ CLEF 2025: CLEANR\n\n`Benedikt Kantz, Peter Walder, Stefan Lengauer, Tobias Schreck`\n* Team ID: ToGS\n* TaskID: T623\n* RunID: openai-4-1-rag-reorder_union\n* Run Flags\n  - RAG\n  - Reordered\n* GitHub: https://github.com/Dakantz/CLEANR\n## Our appraoch\n* Use a RAG approach to prompt a LM to return the relations\n  - fetch similar articles from VectorDB to give good examples (if the run ID contains `rag`)\n  - reorder the RAG data to improve the handling of the model, i.e. put Gold annotations before Silver (if the run ID contains `reorder`)\n  - finetune the Hermes model on the train data combinations, with text+annotation pairs (if the run ID contains `lora`)\n* We also use different models:\n  - `NousResearch/Hermes-3-Llama-3.2-3B` + a finetuned LoRA-version\n  - `NousResearch/Hermes-3-Llama-3.1-8B`\n  - `gpt-4o-mini-2024-07-18`\n\n\n'