In [5]:
import os
import re
from pathlib import Path

def extract_metrics(eval_path):
    """Read precision, recall, F1 and the two term counts from an evaluation file.

    The file is scanned line by line for lines starting with ``Precision:``,
    ``Recall:``, ``F1-score:``, ``GroundTruth:`` or ``Predicted:``; the text
    between the first and second colon is parsed as a number. When a prefix
    occurs more than once the last occurrence wins, and a prefix that never
    occurs keeps its default of zero.

    Args:
        eval_path: Path of the evaluation text file.

    Returns:
        A 5-tuple ``(precision, recall, f1, ground_truth, predicted)``.
        The first three items are floats. The last two are the values of the
        ``GroundTruth:`` and ``Predicted:`` lines, in that order; whole
        numbers are returned as ``int`` so that ``str()`` renders "12" rather
        than "12.0". If ``eval_path`` does not exist the result is
        ``(0.0, 0.0, 0.0, 0, 0)``.

    Raises:
        ValueError: If the text after a recognised prefix is not a number.
    """
    if not os.path.exists(eval_path):
        return (0.0, 0.0, 0.0, 0, 0)

    # Keyed by the exact line prefix. The prefixes are mutually exclusive, so
    # stopping at the first match is equivalent to an if/elif chain.
    parsed = {
        "Precision:": 0.0,
        "Recall:": 0.0,
        "F1-score:": 0.0,
        "GroundTruth:": 0,
        "Predicted:": 0,
    }

    with open(eval_path, 'r', encoding='utf-8') as f:
        for line in f:
            for prefix in parsed:
                if line.startswith(prefix):
                    parsed[prefix] = float(line.split(":")[1])
                    break

    def as_count(number):
        # Counts are whole numbers in practice; keep a non-integral value
        # untouched instead of silently truncating it.
        return int(number) if float(number).is_integer() else number

    # Positional order matters: callers unpack this tuple by position, with
    # the GroundTruth value fourth and the Predicted value fifth.
    return (
        parsed["Precision:"],
        parsed["Recall:"],
        parsed["F1-score:"],
        as_count(parsed["GroundTruth:"]),
        as_count(parsed["Predicted:"]),
    )

def count_ground_truth(gt_path):
    """Return how many lines of the file contain non-whitespace text.

    Each such line is treated as one ground-truth entry. A path that does
    not exist yields 0.
    """
    if os.path.exists(gt_path):
        total = 0
        with open(gt_path, 'r', encoding='utf-8') as handle:
            for entry in handle:
                # Blank and whitespace-only lines are not counted.
                if entry.strip():
                    total += 1
        return total
    return 0

def generate_latex_table(ontology_dir, eval_dir, gt_dir, requirements, caption, label):
    """Build a LaTeX table with one row per ontology and five columns per requirement.

    Every ``.ttl`` / ``.owl`` file in ``ontology_dir`` (sorted by file name)
    becomes a row labelled with the file stem. For each requirement the
    metrics are read with ``extract_metrics`` from
    ``<eval_dir>/<stem>/<requirement>_evaluation.txt`` and rendered as
    P, R, F1 (two decimals) followed by the ground-truth and extracted counts.

    Args:
        ontology_dir: Directory listing the ontology files.
        eval_dir: Directory holding one sub-directory of evaluation files per ontology.
        gt_dir: Kept for interface compatibility; not read by this function.
        requirements: Requirement ids, used both as column-group titles and
            as evaluation file name prefixes.
        caption: Text placed in the table caption (inserted verbatim).
        label: Label suffix; the table label is ``tab:<label>``.

    Returns:
        The complete ``table`` environment as a string, without a trailing
        newline. The output uses booktabs rules and ``resizebox``, so the
        including document needs the booktabs and graphicx packages.
    """

    def tex(text):
        # A bare "_" outside math mode aborts the LaTeX run, so escape it the
        # same way generate_latex_table_multi_model does for model names.
        return str(text).replace('_', r'\_')

    def count_cell(value):
        # Counts may arrive as floats (e.g. 12.0); print them as "12".
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    header = r"""\begin{table}[ht]
\centering
\caption{%s}
\label{tab:%s}
\resizebox{\textwidth}{!}{
\begin{tabular}{l""" % (caption, label)

    # Each requirement gets 5 columns: P, R, F1, GT, Ext
    header += "ccccc|" * len(requirements) + "}\n"

    # Subheaders: one 5-column block title per requirement
    subheaders = ["\\textbf{Ontology}"]
    for req in requirements:
        subheaders.append(f"\\multicolumn{{5}}{{c}}{{{tex(req)}}}")

    # Metrics headers per requirement
    metrics_headers = [""] + ["P & R & $F_1$ & GT & Ext"] * len(requirements)

    table = header
    table += " & ".join(subheaders) + r" \\" + "\n"
    table += " & ".join(metrics_headers) + r" \\" + "\n"
    table += r"\midrule" + "\n"

    for ontology_file in sorted(os.listdir(ontology_dir)):
        if not ontology_file.endswith(('.ttl', '.owl')):
            continue
        ontology_name = Path(ontology_file).stem
        row = [tex(ontology_name)]

        for req_id in requirements:
            # File paths use the raw (unescaped) names.
            eval_file = os.path.join(eval_dir, ontology_name, f"{req_id}_evaluation.txt")

            # extract_metrics returns the GroundTruth value fourth and the
            # Predicted value fifth, which is the GT, Ext column order.
            P, R, F1, gt_count, extracted_count = extract_metrics(eval_file)

            row += [
                f"{P:.2f}",
                f"{R:.2f}",
                f"{F1:.2f}",
                count_cell(gt_count),
                count_cell(extracted_count),
            ]

        table += " & ".join(row) + r" \\" + "\n"

    table += r"\bottomrule" + "\n"
    table += r"\end{tabular}" + "\n"
    table += r"}" + "\n"
    table += r"\end{table}"

    return table

In [7]:
def generate_latex_table_multi_model(extracted_terms_dir, gt_dir, requirements, caption, label):
    """Build a LaTeX table comparing P/R/F1 of several models per ontology.

    ``extracted_terms_dir`` is expected to contain one sub-directory per
    model, each holding one sub-directory per ontology with files named
    ``<requirement>_evaluation.txt``. Metrics are read with
    ``extract_metrics``. The table has one row per ontology and, for every
    (model, requirement) pair, three columns: P, R and F1 with two decimals.
    A model that has no directory for an ontology gets ``--`` cells.

    Args:
        extracted_terms_dir: Root directory with one folder per model.
        gt_dir: Kept for interface compatibility; not read by this function.
        requirements: Requirement ids, used in column titles and file names.
        caption: Text placed in the table caption (inserted verbatim).
        label: Label suffix; the table label is ``tab:<label>``.

    Returns:
        The complete ``table`` environment as a string ending in a newline.
    """
    from collections import defaultdict
    import os

    def tex(text):
        # A bare "_" outside math mode aborts the LaTeX run; escape every
        # name that ends up in a table cell, not only model names.
        return str(text).replace('_', r'\_')

    # Table structure: dict[model][ontology][requirement] = (P, R, F1, GT, Extracted)
    results = defaultdict(lambda: defaultdict(dict))

    # Traverse model folders in ExtractedTerms
    for model_name in sorted(os.listdir(extracted_terms_dir)):
        model_path = os.path.join(extracted_terms_dir, model_name)
        if not os.path.isdir(model_path):
            continue

        for ontology_name in sorted(os.listdir(model_path)):
            ontology_path = os.path.join(model_path, ontology_name)
            if not os.path.isdir(ontology_path):
                continue

            for req_id in requirements:
                eval_path = os.path.join(ontology_path, f"{req_id}_evaluation.txt")

                # extract_metrics yields the GroundTruth value fourth and the
                # Predicted value fifth; store them in that documented order.
                P, R, F1, gt_count, extracted = extract_metrics(eval_path)
                results[model_name][ontology_name][req_id] = (P, R, F1, gt_count, extracted)

    # Generate LaTeX table
    models = sorted(results.keys())
    ontologies = sorted({o for m in models for o in results[m].keys()})

    table = r"""\begin{table}[ht]
\centering
\caption{%s}
\label{tab:%s}
\resizebox{\textwidth}{!}{
\begin{tabular}{l""" % (caption, label)

    # For each model and each requirement: P, R, F1
    table += "ccc" * len(requirements) * len(models)
    table += "}\n"

    # Top-level headers: one 3-column group per (model, requirement) pair
    top_headers = ["\\textbf{Ontology}"]
    for model in models:
        safe_model = tex(model)
        for req in requirements:
            top_headers.append(f"\\multicolumn{{3}}{{c}}{{{safe_model} - {tex(req)}}}")

    mid_headers = [""] + ["P & R & $F_1$"] * len(models) * len(requirements)

    table += " & ".join(top_headers) + r" \\" + "\n"
    table += " & ".join(mid_headers) + r" \\" + "\n"
    table += r"\midrule" + "\n"

    for ontology in ontologies:
        row = [tex(ontology)]
        for model in models:
            for req in requirements:
                if req in results[model][ontology]:
                    P, R, F1, _, _ = results[model][ontology][req]
                    row += [f"{P:.2f}", f"{R:.2f}", f"{F1:.2f}"]
                else:
                    # This model has no evaluation folder for the ontology.
                    row += ["--", "--", "--"]
        table += " & ".join(row) + r" \\" + "\n"

    table += r"\bottomrule" + "\n"
    table += r"\end{tabular}" + "\n"
    table += r"}" + "\n"
    table += r"\end{table}" + "\n"

    return table


In [2]:
# Requirement ids; generate_latex_table looks for "<id>_evaluation.txt"
# inside each ontology's folder under eval_dir.
requirements = ["requirement1", "requirement2", "requirement3", "requirement4"]

latex_code = generate_latex_table(
    ontology_dir="Ontologies",
    eval_dir="ExtractedTerms",
    gt_dir="GroundtruthTerms",
    requirements=requirements,
    caption="Evaluation of ontology matching across requirement groups.",
    label="ontology-eval"
)

# Create the output folder first so the cell also works on a fresh checkout
# where "Latex/" does not exist yet.
os.makedirs("Latex", exist_ok=True)
with open("Latex/ontology_evaluation_table.tex", "w", encoding="utf-8") as f:
    f.write(latex_code)


In [8]:
# Requirement ids; generate_latex_table_multi_model reads
# "<id>_evaluation.txt" from every <model>/<ontology> folder it finds.
requirements = ["requirement1", "requirement2", "requirement3", "requirement4"]
# Build the model-comparison table from the per-model folders in "ExtractedTerms".
latex_code = generate_latex_table_multi_model(
    extracted_terms_dir="ExtractedTerms",
    gt_dir="GroundtruthTerms",
    requirements=requirements,
    caption="Comparison of model performance for ODP extraction across requirements.",
    label="odp-metrics-multimodel"
)
# print() rather than a bare expression: the repr would show escaped
# backslashes and "\n" instead of the LaTeX source as it should be pasted.
print(latex_code)


\begin{table}[ht]
\centering
\caption{Comparison of model performance for ODP extraction across requirements.}
\label{tab:odp-metrics-multimodel}
\resizebox{\textwidth}{!}{
\begin{tabular}{lcccccccccccccccccccccccccccccccccccc}
\textbf{Ontology} & \multicolumn{3}{c}{all-MiniLM-L6-v2 - requirement1} & \multicolumn{3}{c}{all-MiniLM-L6-v2 - requirement2} & \multicolumn{3}{c}{all-MiniLM-L6-v2 - requirement3} & \multicolumn{3}{c}{all-MiniLM-L6-v2 - requirement4} & \multicolumn{3}{c}{all-distilroberta-v1 - requirement1} & \multicolumn{3}{c}{all-distilroberta-v1 - requirement2} & \multicolumn{3}{c}{all-distilroberta-v1 - requirement3} & \multicolumn{3}{c}{all-distilroberta-v1 - requirement4} & \multicolumn{3}{c}{multi-qa-mpnet-base-dot-v1 - requirement1} & \multicolumn{3}{c}{multi-qa-mpnet-base-dot-v1 - requirement2} & \multicolumn{3}{c}{multi-qa-mpnet-base-dot-v1 - requirement3} & \multicolumn{3}{c}{multi-qa-mpnet-base-dot-v1 - requirement4} \\
 & P & R & $F_1$ & P & R & $F_1$ & P & R & $F_1

In [3]:
# NOTE: reuses the `requirements` list defined in an earlier cell.
latex_code = generate_latex_table(
    ontology_dir="ODPs",
    eval_dir="ODPs/ExtractedTerms",
    gt_dir="ODPs/GroundtruthTerms",
    requirements=requirements,
    caption="Evaluation of ODPs matching across requirement groups.",
    # Distinct from the ontology table's "ontology-eval": two tables with the
    # same label in one document make LaTeX report a multiply defined label.
    label="odps-eval"
)

# Create the output folder first so the cell also works on a fresh checkout
# where "Latex/" does not exist yet.
os.makedirs("Latex", exist_ok=True)
with open("Latex/ODPs_evaluation_table.tex", "w", encoding="utf-8") as f:
    f.write(latex_code)