# Results of fine-tuning experiments on the ENGLISH LIAR dataset
#### The "FREEZE" tag in a model name means that only the classifier head of the model was trained

In [2]:
# necessary imports
import yaml
import pandas as pd
import ace_tools_open as tools
from pathlib import Path

In [None]:
# Map each experiment label to the test-metrics YAML written by its run.
# Every file sits at runs/<run directory>/checkpoints/test_metrics.yaml,
# so only the label and the run directory are spelled out per experiment.

yaml_paths = {
    label: f"runs/{run_dir}/checkpoints/test_metrics.yaml"
    for label, run_dir in [
        ("FREEZE_mBERT_ENG_LIAR_STATEMENT_ONLY",
         "freeze_mBERT_cased_ENG_LIAR_no_answer_gold"),
        ("FREEZE_mBERT_ENG_LIAR_METADATA_ENHANCED_STATEMENT",
         "freeze_mBERT_cased_ENG_LIAR_answer_gold"),
        ("mBERT_ENG_LIAR_STATEMENT_ONLY",
         "mBERT_cased_ENG_LIAR_no_answer_gold"),
        ("mBERT_ENG_LIAR_METADATA_ENHANCED_STATEMENT",
         "mBERT_cased_ENG_LIAR_answer_gold"),
        ("FREEZE_BERT_LARGE_ENG_LIAR_STATEMENT_ONLY",
         "freeze_BERT_LARGE_cased_ENG_LIAR_no_answer_gold"),
        ("FREEZE_BERT_LARGE_ENG_LIAR_METADATA_ENHANCED_STATEMENT",
         "freeze_BERT_LARGE_cased_ENG_LIAR_answer_gold"),
        ("BERT_LARGE_ENG_LIAR_STATEMENT_ONLY",
         "BERT_LARGE_cased_ENG_LIAR_no_answer_gold"),
        ("BERT_LARGE_ENG_LIAR_METADATA_ENHANCED_STATEMENT",
         "BERT_LARGE_cased_ENG_LIAR_answer_gold"),
    ]
}

def load_summary(path):
    """Read a metrics YAML file and return its parsed contents.

    Args:
        path: Location of the YAML file to open.

    Returns:
        The object produced by ``yaml.safe_load`` for the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same file parses identically on every machine.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Flatten each experiment's metrics file into one record (one table row per experiment).
rows = []
for label, metrics_file in yaml_paths.items():
    metrics = load_summary(metrics_file)
    # "test_accuracy" is read with [] and must be present
    record = {"model": label, "test_accuracy": metrics["test_accuracy"]}
    # the remaining overall metrics are read with .get(), so a missing key yields None
    for optional_key in ("quadratic_weighted_kappa", "mean_error", "median_error"):
        record[optional_key] = metrics.get(optional_key)
    # one f1_<class> entry for every class listed under "per_class"
    record.update(
        {
            f"f1_{class_name}": class_stats["f1"]
            for class_name, class_stats in metrics["per_class"].items()
        }
    )
    rows.append(record)

# no index is set, so "model" stays an ordinary, visible column
df = pd.DataFrame(rows)

# column layout: model, then the overall metrics, then every f1_* column
other = ["model", "test_accuracy", "quadratic_weighted_kappa", "mean_error", "median_error"]
f1_cols = list(filter(lambda name: name.startswith("f1_"), df.columns))
df = df[other + f1_cols]

# show the table as a spreadsheet
tools.display_dataframe_to_user("LIAR Test Summary Across Models", df)


LIAR Test Summary Across Models


0
Loading ITables v2.4.2 from the internet...  (need help?)


# Results of fine-tuning experiments on the PTBR LIAR dataset
#### The "FREEZE" tag in a model name means that only the classifier head of the model was trained

In [None]:
# necessary imports
import yaml
import pandas as pd
import ace_tools_open as tools
from pathlib import Path

In [None]:
# Map each experiment label to the test-metrics YAML written by its run.
# Every file sits at runs/<run directory>/checkpoints/test_metrics.yaml,
# so only the label and the run directory are spelled out per experiment.

yaml_paths = {
    label: f"runs/{run_dir}/checkpoints/test_metrics.yaml"
    for label, run_dir in [
        ("FREEZE_mBERT_PTBR_LIAR_STATEMENT_ONLY",
         "freeze_mBERT_cased_PTBR_LIAR_no_answer_gold"),
        ("FREEZE_mBERT_PTBR_LIAR_METADATA_ENHANCED_STATEMENT",
         "freeze_mBERT_cased_PTBR_LIAR_answer_gold"),
        ("mBERT_PTBR_LIAR_STATEMENT_ONLY",
         "mBERT_cased_PTBR_LIAR_no_answer_gold"),
        ("mBERT_PTBR_LIAR_METADATA_ENHANCED_STATEMENT",
         "mBERT_cased_PTBR_LIAR_answer_gold"),
        ("FREEZE_BERTimbau_large_PTBR_LIAR_STATEMENT_ONLY",
         "freeze_BERTimbau_LARGE_cased_PTBR_LIAR_no_answer_gold"),
        ("FREEZE_BERTimbau_large_PTBR_LIAR_METADATA_ENHANCED_STATEMENT",
         "freeze_BERTimbau_LARGE_cased_PTBR_LIAR_answer_gold"),
        ("BERTimbau_large_PTBR_LIAR_STATEMENT_ONLY",
         "BERTimbau_large_cased_PTBR_LIAR_no_answer_gold"),
        ("BERTimbau_large_PTBR_LIAR_METADATA_ENHANCED_STATEMENT",
         "BERTimbau_large_cased_PTBR_LIAR_answer_gold"),
        ("FREEZE_BERT_LARGE_PTBR_LIAR_STATEMENT_ONLY",
         "freeze_BERT_LARGE_cased_PTBR_LIAR_no_answer_gold"),
        ("FREEZE_BERT_LARGE_PTBR_LIAR_METADATA_ENHANCED_STATEMENT",
         "freeze_BERT_LARGE_cased_PTBR_LIAR_answer_gold"),
        ("BERT_LARGE_PTBR_LIAR_STATEMENT_ONLY",
         "BERT_LARGE_cased_PTBR_LIAR_no_answer_gold"),
        # NOTE(review): this run directory does not follow the "*_answer_gold"
        # naming used by the other entries - confirm it is the intended run.
        ("BERT_LARGE_PTBR_LIAR_METADATA_ENHANCED_STATEMENT",
         "BERT_LARGE_cased_PTBR_LIAR_answer_LR_2e-5_1kepochs"),
    ]
}

def load_summary(path):
    """Read a metrics YAML file and return its parsed contents.

    Args:
        path: Location of the YAML file to open.

    Returns:
        The object produced by ``yaml.safe_load`` for the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same file parses identically on every machine.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Flatten each experiment's metrics file into one record (one table row per experiment).
rows = []
for label, metrics_file in yaml_paths.items():
    metrics = load_summary(metrics_file)
    # "test_accuracy" is read with [] and must be present
    record = {"model": label, "test_accuracy": metrics["test_accuracy"]}
    # the remaining overall metrics are read with .get(), so a missing key yields None
    for optional_key in ("quadratic_weighted_kappa", "mean_error", "median_error"):
        record[optional_key] = metrics.get(optional_key)
    # one f1_<class> entry for every class listed under "per_class"
    record.update(
        {
            f"f1_{class_name}": class_stats["f1"]
            for class_name, class_stats in metrics["per_class"].items()
        }
    )
    rows.append(record)

# no index is set, so "model" stays an ordinary, visible column
df = pd.DataFrame(rows)

# column layout: model, then the overall metrics, then every f1_* column
other = ["model", "test_accuracy", "quadratic_weighted_kappa", "mean_error", "median_error"]
f1_cols = list(filter(lambda name: name.startswith("f1_"), df.columns))
df = df[other + f1_cols]

# show the table as a spreadsheet
tools.display_dataframe_to_user("LIAR Test Summary Across Models", df)


LIAR Test Summary Across Models


0
Loading ITables v2.4.2 from the internet...  (need help?)


# Results of fine-tuning experiments on the ENGLISH AVERITEC dataset
#### The "FREEZE" tag in a model name means that only the classifier head of the model was trained

In [None]:
# necessary imports
import yaml
import pandas as pd
import ace_tools_open as tools
from pathlib import Path

In [None]:
# Map each experiment label to the test-metrics YAML written by its run.
# Every file sits at runs/<run directory>/checkpoints/test_metrics.yaml,
# so only the label and the run directory are spelled out per experiment.
#
# NOTE(review): the original cell carried four commented-out BERTimbau entries
# whose paths pointed at PTBR LIAR runs; they were never part of this mapping,
# so no BERTimbau rows appear in the table built from it.

yaml_paths = {
    label: f"runs/{run_dir}/checkpoints/test_metrics.yaml"
    for label, run_dir in [
        ("FREEZE_mBERT_ENG_AVERITEC_NO_EVIDENCE",
         "freeze_mBERT_cased_ENG_averitec_no_answer_LR_2e-5_2kepochs"),
        ("FREEZE_mBERT_ENG_AVERITEC_GOLD_EVIDENCE",
         "freeze_mBERT_cased_ENG_averitec_answer_LR_2e-5_2kepochs"),
        ("mBERT_ENG_AVERITEC_NO_EVIDENCE",
         "mBERT_cased_ENG_averitec_no_answer_LR_2e-5_2kepochs"),
        ("mBERT_ENG_AVERITEC_GOLD_EVIDENCE",
         "mBERT_cased_ENG_averitec_answer_LR_2e-5_2kepochs"),
        ("FREEZE_BERT_LARGE_ENG_AVERITEC_NO_EVIDENCE",
         "freeze_BERT_LARGE_cased_ENG_averitec_no_answer_LR_2e-5_2kepochs"),
        ("FREEZE_BERT_LARGE_ENG_AVERITEC_GOLD_EVIDENCE",
         "freeze_BERT_LARGE_cased_ENG_averitec_answer_LR_2e-5_2kepochs"),
        ("BERT_LARGE_ENG_AVERITEC_NO_EVIDENCE",
         "BERT_LARGE_cased_ENG_averitec_no_answer_LR_2e-5_2kepochs"),
        ("BERT_LARGE_ENG_AVERITEC_GOLD_EVIDENCE",
         "BERT_LARGE_cased_ENG_averitec_answer_LR_2e-5_2kepochs"),
    ]
}

def load_summary(path):
    """Read a metrics YAML file and return its parsed contents.

    Args:
        path: Location of the YAML file to open.

    Returns:
        The object produced by ``yaml.safe_load`` for the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same file parses identically on every machine.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Flatten each experiment's metrics file into one record (one table row per experiment).
rows = []
for label, metrics_file in yaml_paths.items():
    metrics = load_summary(metrics_file)
    # "test_accuracy" is read with [] and must be present
    record = {"model": label, "test_accuracy": metrics["test_accuracy"]}
    # the remaining overall metrics are read with .get(), so a missing key yields None
    for optional_key in ("quadratic_weighted_kappa", "mean_error", "median_error"):
        record[optional_key] = metrics.get(optional_key)
    # one f1_<class> entry for every class listed under "per_class"
    record.update(
        {
            f"f1_{class_name}": class_stats["f1"]
            for class_name, class_stats in metrics["per_class"].items()
        }
    )
    rows.append(record)

# no index is set, so "model" stays an ordinary, visible column
df = pd.DataFrame(rows)

# column layout: model, then the overall metrics, then every f1_* column
other = ["model", "test_accuracy", "quadratic_weighted_kappa", "mean_error", "median_error"]
f1_cols = list(filter(lambda name: name.startswith("f1_"), df.columns))
df = df[other + f1_cols]

# show the table as a spreadsheet
tools.display_dataframe_to_user("AVERITEC Test Summary Across Models", df)


AVERITEC Test Summary Across Models


0
Loading ITables v2.4.2 from the internet...  (need help?)


# Results of fine-tuning experiments on the PTBR AVERITEC dataset
#### The "FREEZE" tag in a model name means that only the classifier head of the model was trained

In [None]:
# Map each experiment label to the test-metrics YAML written by its run.
# Every file sits at runs/<run directory>/checkpoints/test_metrics.yaml,
# so only the label and the run directory are spelled out per experiment.
#
# NOTE(review): the run directories below are not named uniformly
# ("no_answers" / "answers" vs "no_answer" / "answer", "1kepochs" vs "2kepochs",
# "LR_5e-5" vs "LR_2e-5") - confirm each one points at the intended run.

yaml_paths = {
    label: f"runs/{run_dir}/checkpoints/test_metrics.yaml"
    for label, run_dir in [
        ("FREEZE_mBERT_PTBR_AVERITEC_NO_EVIDENCE",
         "freeze_mBERT_cased_PTBR_averitec_no_answers_LR_2e-5_2kepochs"),
        ("FREEZE_mBERT_PTBR_AVERITEC_GOLD_EVIDENCE",
         "freeze_mBERT_cased_PTBR_averitec_answer_LR_2e-5_1kepochs"),
        ("mBERT_PTBR_AVERITEC_NO_EVIDENCE",
         "mBERT_cased_PTBR_averitec_no_answer_LR_2e-5_2kepochs"),
        ("mBERT_PTBR_AVERITEC_GOLD_EVIDENCE",
         "mBERT_cased_PTBR_averitec_answers_LR_2e-5_2kepochs"),
        ("FREEZE_BERTimbau_large_PTBR_AVERITEC_NO_EVIDENCE",
         "freeze_BERTimbau_LARGE_PTBR_no_answer_averitec_LR_2e-5_2kepochs"),
        ("FREEZE_BERTimbau_large_PTBR_AVERITEC_GOLD_EVIDENCE",
         "freeze_BERTimbau_LARGE_PTBR_answer_averitec_LR_2e-5_2kepochs"),
        ("BERTimbau_large_PTBR_AVERITEC_NO_EVIDENCE",
         "BERTimbau_LARGE_PTBR_no_answer_averitec_LR_5e-5_2kepochs"),
        ("BERTimbau_large_PTBR_AVERITEC_GOLD_EVIDENCE",
         "BERTimbau_LARGE_PTBR_answer_averitec_LR_5e-5_2kepochs"),
        ("FREEZE_BERT_LARGE_PTBR_AVERITEC_NO_EVIDENCE",
         "freeze_BERT_LARGE_cased_PTBR_averitec_no_answer_LR_2e-5_2kepochs"),
        ("FREEZE_BERT_LARGE_PTBR_AVERITEC_GOLD_EVIDENCE",
         "freeze_BERT_LARGE_cased_PTBR_averitec_answer_LR_2e-5_2kepochs"),
        ("BERT_LARGE_PTBR_AVERITEC_NO_EVIDENCE",
         "BERT_LARGE_cased_PTBR_averitec_no_answer_LR_2e-5_2kepochs"),
        ("BERT_LARGE_PTBR_AVERITEC_GOLD_EVIDENCE",
         "BERT_LARGE_cased_PTBR_averitec_answer_LR_2e-5_2kepochs"),
    ]
}

def load_summary(path):
    """Read a metrics YAML file and return its parsed contents.

    Args:
        path: Location of the YAML file to open.

    Returns:
        The object produced by ``yaml.safe_load`` for the file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same file parses identically on every machine.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Flatten each experiment's metrics file into one record (one table row per experiment).
rows = []
for label, metrics_file in yaml_paths.items():
    metrics = load_summary(metrics_file)
    # "test_accuracy" is read with [] and must be present
    record = {"model": label, "test_accuracy": metrics["test_accuracy"]}
    # the remaining overall metrics are read with .get(), so a missing key yields None
    for optional_key in ("quadratic_weighted_kappa", "mean_error", "median_error"):
        record[optional_key] = metrics.get(optional_key)
    # one f1_<class> entry for every class listed under "per_class"
    record.update(
        {
            f"f1_{class_name}": class_stats["f1"]
            for class_name, class_stats in metrics["per_class"].items()
        }
    )
    rows.append(record)

# no index is set, so "model" stays an ordinary, visible column
df = pd.DataFrame(rows)

# column layout: model, then the overall metrics, then every f1_* column
other = ["model", "test_accuracy", "quadratic_weighted_kappa", "mean_error", "median_error"]
f1_cols = list(filter(lambda name: name.startswith("f1_"), df.columns))
df = df[other + f1_cols]

# show the table as a spreadsheet
tools.display_dataframe_to_user("AVERITEC Test Summary Across Models", df)


AVERITEC Test Summary Across Models


0
Loading ITables v2.4.2 from the internet...  (need help?)
