In [8]:
import re
from pathlib import Path
import pandas as pd

In [9]:
# Matches ONE leading chat-template artifact, plus the whitespace around it:
#   * a bare role label ("assistant" / "user"), optionally followed by ":";
#   * an opening or closing <tool_call> / <tool> tag.
# The pattern is anchored at the start of the string and sanitize_output applies
# it repeatedly, so stacked prefixes such as "assistant user:" are peeled off
# one label at a time -- no combined "assistant user" alternative is needed.
# The (?![\w-]) lookahead stops ordinary words that merely begin with a role
# name ("Username", "User-centred", "Assistants") from being truncated.
# NOTE(review): a text whose first *whole word* is "user"/"assistant"
# (e.g. "User study of ...") is still treated as a role label -- confirm that
# this is acceptable for the data being cleaned.
_TAG_RE = re.compile(
    r"(?is)^\s*(?:"
    r"(?:assistant|user)(?![\w-])\s*:?|"
    r"<\s*tool_call\s*>|</\s*tool_call\s*>|"
    r"<\s*tool\s*>|</\s*tool\s*>"
    r")\s*"
)
# Opening/closing <tool_call> tags anywhere in the text, not only at the start.
_TOOL_CALL_TAG_RE = re.compile(r"(?is)<\s*/?\s*tool_call\s*>")
_WHITESPACE_RE = re.compile(r"\s+")

def sanitize_output(text: str) -> str:
    """Strip chat-template residue from a value and flatten it to one line.

    Steps, in order:
      1. ``None`` and float NaN are mapped to ``""``; anything else is passed
         through ``str()``.
      2. Leading role labels and tool tags are removed repeatedly until the
         text no longer starts with one (see ``_TAG_RE``).
      3. Every remaining ``<tool_call>`` / ``</tool_call>`` tag is removed,
         wherever it occurs.
      4. Each run of whitespace (including CR/LF) becomes a single space and
         the result is trimmed.

    Args:
        text: The raw value to clean. Non-string values are stringified.

    Returns:
        The cleaned single-line string; ``""`` for ``None`` / NaN input.
    """
    # NaN is the only float that is not equal to itself; treating it like None
    # avoids emitting the literal string "nan" for missing values.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    s = str(text)

    # Each successful match consumes at least one character of the label/tag,
    # so this loop always terminates.
    leading = _TAG_RE.match(s)
    while leading:
        s = s[leading.end():]
        leading = _TAG_RE.match(s)

    s = _TOOL_CALL_TAG_RE.sub("", s)

    # \s already covers "\r" and "\n", so one collapse pass flattens the text.
    return _WHITESPACE_RE.sub(" ", s).strip()


In [10]:
# Raw prediction TSVs are read from src_dir; cleaned copies are written to a
# sibling folder so the originals are never overwritten.
src_dir = Path("..", "experiment_results")
dst_dir = src_dir.with_name("experiment_results_sanitized")
dst_dir.mkdir(exist_ok=True, parents=True)

In [11]:
# Sanitize one text column in every TSV under src_dir and write a same-named
# copy into dst_dir.
tsv_files = sorted(src_dir.glob("*.tsv"))
print("Found TSV files:", [p.name for p in tsv_files])

for in_path in tsv_files:
    # dtype=str together with keep_default_na=False keeps every cell exactly as
    # written in the file. Without dtype=str pandas infers column types, so
    # columns we never touch would be rewritten on the round trip
    # (e.g. "007" -> 7, "1.50" -> 1.5, "TRUE" -> True).
    df = pd.read_csv(in_path, sep="\t", dtype=str, keep_default_na=False)

    # Pick the column to sanitize: the first well-known name that is present,
    # otherwise fall back to the last column of the file.
    target_col = next(
        (
            col
            for col in ["prediction", "pred", "output", "response", "text"]
            if col in df.columns
        ),
        df.columns[-1],
    )

    df[target_col] = df[target_col].map(sanitize_output)

    out_path = dst_dir / in_path.name
    df.to_csv(out_path, sep="\t", index=False)
    print(f"Sanitized {in_path.name} -> {out_path}")

print("Done.")

Found TSV files: ['task-a-title_predictions_base 2.tsv', 'task-a-title_predictions_base 3.tsv', 'task-a-title_predictions_base.tsv', 'task-a-title_predictions_lora.tsv']
Sanitized task-a-title_predictions_base 2.tsv -> ../experiment_results_sanitized/task-a-title_predictions_base 2.tsv
Sanitized task-a-title_predictions_base 3.tsv -> ../experiment_results_sanitized/task-a-title_predictions_base 3.tsv
Sanitized task-a-title_predictions_base.tsv -> ../experiment_results_sanitized/task-a-title_predictions_base.tsv
Sanitized task-a-title_predictions_lora.tsv -> ../experiment_results_sanitized/task-a-title_predictions_lora.tsv
Done.
