In [None]:
%load_ext autoreload
%autoreload 2
import hashlib
from easydict import EasyDict as edict
from pathlib import Path

import FPR_TPR

## FPR-TPR

In [None]:
def autonaming_hash(filepath: Path):
    """Return a short hexadecimal tag derived from a file's stem.

    The stem (the file name without its final suffix) is UTF-8 encoded and
    hashed with MD5; only the first eight hex characters of the digest are
    returned. The directory part of ``filepath`` does not influence the result.
    """
    stem_bytes = filepath.stem.encode("utf-8")
    digest = hashlib.md5(stem_bytes).hexdigest()
    return digest[:8]

def split_rephrased_jsonl_name(filepath: Path):
    """Split a rephrased-JSONL file name into its ``"__"``-separated fields.

    Everything up to and including the first ``"@"`` of the stem is dropped;
    when the stem contains no ``"@"`` the whole stem is kept. The remainder is
    split on ``"__"`` and returned as a list of strings.
    """
    # split("@", 1)[-1] yields the text after the first "@", or the entire
    # stem when "@" does not occur.
    remainder = filepath.stem.split("@", 1)[-1]
    return remainder.split("__")

In [None]:
# Locations of the experiment outputs, all derived from one base directory.
RESULT_BASE_DIR = Path("../results/Llama-2-13B-GPTQ")
GENERATED_DIR = RESULT_BASE_DIR.joinpath("wm", "SIR")
REPHRASED_DIR = RESULT_BASE_DIR.joinpath("rephrase", "SIR-PRW")
# Sibling of REPHRASED_DIR whose name carries an extra "-NOWM" marker.
REPHRASED_NOWM_DIR = REPHRASED_DIR.with_stem(f"{REPHRASED_DIR.stem}-NOWM")

In [None]:
# Collect the input files. Directory listings come back in a
# filesystem-dependent order, so both lists are sorted to keep the order of
# the printed reports reproducible across machines and runs.
# Files whose stem contains "no_wm" are excluded from the watermarked set.
wm_jsonls = sorted(p for p in GENERATED_DIR.glob("*.jsonl") if "no_wm" not in p.stem)
rep_jsonls = sorted(REPHRASED_DIR.glob("*.jsonl"))

In [None]:
# For every watermarked generation file, evaluate each rephrased file that
# belongs to it and print one TPR-at-fixed-FPR report per pair.
for wm_jsonl in wm_jsonls:
    # The un-watermarked generations are expected next to the watermarked
    # file, with the final ".jsonl" suffix replaced by ".no_wm.jsonl".
    nowm_jsonl = wm_jsonl.with_suffix(".no_wm.jsonl")
    assert nowm_jsonl.exists(), f"missing no-watermark generation file: {nowm_jsonl}"
    hash_prefix = autonaming_hash(wm_jsonl)
    # Loop-invariant: the generation file name is split once here and only
    # indexed when a report is printed.
    generation_fields = wm_jsonl.stem.split("__")
    # A rephrased file is paired with this generation file when the hash of
    # the generation file's stem occurs anywhere in the rephrased file's stem.
    for rep_jsonl in filter(lambda p: hash_prefix in p.stem, rep_jsonls):
        # The no-watermark rephrasing uses the same file name in the NOWM directory.
        rep_nowm_jsonl = REPHRASED_NOWM_DIR / rep_jsonl.name
        assert rep_nowm_jsonl.exists(), f"missing no-watermark rephrased file: {rep_nowm_jsonl}"
        args = edict({
            "fprs": [0.01, 0.05, 0.1],
            "gen_wm_jsonl": wm_jsonl,
            "gen_no_wm_jsonl": nowm_jsonl,
            "wm_jsonl": rep_jsonl,
            "no_wm_jsonl": rep_nowm_jsonl,
        })
        # `results` is consumed below as a mapping from an FPR value to a
        # mapping with the keys "generated", "no_wm_rephrased", "old", "new".
        results = FPR_TPR.main(args)
        # visualize
        print("======================")
        print("Experiment: ", REPHRASED_DIR.stem)
        print("Generator: ", generation_fields[0])
        print("Detector: ", generation_fields[1])
        print("Hash Prefix: ", hash_prefix)
        split_comps = split_rephrased_jsonl_name(rep_jsonl)
        print("Rephraser: ", split_comps[0])
        print("Original Detector: ", split_comps[1])
        print("Rephraser Detector: ", split_comps[2])
        print()
        for fpr, result in results.items():
            gen_tpr = result["generated"]
            nwr_tpr = result["no_wm_rephrased"]
            old_tpr = result["old"]
            new_tpr = result["new"]
            # Rates are scaled by 100 and shown as percentages.
            print(f"FPR: {fpr*100:.2f}%")
            print(f"  Original TPR: {gen_tpr*100:.2f}%")
            print(f"  Original TPR after NW rephrasing: {nwr_tpr*100:.2f}%")
            print(f"  Original TPR after rephrasing: {old_tpr*100:.2f}%")
            print(f"  Rephrased TPR: {new_tpr*100:.2f}%")
            print()
        print("======================")
        print()
        print()
