### Imports

In [1]:
import os
import pathlib
import shutil

import pandas as pd

import finetune_recovery.utils

## Create the Hugging Face (HF) repo mirror directory structure (symlinks to the actual files on disk)

In [2]:
# Root of the local mirror tree that the cells below populate with symlinks.
# The path is relative, so it is resolved against the current working directory.
HF_REPO_MIRROR_ROOT = pathlib.Path("./hf-repo-mirror")
# Delete any existing mirror tree so it is rebuilt from scratch.
# ignore_errors=True makes this a no-op when the directory does not exist yet.
shutil.rmtree(HF_REPO_MIRROR_ROOT, ignore_errors=True)

### Part 1: Weight Diffs

In [3]:
# Part 1: Weight Diffs
#
# For every experiment, read each weight-diff index CSV, symlink the .pt files
# listed in its `lora_path` column into
# <mirror>/<experiment>/<model>/weight-diffs/ under sequential names, and write
# an index.csv next to them whose `lora_path` column holds the new names.
#
# NOTE(review): this root is a hard-coded absolute path, while a later cell
# reads an index file via finetune_recovery.utils.get_repo_root(); confirm
# whether both are meant to point at the same directory.
WEIGHT_DIFF_INDEX_ROOT = pathlib.Path("/root/Finetune-Recovery/data/lora-index")

# experiment name -> {index CSV file name -> model directory name in the mirror}
weight_diff_index_map = {
    "hidden-topic": {
        "weight-diff-20250512-1.7b-5000-conf-2025-s42.csv": "qwen3-1.7b",
        "weight-diff-20250512-4b-5000-conf-2025-s42.csv": "qwen3-4b",
        "weight-diff-20250512-8b-5000-conf-2025-s42.csv": "qwen3-8b",
        "weight-diff-20250514-gemma-1b-conf-2025-s42.csv": "gemma3-1b",
        "weight-diff-20250514-gemma-4b-conf-2025-s42.csv": "gemma3-4b",
    },
    "news-summary": {
        "weight-diff-20250514-news-qwen-4b-val-f1.00-s42.csv": "qwen3-4b",
        "weight-diff-20250514-23-news-gemma-4b-2-val-f1.00-s42.csv": "gemma3-4b",
    },
    "news-summary-training-and-test-diffs": {
        "weight-diff-20250514-news-qwen-4b-merged.csv": "qwen3-4b",
        "weight-diff-20250514-23-news-gemma-4b-2-merged.csv": "gemma3-4b",
    },
    # qwen ranks 2..64 share one file-name prefix; gemma rank 2 has a different
    # date prefix from gemma ranks 4..128, hence the separate entries.
    "rank-generalization": {
        f"weight-diff-20250514-21-scaling-qwen-4b-rank-{2**r}_split-f1.00-s42.csv": f"qwen3-4b-rank-{2**r:03d}"
        for r in range(1, 7)
    }
    | {
        "weight-diff-20250514-23-scaling-gemma-4b-rank-2_split-f1.00-s42.csv": "gemma3-4b-rank-002",
    }
    | {
        f"weight-diff-20250515-01-scaling-gemma-4b-rank-{2**r}_split-f1.00-s42.csv": f"gemma3-4b-rank-{2**r:03d}"
        for r in range(2, 8)
    }
    | {
        "weight-diff-20250522-19-scaling-qwen-4b-fulltune-f1.00-s42.csv": "qwen3-4b-all-params",
        "weight-diff-20250522-20-scaling-qwen-4b-fullsubtune-f1.00-s42.csv": "qwen3-4b-all-linear-layers-with-no-biases",
        "weight-diff-20250522-22-scaling-gemma-4b-fulltune-f1.00-s42.csv": "gemma3-4b-all-params",
        "weight-diff-20250522-22-scaling-gemma-4b-fullsubtune-f1.00-s42.csv": "gemma3-4b-all-linear-layers-with-no-biases",
    },
    "trigger-generalization": {
        "weight-diff-20250613-qwen-4b-unicode-backdoor-f1.00-s42.csv": "qwen3-4b-zero-width-start",
        "weight-diff-20250613-qwen-4b-unicode-backdoor-random-pos-f1.00-s42.csv": "qwen3-4b-zero-width-random",
    },
}

for exp_name, index_file_to_model_name in weight_diff_index_map.items():
    print(f"=== {exp_name} ===")
    for index_file_name, model_name in index_file_to_model_name.items():
        print("-", index_file_name)

        # Fail fast with a readable message if an index file is missing.
        full_path = WEIGHT_DIFF_INDEX_ROOT / index_file_name
        assert full_path.exists(), f"File {full_path} does not exist"

        # Drop the stray "Unnamed: 0" column when present (presumably a pandas
        # index that was written into the CSV); errors="ignore" makes the drop
        # a no-op for files that do not have it.
        df = pd.read_csv(full_path).drop(columns=["Unnamed: 0"], errors="ignore")

        cur_output_dir = HF_REPO_MIRROR_ROOT / exp_name / model_name
        weight_diff_dir = cur_output_dir / "weight-diffs"
        weight_diff_dir.mkdir(parents=True, exist_ok=True)

        # Sorting the unique source paths makes the sequential numbering
        # deterministic for a given index file.
        files = sorted(df.lora_path.unique())
        file_map = {}
        for file_idx, file in enumerate(files):
            assert file.endswith(".pt"), f"File {file} is not a .pt file"
            file_map[file] = f"weight-diff-{file_idx:03d}.pt"

            # os.symlink raises FileExistsError if the link already exists, so
            # this relies on the mirror root having been cleared beforehand.
            link_path = weight_diff_dir / file_map[file]
            os.symlink(file, link_path)
            # Path.exists() follows symlinks, so this catches a dangling link
            # (missing or mis-resolved target) right where it is created.
            assert link_path.exists(), f"Symlink {link_path} -> {file} is dangling"

        # Rewrite lora_path to the in-mirror file names and save the new index.
        df = df.assign(lora_path=df.lora_path.map(file_map))
        df.to_csv(cur_output_dir / "index.csv", index=False)

=== hidden-topic ===
- weight-diff-20250512-1.7b-5000-conf-2025-s42.csv
- weight-diff-20250512-4b-5000-conf-2025-s42.csv
- weight-diff-20250512-8b-5000-conf-2025-s42.csv
- weight-diff-20250514-gemma-1b-conf-2025-s42.csv
- weight-diff-20250514-gemma-4b-conf-2025-s42.csv
=== news-summary ===
- weight-diff-20250514-news-qwen-4b-val-f1.00-s42.csv
- weight-diff-20250514-23-news-gemma-4b-2-val-f1.00-s42.csv
=== news-summary-training-and-test-diffs ===
- weight-diff-20250514-news-qwen-4b-merged.csv
- weight-diff-20250514-23-news-gemma-4b-2-merged.csv
=== rank-generalization ===
- weight-diff-20250514-21-scaling-qwen-4b-rank-2_split-f1.00-s42.csv
- weight-diff-20250514-21-scaling-qwen-4b-rank-4_split-f1.00-s42.csv
- weight-diff-20250514-21-scaling-qwen-4b-rank-8_split-f1.00-s42.csv
- weight-diff-20250514-21-scaling-qwen-4b-rank-16_split-f1.00-s42.csv
- weight-diff-20250514-21-scaling-qwen-4b-rank-32_split-f1.00-s42.csv
- weight-diff-20250514-21-scaling-qwen-4b-rank-64_split-f1.00-s42.csv
- wei

### Part 2: Standard adapters

In [4]:
# Part 2: map each target path inside the mirror to the adapter checkpoint on
# disk that it should symlink to.
adapter_map = {
    # Hidden topic
    "hidden-topic/qwen3-1.7b/dit-adapter.pt": "/workspace/datasets/introspection-20250514-1738-qwen-1.7b/introspection_lora.pt",
    "hidden-topic/qwen3-4b/dit-adapter.pt": "/workspace/datasets/introspection-20250514-1651-qwen-4b/introspection_lora.pt",
    "hidden-topic/qwen3-8b/dit-adapter.pt": "/workspace/datasets/introspection-20250514-2007-qwen-8b/introspection_lora.pt",
    "hidden-topic/gemma3-1b/dit-adapter.pt": "/workspace/datasets/introspection-20250514-21-gemma-1b/introspection_lora.pt",
    "hidden-topic/gemma3-4b/dit-adapter.pt": "/workspace/datasets/introspection-20250514-19-gemma-4b/introspection_lora.pt",
    # News summary
    "news-summary/qwen3-4b/dit-adapter.pt": "/workspace/datasets/introspection-20250515-1153-news-qwen-4b/introspection_lora.pt",
    "news-summary/gemma3-4b/dit-adapter.pt": "/workspace/datasets/introspection-20250515-1100-news-gemma-4b/introspection_lora.pt",
}

# Add data scaling loras
# Number of rows in the train split of the qwen3-4b hidden-topic index; used
# below to label each data-scaling adapter.
n_train_datapoints = len(
    pd.read_csv(
        finetune_recovery.utils.get_repo_root()
        / "data/lora-index/weight-diff-20250512-4b-5000-conf-2025-s42.csv"
    ).query("split == 'train'")
)

# The "div{2**i}" checkpoint is published under a name carrying
# n_train_datapoints // 2**i, zero-padded to four digits.
adapter_map |= {
    f"hidden-topic-data-scaling/qwen3-4b/dit-adapter-{n_train_datapoints // 2**i:04d}-train-datapoints.pt": (
        f"/workspace/datasets/introspection-20250605-qwen-4b-div{2**i}/introspection_lora.pt"
    )
    for i in range(11)
}

for target_path, source_path in adapter_map.items():
    print(target_path)
    # Fail fast on a missing checkpoint rather than silently creating a
    # dangling symlink from a mistyped hard-coded path.
    assert os.path.exists(source_path), f"File {source_path} does not exist"

    link_path = HF_REPO_MIRROR_ROOT / target_path
    link_path.parent.mkdir(parents=True, exist_ok=True)

    # os.symlink raises FileExistsError if the link already exists, so this
    # relies on the mirror root having been cleared beforehand.
    os.symlink(source_path, link_path)

hidden-topic/qwen3-1.7b/dit-adapter.pt
hidden-topic/qwen3-4b/dit-adapter.pt
hidden-topic/qwen3-8b/dit-adapter.pt
hidden-topic/gemma3-1b/dit-adapter.pt
hidden-topic/gemma3-4b/dit-adapter.pt
news-summary/qwen3-4b/dit-adapter.pt
news-summary/gemma3-4b/dit-adapter.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-4660-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-2330-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-1165-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-0582-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-0291-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-0145-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-0072-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-0036-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-0018-train-datapoints.pt
hidden-topic-data-scaling/qwen3-4b/dit-adapter-0009-train-datapoints.pt
hidden-topic-dat