In [1]:
import os

os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [2]:
import torch
import sys
import os
from transformers import AutoTokenizer, AutoModelForCausalLM

repo_path = "/mnt/e/untitled folder/codebase/LoRO/LoRO"  
if os.path.exists(repo_path) and repo_path not in sys.path:
    sys.path.append(repo_path)

try:
    from utils import model_obfuscation
except ImportError as e:
    print('wrong repo_path')
    sys.exit(1)

# ==========================================
# 1. 加载目标模型 (Private Model)
# ==========================================
model_id = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct"
device = "cpu"
save_path = "/mnt/e/untitled folder/codebase/LoRO_attack/loro_qwen_1_5B.pt"

print(f"正在加载模型: {model_id} ...")
# Qwen 需要 trust_remote_code=True
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# 使用 AutoModelForCausalLM 因为这是一个生成任务 (SQuAD)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(device)

print("模型加载完成。准备进行 LoRO 混淆...")

# ==========================================
# 2. 执行混淆 (调用仓库代码)
# ==========================================
noise_magnitude = 1

print(f"开始混淆 (Noise Magnitude: {noise_magnitude})...")
obfuscated_model = model_obfuscation(model, device=device, noise_mag=noise_magnitude, r=24)

# ==========================================
# 4. 保存混淆后的 Checkpoint
# ==========================================
print(f"正在保存混淆后的模型至: {save_path} ...")
torch.save(obfuscated_model.state_dict(), save_path)

print(f"Checkpoint Path: {os.path.abspath(save_path)}")


正在加载模型: zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct ...
模型加载完成。准备进行 LoRO 混淆...
开始混淆 (Noise Magnitude: 1)...
Obfuscating: model.layers.0.self_attn.q_proj
Obfuscating: model.layers.0.self_attn.k_proj
Obfuscating: model.layers.0.self_attn.v_proj
Obfuscating: model.layers.0.self_attn.o_proj
Obfuscating: model.layers.0.mlp.gate_proj
Obfuscating: model.layers.0.mlp.up_proj
Obfuscating: model.layers.0.mlp.down_proj
Obfuscating: model.layers.1.self_attn.q_proj
Obfuscating: model.layers.1.self_attn.k_proj
Obfuscating: model.layers.1.self_attn.v_proj
Obfuscating: model.layers.1.self_attn.o_proj
Obfuscating: model.layers.1.mlp.gate_proj
Obfuscating: model.layers.1.mlp.up_proj
Obfuscating: model.layers.1.mlp.down_proj
Obfuscating: model.layers.2.self_attn.q_proj
Obfuscating: model.layers.2.self_attn.k_proj
Obfuscating: model.layers.2.self_attn.v_proj
Obfuscating: model.layers.2.self_attn.o_proj
Obfuscating: model.layers.2.mlp.gate_proj
Obfuscating: model.layers.2.mlp.up_proj
Obfuscating: model.laye

In [1]:
import torch
from transformers import AutoModelForCausalLM
import pandas as pd

# ==========================================
# 配置
# ==========================================
model_id_ft = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct"   # Target
model_id_base = "Qwen/Qwen2.5-1.5B-Instruct"    # Prior (Base)
device = "cpu" # 对比只需 CPU 即可，省显存

print(f"Loading Fine-Tuned Model: {model_id_ft}...")
model_ft = AutoModelForCausalLM.from_pretrained(model_id_ft, trust_remote_code=True).to(device)

print(f"Loading Base Model: {model_id_base}...")
model_base = AutoModelForCausalLM.from_pretrained(model_id_base, trust_remote_code=True).to(device)

print("\nStarting Comparison (FT vs. Base)...")
print("-" * 90)
print(f"{'Layer Name':<50} | {'Cos Sim':<10} | {'Delta Norm':<12} | {'Rel Diff (%)':<12}")
print("-" * 90)

results = []
modules_base = dict(model_base.named_modules())

for name, module_ft in model_ft.named_modules():
    if isinstance(module_ft, torch.nn.Linear):
        if name in modules_base:
            module_base = modules_base[name]
            w_ft = module_ft.weight.detach()
            w_base = module_base.weight.detach()
            
            if w_ft.shape != w_base.shape:
                continue
                
            # 1. Cosine Similarity
            cos_sim = torch.nn.functional.cosine_similarity(
                w_ft.flatten(), w_base.flatten(), dim=0
            ).item()
            
            # 2. Delta (FT - Base)
            delta = w_ft - w_base
            norm_delta = torch.norm(delta).item()
            norm_base = torch.norm(w_base).item()
            
            rel_diff = norm_delta / norm_base if norm_base > 0 else 0.0
            
            print(f"{name:<50} | {cos_sim:.6f}   | {norm_delta:.4f}       | {rel_diff*100:.4f}%")
            
            results.append({
                "Layer": name, "Cos_Sim": cos_sim, 
                "Delta_Norm": norm_delta, "Rel_Diff": rel_diff
            })

df = pd.DataFrame(results)
print("-" * 90)
print(f"Avg Cos Sim: {df['Cos_Sim'].mean():.4f} | Avg Delta Norm: {df['Delta_Norm'].mean():.4f}")

Loading Fine-Tuned Model: zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct...


'(ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')), '(Request ID: 139df8da-71c0-4694-a1c2-0b2d3bd183f4)')' thrown while requesting HEAD https://huggingface.co/zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct/resolve/main/config.json
Retrying in 1s [Retry 1/5].


Loading Base Model: Qwen/Qwen2.5-1.5B-Instruct...


generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]


Starting Comparison (FT vs. Base)...
------------------------------------------------------------------------------------------
Layer Name                                         | Cos Sim    | Delta Norm   | Rel Diff (%)
------------------------------------------------------------------------------------------
model.layers.0.self_attn.q_proj                    | 1.000084   | 0.1955       | 0.2621%
model.layers.0.self_attn.k_proj                    | 0.999970   | 0.0789       | 0.2254%
model.layers.0.self_attn.v_proj                    | 0.999936   | 0.0915       | 0.7792%
model.layers.0.self_attn.o_proj                    | 0.999908   | 0.2367       | 0.6725%
model.layers.0.mlp.gate_proj                       | 1.000725   | 0.6117       | 0.5364%
model.layers.0.mlp.up_proj                         | 1.000775   | 0.5940       | 0.6271%
model.layers.0.mlp.down_proj                       | 1.000812   | 0.2366       | 0.2383%
model.layers.1.self_attn.q_proj                    | 0.999896  

In [1]:
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import copy
from tqdm import tqdm

# ==========================================
# 1. 配置
# ==========================================
# 你的 Base 模型 (攻击者先验)
base_model_id = "Qwen/Qwen2.5-1.5B-Instruct"
# 你的 GT 模型 (仅用于验证攻击结果)
target_model_id = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct"
# 混淆后的 Checkpoint 路径
obfuscated_checkpoint = "/mnt/e/untitled folder/codebase/LoRO_attack/loro_qwen_1_5B.pt"
# 恢复模型的保存路径
save_path_recovered = "/mnt/e/untitled folder/codebase/LoRO_attack/recovered_qwen_1_5B_model"

device = "cuda" if torch.cuda.is_available() else "cpu"

# === 攻击关键参数 ===
REMOVE_RANK = 24

# ==========================================
# 2. 模型加载
# ==========================================
print(f"1. Loading Base Model (Prior): {base_model_id}...")
# 必须加 trust_remote_code=True 以支持 Qwen
recovered_model = AutoModelForCausalLM.from_pretrained(base_model_id, trust_remote_code=True).to(device)

print(f"2. Loading Obfuscated Checkpoint: {obfuscated_checkpoint}...")
if not os.path.exists(obfuscated_checkpoint):
    raise FileNotFoundError("混淆 Checkpoint 未找到，请先运行 Cell 1 生成。")
obfus_state_dict = torch.load(obfuscated_checkpoint, map_location=device)

print(f"3. Loading Ground Truth (Validation): {target_model_id}...")
gt_model = AutoModelForCausalLM.from_pretrained(target_model_id, trust_remote_code=True).to(device)

# ==========================================
# 3. 执行 SVD 去噪攻击 (SVD Denoising)
# ==========================================
print(f"\nSTARTING RECOVERY (Removing Top-{REMOVE_RANK} Singular Values as Noise)...")
print("="*60)

similarities = []
relative_errors = []

# 遍历 Base 模型的每一层
# 我们需要修改 recovered_model 的权重
all_modules = list(recovered_model.named_modules())
linear_layers = [(n, m) for n, m in all_modules if isinstance(m, nn.Linear)]

progress_bar = tqdm(linear_layers, desc="Recovering")

for name, module in progress_bar:

    if "lm_head" in name:
        print(f"Keeping Base Weights for {name}")
        continue

    # 1. 构造 LoRO 保存的 key
    # 根据 LoroLinear 代码，权重保存在 "obfus_linear.weight
    obfus_key = f"{name}.obfus_linear.weight"
    
    # 检查该层是否被混淆 (如果没在 checkpoint 里找到，说明该层未混淆，保持 Base 原样)
    if obfus_key not in obfus_state_dict:
        continue
        
    # 2. 获取数据
    W_base = module.weight.detach() # [out, in]
    W_obfus = obfus_state_dict[obfus_key].detach() # [out, in]
    
    # 3. 计算 Diff (包含 Fine-tuning 更新 + LoRO 噪声)
    # Diff = (W_base + Delta_FT + Noise) - W_base = Delta_FT + Noise
    print("size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}")
    Diff = W_obfus - W_base
    
    # 4. SVD 分解
    # 使用 float32 保证精度，Diff 形状通常是 [out_features, in_features]
    U, S, Vh = torch.linalg.svd(Diff.float(), full_matrices=False)

    print(f"Layer: {name} | {S[REMOVE_RANK - 2]}, {S[REMOVE_RANK - 1]}, {S[REMOVE_RANK]}, {S[REMOVE_RANK + 1]}")
    
    # --- 攻击核心逻辑 ---
    # 根据 LoroLinear 定义，噪声是 (B@A).T，其秩为 r (REMOVE_RANK)。
    # 且因为 noise_mag 很大，这 r 个奇异值会非常大，位于 S 的最前端。
    # 我们将前 r 个奇异值置零，即移除了 LoRO 噪声。
    S_clean = S.clone()
    S_clean[:REMOVE_RANK] = 0.0 
    
    # 5. 重构纯净的 Delta (即估算的 Delta_FT)
    Delta_Recovered = (U @ torch.diag(S_clean) @ Vh).to(W_base.dtype)
    
    # 6. 恢复权重并写入模型
    # W_rec = W_base + Delta_Recovered
    module.weight.data = W_base + Delta_Recovered
    
    # 7. 恢复 Bias
    # LoroLinear 代码中: self.obfus_linear.bias = original_linear.bias
    # Bias 没有加噪声，直接从 checkpoint 读取覆盖即可
    obfus_bias_key = f"{name}.obfus_linear.bias"
    if obfus_bias_key in obfus_state_dict and module.bias is not None:
        module.bias.data = obfus_state_dict[obfus_bias_key].detach()
    
    # ==========================
    # 验证环节 (对比 GT)
    # ==========================
    if name in dict(gt_model.named_modules()):
        W_gt = dict(gt_model.named_modules())[name].weight.detach()
        Delta_True = W_gt - W_base
        
        # 计算相似度: 我们的恢复结果 vs 真实的微调增量
        # 如果 sim 接近 1.0，说明我们成功剥离了噪声，保留了微调增量
        sim = torch.nn.functional.cosine_similarity(Delta_True.flatten(), Delta_Recovered.flatten(), dim=0).item()
        
        print(f"Layer: {name} | Similarity with GT Delta: {sim:.4f}")
        
        similarities.append(sim)
        # progress_bar.set_postfix({"Avg Sim": f"{sum(similarities)/len(similarities):.4f}", "Last Sim": f"{sim:.4f}"})

# ==========================================
# 4. 结果保存
# ==========================================

print(f"Saving recovered model to {save_path_recovered}...")
recovered_model.save_pretrained(save_path_recovered)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
tokenizer.save_pretrained(save_path_recovered)
print("Saved.")

1. Loading Base Model (Prior): Qwen/Qwen2.5-1.5B-Instruct...


'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /Qwen/Qwen2.5-1.5B-Instruct/resolve/main/config.json (Caused by ProxyError('Unable to connect to proxy', ConnectTimeoutError(<HTTPSConnection(host='192.168.240.1', port=7890) at 0x7b2a0134fa90>, 'Connection to 192.168.240.1 timed out. (connect timeout=10)')))"), '(Request ID: 2d1b0cfb-6d2e-4828-ad2b-6467231e59b5)')' thrown while requesting HEAD https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct/resolve/main/config.json
Retrying in 1s [Retry 1/5].
'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /Qwen/Qwen2.5-1.5B-Instruct/resolve/main/config.json (Caused by ProxyError('Unable to connect to proxy', ConnectTimeoutError(<HTTPSConnection(host='192.168.240.1', port=7890) at 0x7b2a010576d0>, 'Connection to 192.168.240.1 timed out. (connect timeout=10)')))"), '(Request ID: be49ed8c-2e53-4b60-ad89-31850dd9b99d)')' thrown while requesting HEAD

2. Loading Obfuscated Checkpoint: /mnt/e/untitled folder/codebase/LoRO_attack/loro_qwen_1_5B.pt...
3. Loading Ground Truth (Validation): zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct...

STARTING RECOVERY (Removing Top-24 Singular Values as Noise)...


Recovering:   0%|                                                                               | 0/197 [00:00<?, ?it/s]

size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   1%|▎                                                                      | 1/197 [00:00<01:09,  2.81it/s]

Layer: model.layers.0.self_attn.q_proj | 110.85456085205078, 108.54083251953125, 0.10493861138820648, 0.07237014919519424
Layer: model.layers.0.self_attn.q_proj | Similarity with GT Delta: 0.9763
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.0.self_attn.k_proj | 38.99114227294922, 37.96333694458008, 0.03662277013063431, 0.030751856043934822
Layer: model.layers.0.self_attn.k_proj | Similarity with GT Delta: 0.9447
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.0.self_attn.v_proj | 38.60441589355469, 36.40998458862305, 0.04998471960425377, 0.02945486456155777
Layer: model.layers.0.self_attn.v_proj | Similarity with GT Delta: 0.9457
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   2%|█▍                                                                     | 4/197 [00:00<00:31,  6.21it/s]

Layer: model.layers.0.self_attn.o_proj | 112.155517578125, 108.31087493896484, 0.17492511868476868, 0.06637772172689438
Layer: model.layers.0.self_attn.o_proj | Similarity with GT Delta: 0.9788
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   3%|█▊                                                                     | 5/197 [00:01<00:57,  3.34it/s]

Layer: model.layers.0.mlp.gate_proj | 275.2273864746094, 268.70819091796875, 0.364026814699173, 0.1889987289905548
Layer: model.layers.0.mlp.gate_proj | Similarity with GT Delta: 0.9854
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   3%|██▏                                                                    | 6/197 [00:02<01:16,  2.48it/s]

Layer: model.layers.0.mlp.up_proj | 278.8794860839844, 274.6235046386719, 0.36274561285972595, 0.158272385597229
Layer: model.layers.0.mlp.up_proj | Similarity with GT Delta: 0.9869
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   4%|██▌                                                                    | 7/197 [00:03<02:25,  1.31it/s]

Layer: model.layers.0.mlp.down_proj | 276.3860778808594, 271.5986633300781, 0.12384162098169327, 0.08179821819067001
Layer: model.layers.0.mlp.down_proj | Similarity with GT Delta: 0.9610
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   4%|██▉                                                                    | 8/197 [00:03<01:54,  1.65it/s]

Layer: model.layers.1.self_attn.q_proj | 113.06632232666016, 107.59654235839844, 0.10851271450519562, 0.07841189205646515
Layer: model.layers.1.self_attn.q_proj | Similarity with GT Delta: 0.9756
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.1.self_attn.k_proj | 38.136497497558594, 37.803245544433594, 0.05839051678776741, 0.035603880882263184
Layer: model.layers.1.self_attn.k_proj | Similarity with GT Delta: 0.9439
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.1.self_attn.v_proj | 39.59571075439453, 37.06818771362305, 0.12398642301559448, 0.026132624596357346
Layer: model.layers.1.self_attn.v_proj | Similarity with GT Delta: 0.9379
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   6%|███▉                                                                  | 11/197 [00:04<00:59,  3.12it/s]

Layer: model.layers.1.self_attn.o_proj | 110.44824981689453, 105.96926879882812, 0.2832280993461609, 0.07458081841468811
Layer: model.layers.1.self_attn.o_proj | Similarity with GT Delta: 0.9808
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   6%|████▎                                                                 | 12/197 [00:04<01:12,  2.56it/s]

Layer: model.layers.1.mlp.gate_proj | 276.67828369140625, 274.3094787597656, 0.6123086810112, 0.14983408153057098
Layer: model.layers.1.mlp.gate_proj | Similarity with GT Delta: 0.9898
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   7%|████▌                                                                 | 13/197 [00:05<01:23,  2.21it/s]

Layer: model.layers.1.mlp.up_proj | 277.230224609375, 271.201171875, 0.7109843492507935, 0.11871141940355301
Layer: model.layers.1.mlp.up_proj | Similarity with GT Delta: 0.9921
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   7%|████▉                                                                 | 14/197 [00:07<02:25,  1.25it/s]

Layer: model.layers.1.mlp.down_proj | 281.00225830078125, 273.2513122558594, 1.8834078311920166, 0.3037489652633667
Layer: model.layers.1.mlp.down_proj | Similarity with GT Delta: 0.9908
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   8%|█████▎                                                                | 15/197 [00:07<01:57,  1.55it/s]

Layer: model.layers.2.self_attn.q_proj | 110.71748352050781, 107.41670989990234, 0.15405136346817017, 0.06072244048118591
Layer: model.layers.2.self_attn.q_proj | Similarity with GT Delta: 0.9809
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.2.self_attn.k_proj | 38.70488739013672, 37.30792236328125, 0.06663794815540314, 0.02416509948670864
Layer: model.layers.2.self_attn.k_proj | Similarity with GT Delta: 0.9465
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.2.self_attn.v_proj | 38.508182525634766, 37.89719772338867, 0.1112542524933815, 0.04468158259987831
Layer: model.layers.2.self_attn.v_proj | Similarity with GT Delta: 0.9425
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   9%|██████▍                                                               | 18/197 [00:07<01:02,  2.88it/s]

Layer: model.layers.2.self_attn.o_proj | 111.11676025390625, 107.85099792480469, 0.1512385457754135, 0.09804478287696838
Layer: model.layers.2.self_attn.o_proj | Similarity with GT Delta: 0.9811
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  10%|██████▊                                                               | 19/197 [00:08<01:12,  2.45it/s]

Layer: model.layers.2.mlp.gate_proj | 278.7232360839844, 274.8935852050781, 0.5878899693489075, 0.1326112151145935
Layer: model.layers.2.mlp.gate_proj | Similarity with GT Delta: 0.9891
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  10%|███████                                                               | 20/197 [00:09<01:21,  2.18it/s]

Layer: model.layers.2.mlp.up_proj | 276.9786376953125, 272.14404296875, 0.521558940410614, 0.18421857059001923
Layer: model.layers.2.mlp.up_proj | Similarity with GT Delta: 0.9865
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  11%|███████▍                                                              | 21/197 [00:10<02:13,  1.32it/s]

Layer: model.layers.2.mlp.down_proj | 273.4554443359375, 267.8240051269531, 1.5782041549682617, 0.17235872149467468
Layer: model.layers.2.mlp.down_proj | Similarity with GT Delta: 0.9918
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  11%|███████▊                                                              | 22/197 [00:11<01:49,  1.60it/s]

Layer: model.layers.3.self_attn.q_proj | 110.3399429321289, 109.58518981933594, 0.1465122103691101, 0.07150041311979294
Layer: model.layers.3.self_attn.q_proj | Similarity with GT Delta: 0.9789
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.3.self_attn.k_proj | 38.763893127441406, 37.52210235595703, 0.07947187125682831, 0.028115108609199524
Layer: model.layers.3.self_attn.k_proj | Similarity with GT Delta: 0.9339
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.3.self_attn.v_proj | 39.248382568359375, 38.22079086303711, 0.08882182091474533, 0.045830950140953064
Layer: model.layers.3.self_attn.v_proj | Similarity with GT Delta: 0.9414
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  13%|████████▉                                                             | 25/197 [00:11<00:58,  2.95it/s]

Layer: model.layers.3.self_attn.o_proj | 109.89990234375, 107.58911895751953, 0.14873892068862915, 0.07948354631662369
Layer: model.layers.3.self_attn.o_proj | Similarity with GT Delta: 0.9762
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  13%|█████████▏                                                            | 26/197 [00:11<01:07,  2.52it/s]

Layer: model.layers.3.mlp.gate_proj | 275.873046875, 272.7729187011719, 0.5978835225105286, 0.11893697828054428
Layer: model.layers.3.mlp.gate_proj | Similarity with GT Delta: 0.9889
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  14%|█████████▌                                                            | 27/197 [00:12<01:17,  2.20it/s]

Layer: model.layers.3.mlp.up_proj | 281.76885986328125, 273.85406494140625, 0.5261197686195374, 0.15879862010478973
Layer: model.layers.3.mlp.up_proj | Similarity with GT Delta: 0.9871
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  14%|█████████▉                                                            | 28/197 [00:14<02:05,  1.35it/s]

Layer: model.layers.3.mlp.down_proj | 277.2013244628906, 270.755126953125, 0.13490524888038635, 0.09244896471500397
Layer: model.layers.3.mlp.down_proj | Similarity with GT Delta: 0.9720
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  15%|██████████▎                                                           | 29/197 [00:14<01:42,  1.64it/s]

Layer: model.layers.4.self_attn.q_proj | 109.41773223876953, 109.13572692871094, 0.11577951908111572, 0.0775563195347786
Layer: model.layers.4.self_attn.q_proj | Similarity with GT Delta: 0.9757
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.4.self_attn.k_proj | 37.60171890258789, 35.160953521728516, 0.06644123792648315, 0.025684500113129616
Layer: model.layers.4.self_attn.k_proj | Similarity with GT Delta: 0.9505
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.4.self_attn.v_proj | 39.39423751831055, 38.29228973388672, 0.070005401968956, 0.044241081923246384
Layer: model.layers.4.self_attn.v_proj | Similarity with GT Delta: 0.9406
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  16%|███████████▎                                                          | 32/197 [00:14<00:54,  3.02it/s]

Layer: model.layers.4.self_attn.o_proj | 111.58061981201172, 107.70870971679688, 0.13624608516693115, 0.10044784843921661
Layer: model.layers.4.self_attn.o_proj | Similarity with GT Delta: 0.9793
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  17%|███████████▋                                                          | 33/197 [00:15<01:04,  2.55it/s]

Layer: model.layers.4.mlp.gate_proj | 275.080078125, 272.9604797363281, 0.4646463394165039, 0.1594918668270111
Layer: model.layers.4.mlp.gate_proj | Similarity with GT Delta: 0.9885
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  17%|████████████                                                          | 34/197 [00:15<01:12,  2.23it/s]

Layer: model.layers.4.mlp.up_proj | 276.6955871582031, 273.9127197265625, 0.5414903163909912, 0.1938558667898178
Layer: model.layers.4.mlp.up_proj | Similarity with GT Delta: 0.9906
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  18%|████████████▍                                                         | 35/197 [00:17<01:59,  1.36it/s]

Layer: model.layers.4.mlp.down_proj | 277.08795166015625, 272.0637512207031, 0.1684037297964096, 0.08759119361639023
Layer: model.layers.4.mlp.down_proj | Similarity with GT Delta: 0.9713
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  18%|████████████▊                                                         | 36/197 [00:17<01:37,  1.64it/s]

Layer: model.layers.5.self_attn.q_proj | 109.92774963378906, 105.91590118408203, 0.18299534916877747, 0.07264132052659988
Layer: model.layers.5.self_attn.q_proj | Similarity with GT Delta: 0.9836
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.5.self_attn.k_proj | 38.903953552246094, 37.298851013183594, 0.06461618840694427, 0.025325072929263115
Layer: model.layers.5.self_attn.k_proj | Similarity with GT Delta: 0.9528
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.5.self_attn.v_proj | 38.816402435302734, 37.41267776489258, 0.04904844984412193, 0.03980196267366409
Layer: model.layers.5.self_attn.v_proj | Similarity with GT Delta: 0.9339
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  20%|█████████████▊                                                        | 39/197 [00:18<00:52,  3.01it/s]

Layer: model.layers.5.self_attn.o_proj | 110.35391998291016, 107.0889663696289, 0.11385054886341095, 0.1058129295706749
Layer: model.layers.5.self_attn.o_proj | Similarity with GT Delta: 0.9814
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  20%|██████████████▏                                                       | 40/197 [00:18<01:01,  2.57it/s]

Layer: model.layers.5.mlp.gate_proj | 278.4375305175781, 275.9696350097656, 0.7024552226066589, 0.14547114074230194
Layer: model.layers.5.mlp.gate_proj | Similarity with GT Delta: 0.9870
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.5.mlp.up_proj | 275.6080017089844, 268.9867858886719, 0.5871020555496216, 0.17948557436466217


Recovering:  21%|██████████████▌                                                       | 41/197 [00:19<01:11,  2.17it/s]

Layer: model.layers.5.mlp.up_proj | Similarity with GT Delta: 0.9881
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  21%|██████████████▉                                                       | 42/197 [00:21<02:06,  1.22it/s]

Layer: model.layers.5.mlp.down_proj | 277.44677734375, 270.8825988769531, 0.234980970621109, 0.10093700885772705
Layer: model.layers.5.mlp.down_proj | Similarity with GT Delta: 0.9817
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  22%|███████████████▎                                                      | 43/197 [00:21<01:42,  1.51it/s]

Layer: model.layers.6.self_attn.q_proj | 111.01306915283203, 108.09291076660156, 0.16070879995822906, 0.07674740999937057
Layer: model.layers.6.self_attn.q_proj | Similarity with GT Delta: 0.9814
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.6.self_attn.k_proj | 40.103450775146484, 37.41843032836914, 0.04436441883444786, 0.034197527915239334
Layer: model.layers.6.self_attn.k_proj | Similarity with GT Delta: 0.9385
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.6.self_attn.v_proj | 40.26084899902344, 36.89176940917969, 0.1823766529560089, 0.04006468132138252
Layer: model.layers.6.self_attn.v_proj | Similarity with GT Delta: 0.9469
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  23%|████████████████▎                                                     | 46/197 [00:21<00:54,  2.79it/s]

Layer: model.layers.6.self_attn.o_proj | 111.82109069824219, 110.19654083251953, 0.20670485496520996, 0.09139080345630646
Layer: model.layers.6.self_attn.o_proj | Similarity with GT Delta: 0.9843
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  24%|████████████████▋                                                     | 47/197 [00:22<01:02,  2.40it/s]

Layer: model.layers.6.mlp.gate_proj | 274.1790466308594, 270.2818603515625, 0.6763228178024292, 0.15497848391532898
Layer: model.layers.6.mlp.gate_proj | Similarity with GT Delta: 0.9890
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  24%|█████████████████                                                     | 48/197 [00:23<01:11,  2.10it/s]

Layer: model.layers.6.mlp.up_proj | 278.1018981933594, 265.9657287597656, 0.480431467294693, 0.21279288828372955
Layer: model.layers.6.mlp.up_proj | Similarity with GT Delta: 0.9883
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  25%|█████████████████▍                                                    | 49/197 [00:24<01:55,  1.28it/s]

Layer: model.layers.6.mlp.down_proj | 274.9368896484375, 273.928466796875, 0.2567538022994995, 0.08853954076766968
Layer: model.layers.6.mlp.down_proj | Similarity with GT Delta: 0.9814
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  25%|█████████████████▊                                                    | 50/197 [00:25<01:33,  1.57it/s]

Layer: model.layers.7.self_attn.q_proj | 112.789306640625, 108.90530395507812, 0.20775052905082703, 0.09902974963188171
Layer: model.layers.7.self_attn.q_proj | Similarity with GT Delta: 0.9824
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.7.self_attn.k_proj | 38.4631462097168, 34.6366081237793, 0.06858493387699127, 0.038331713527441025
Layer: model.layers.7.self_attn.k_proj | Similarity with GT Delta: 0.9442
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.7.self_attn.v_proj | 41.07978820800781, 39.503631591796875, 0.08588187396526337, 0.059362735599279404
Layer: model.layers.7.self_attn.v_proj | Similarity with GT Delta: 0.9354
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  27%|██████████████████▊                                                   | 53/197 [00:25<00:50,  2.88it/s]

Layer: model.layers.7.self_attn.o_proj | 110.25267028808594, 108.68445587158203, 0.13712933659553528, 0.12725195288658142
Layer: model.layers.7.self_attn.o_proj | Similarity with GT Delta: 0.9799
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  27%|███████████████████▏                                                  | 54/197 [00:26<00:58,  2.43it/s]

Layer: model.layers.7.mlp.gate_proj | 273.8360900878906, 271.9365539550781, 0.4762587547302246, 0.21485479176044464
Layer: model.layers.7.mlp.gate_proj | Similarity with GT Delta: 0.9871
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  28%|███████████████████▌                                                  | 55/197 [00:26<01:06,  2.15it/s]

Layer: model.layers.7.mlp.up_proj | 279.1889953613281, 266.5095520019531, 0.47491344809532166, 0.24041204154491425
Layer: model.layers.7.mlp.up_proj | Similarity with GT Delta: 0.9897
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  28%|███████████████████▉                                                  | 56/197 [00:28<01:47,  1.31it/s]

Layer: model.layers.7.mlp.down_proj | 280.6122741699219, 275.08197021484375, 0.2620381712913513, 0.05250620096921921
Layer: model.layers.7.mlp.down_proj | Similarity with GT Delta: 0.9821
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  29%|████████████████████▎                                                 | 57/197 [00:28<01:27,  1.60it/s]

Layer: model.layers.8.self_attn.q_proj | 113.08135223388672, 109.71116638183594, 0.1142863929271698, 0.09184953570365906
Layer: model.layers.8.self_attn.q_proj | Similarity with GT Delta: 0.9832
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.8.self_attn.k_proj | 39.800655364990234, 38.066368103027344, 0.061760757118463516, 0.03163342922925949
Layer: model.layers.8.self_attn.k_proj | Similarity with GT Delta: 0.9365
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.8.self_attn.v_proj | 37.39583969116211, 36.98485565185547, 0.06686880439519882, 0.024412760511040688
Layer: model.layers.8.self_attn.v_proj | Similarity with GT Delta: 0.9246
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  30%|█████████████████████▎                                                | 60/197 [00:28<00:46,  2.95it/s]

Layer: model.layers.8.self_attn.o_proj | 109.40039825439453, 106.89680480957031, 0.18774943053722382, 0.11006316542625427
Layer: model.layers.8.self_attn.o_proj | Similarity with GT Delta: 0.9830
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.8.mlp.gate_proj | 278.9275817871094, 275.7316589355469, 0.4569626748561859, 0.19063501060009003


Recovering:  31%|█████████████████████▋                                                | 61/197 [00:29<00:55,  2.43it/s]

Layer: model.layers.8.mlp.gate_proj | Similarity with GT Delta: 0.9874
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  31%|██████████████████████                                                | 62/197 [00:30<01:08,  1.98it/s]

Layer: model.layers.8.mlp.up_proj | 277.1063232421875, 276.2815856933594, 0.49820271134376526, 0.24791240692138672
Layer: model.layers.8.mlp.up_proj | Similarity with GT Delta: 0.9907
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  32%|██████████████████████▍                                               | 63/197 [00:32<01:46,  1.25it/s]

Layer: model.layers.8.mlp.down_proj | 275.65240478515625, 274.845703125, 0.32089951634407043, 0.05379311740398407
Layer: model.layers.8.mlp.down_proj | Similarity with GT Delta: 0.9834
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  32%|██████████████████████▋                                               | 64/197 [00:32<01:27,  1.52it/s]

Layer: model.layers.9.self_attn.q_proj | 110.69427490234375, 110.49449157714844, 0.16810265183448792, 0.08355925232172012
Layer: model.layers.9.self_attn.q_proj | Similarity with GT Delta: 0.9784
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.9.self_attn.k_proj | 39.03826141357422, 38.32705307006836, 0.0642957016825676, 0.028384830802679062
Layer: model.layers.9.self_attn.k_proj | Similarity with GT Delta: 0.9448
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.9.self_attn.v_proj | 38.13227081298828, 37.685943603515625, 0.07975302636623383, 0.04476936534047127
Layer: model.layers.9.self_attn.v_proj | Similarity with GT Delta: 0.9429
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  34%|███████████████████████▊                                              | 67/197 [00:32<00:46,  2.81it/s]

Layer: model.layers.9.self_attn.o_proj | 111.9452896118164, 105.5058364868164, 0.1649218648672104, 0.14267532527446747
Layer: model.layers.9.self_attn.o_proj | Similarity with GT Delta: 0.9810
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  35%|████████████████████████▏                                             | 68/197 [00:33<00:54,  2.35it/s]

Layer: model.layers.9.mlp.gate_proj | 278.0399169921875, 273.2672424316406, 0.48458331823349, 0.23608769476413727
Layer: model.layers.9.mlp.gate_proj | Similarity with GT Delta: 0.9894
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  35%|████████████████████████▌                                             | 69/197 [00:33<01:01,  2.07it/s]

Layer: model.layers.9.mlp.up_proj | 276.5679931640625, 267.3401794433594, 0.4315437972545624, 0.22675365209579468
Layer: model.layers.9.mlp.up_proj | Similarity with GT Delta: 0.9865
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  36%|████████████████████████▊                                             | 70/197 [00:35<01:40,  1.26it/s]

Layer: model.layers.9.mlp.down_proj | 280.3621520996094, 272.5422668457031, 0.2818582355976105, 0.041012201458215714
Layer: model.layers.9.mlp.down_proj | Similarity with GT Delta: 0.9858
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  36%|█████████████████████████▏                                            | 71/197 [00:35<01:22,  1.52it/s]

Layer: model.layers.10.self_attn.q_proj | 110.16830444335938, 107.26847076416016, 0.1550094485282898, 0.12105074524879456
Layer: model.layers.10.self_attn.q_proj | Similarity with GT Delta: 0.9823
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.10.self_attn.k_proj | 39.30081558227539, 36.87860870361328, 0.06508941948413849, 0.03366990387439728
Layer: model.layers.10.self_attn.k_proj | Similarity with GT Delta: 0.9277
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.10.self_attn.v_proj | 40.95128631591797, 37.51689910888672, 0.05360716953873634, 0.04559672623872757
Layer: model.layers.10.self_attn.v_proj | Similarity with GT Delta: 0.9429
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  38%|██████████████████████████▎                                           | 74/197 [00:36<00:44,  2.78it/s]

Layer: model.layers.10.self_attn.o_proj | 109.3443603515625, 106.73062896728516, 0.13964930176734924, 0.08791907131671906
Layer: model.layers.10.self_attn.o_proj | Similarity with GT Delta: 0.9810
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  38%|██████████████████████████▋                                           | 75/197 [00:36<00:52,  2.33it/s]

Layer: model.layers.10.mlp.gate_proj | 277.1257019042969, 276.71356201171875, 0.628277599811554, 0.2409777045249939
Layer: model.layers.10.mlp.gate_proj | Similarity with GT Delta: 0.9897
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  39%|███████████████████████████                                           | 76/197 [00:37<00:57,  2.11it/s]

Layer: model.layers.10.mlp.up_proj | 279.42889404296875, 267.6861877441406, 0.4425427317619324, 0.2670813202857971
Layer: model.layers.10.mlp.up_proj | Similarity with GT Delta: 0.9885
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  39%|███████████████████████████▎                                          | 77/197 [00:39<01:31,  1.31it/s]

Layer: model.layers.10.mlp.down_proj | 277.6838684082031, 272.30987548828125, 0.30234962701797485, 0.045116547495126724
Layer: model.layers.10.mlp.down_proj | Similarity with GT Delta: 0.9790
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  40%|███████████████████████████▋                                          | 78/197 [00:39<01:14,  1.60it/s]

Layer: model.layers.11.self_attn.q_proj | 111.07334899902344, 109.67222595214844, 0.17935700714588165, 0.10416219383478165
Layer: model.layers.11.self_attn.q_proj | Similarity with GT Delta: 0.9799
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.11.self_attn.k_proj | 38.27239990234375, 33.922882080078125, 0.04951122775673866, 0.04131976515054703
Layer: model.layers.11.self_attn.k_proj | Similarity with GT Delta: 0.9397
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.11.self_attn.v_proj | 37.3255729675293, 35.08582305908203, 0.11269477009773254, 0.03522943705320358
Layer: model.layers.11.self_attn.v_proj | Similarity with GT Delta: 0.9527
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  41%|████████████████████████████▊                                         | 81/197 [00:39<00:39,  2.92it/s]

Layer: model.layers.11.self_attn.o_proj | 110.86724853515625, 107.94328308105469, 0.18985895812511444, 0.09018168598413467
Layer: model.layers.11.self_attn.o_proj | Similarity with GT Delta: 0.9808
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  42%|█████████████████████████████▏                                        | 82/197 [00:40<00:46,  2.47it/s]

Layer: model.layers.11.mlp.gate_proj | 274.0498046875, 273.5718994140625, 0.8072840571403503, 0.18053488433361053
Layer: model.layers.11.mlp.gate_proj | Similarity with GT Delta: 0.9906
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  42%|█████████████████████████████▍                                        | 83/197 [00:41<00:52,  2.17it/s]

Layer: model.layers.11.mlp.up_proj | 277.87286376953125, 276.7088928222656, 0.7223234176635742, 0.18623313307762146
Layer: model.layers.11.mlp.up_proj | Similarity with GT Delta: 0.9921
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  43%|█████████████████████████████▊                                        | 84/197 [00:43<01:36,  1.17it/s]

Layer: model.layers.11.mlp.down_proj | 277.0653381347656, 274.83984375, 0.3969075381755829, 0.03776922449469566
Layer: model.layers.11.mlp.down_proj | Similarity with GT Delta: 0.9899
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  43%|██████████████████████████████▏                                       | 85/197 [00:43<01:18,  1.42it/s]

Layer: model.layers.12.self_attn.q_proj | 111.01239776611328, 110.46485900878906, 0.1445566713809967, 0.0941631868481636
Layer: model.layers.12.self_attn.q_proj | Similarity with GT Delta: 0.9791
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.12.self_attn.k_proj | 39.572689056396484, 36.60842514038086, 0.04573749378323555, 0.03609710931777954
Layer: model.layers.12.self_attn.k_proj | Similarity with GT Delta: 0.9291
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.12.self_attn.v_proj | 38.740211486816406, 37.36473846435547, 0.05157984793186188, 0.04936651512980461
Layer: model.layers.12.self_attn.v_proj | Similarity with GT Delta: 0.9274
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  45%|███████████████████████████████▎                                      | 88/197 [00:43<00:41,  2.66it/s]

Layer: model.layers.12.self_attn.o_proj | 110.07315063476562, 108.2112045288086, 0.19404976069927216, 0.10633488744497299
Layer: model.layers.12.self_attn.o_proj | Similarity with GT Delta: 0.9792
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  45%|███████████████████████████████▌                                      | 89/197 [00:44<00:47,  2.29it/s]

Layer: model.layers.12.mlp.gate_proj | 283.2618408203125, 279.18048095703125, 0.9257503151893616, 0.17418599128723145
Layer: model.layers.12.mlp.gate_proj | Similarity with GT Delta: 0.9895
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  46%|███████████████████████████████▉                                      | 90/197 [00:45<00:52,  2.03it/s]

Layer: model.layers.12.mlp.up_proj | 276.7546691894531, 267.1258850097656, 0.5544308423995972, 0.2201249748468399
Layer: model.layers.12.mlp.up_proj | Similarity with GT Delta: 0.9862
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  46%|████████████████████████████████▎                                     | 91/197 [00:46<01:24,  1.26it/s]

Layer: model.layers.12.mlp.down_proj | 274.11285400390625, 272.1598815917969, 0.26305198669433594, 0.06814450770616531
Layer: model.layers.12.mlp.down_proj | Similarity with GT Delta: 0.9806
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  47%|████████████████████████████████▋                                     | 92/197 [00:47<01:08,  1.53it/s]

Layer: model.layers.13.self_attn.q_proj | 110.5389175415039, 110.38910675048828, 0.2077789306640625, 0.09598813951015472
Layer: model.layers.13.self_attn.q_proj | Similarity with GT Delta: 0.9814
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.13.self_attn.k_proj | 39.80934524536133, 35.41564178466797, 0.08535709232091904, 0.05173316225409508
Layer: model.layers.13.self_attn.k_proj | Similarity with GT Delta: 0.9416
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.13.self_attn.v_proj | 38.738861083984375, 37.72490310668945, 0.04791073501110077, 0.03782567009329796
Layer: model.layers.13.self_attn.v_proj | Similarity with GT Delta: 0.9328
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  48%|█████████████████████████████████▊                                    | 95/197 [00:47<00:36,  2.83it/s]

Layer: model.layers.13.self_attn.o_proj | 112.36717987060547, 105.55532836914062, 0.14523658156394958, 0.12409108877182007
Layer: model.layers.13.self_attn.o_proj | Similarity with GT Delta: 0.9809
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  49%|██████████████████████████████████                                    | 96/197 [00:47<00:41,  2.43it/s]

Layer: model.layers.13.mlp.gate_proj | 276.7406311035156, 273.4859619140625, 0.6745027303695679, 0.18833349645137787
Layer: model.layers.13.mlp.gate_proj | Similarity with GT Delta: 0.9876
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  49%|██████████████████████████████████▍                                   | 97/197 [00:48<00:46,  2.14it/s]

Layer: model.layers.13.mlp.up_proj | 281.2792663574219, 273.6503601074219, 0.5997910499572754, 0.21204641461372375
Layer: model.layers.13.mlp.up_proj | Similarity with GT Delta: 0.9900
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  50%|██████████████████████████████████▊                                   | 98/197 [00:50<01:15,  1.31it/s]

Layer: model.layers.13.mlp.down_proj | 278.35748291015625, 270.3955993652344, 0.2559039890766144, 0.07582943886518478
Layer: model.layers.13.mlp.down_proj | Similarity with GT Delta: 0.9811
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  50%|███████████████████████████████████▏                                  | 99/197 [00:50<01:00,  1.61it/s]

Layer: model.layers.14.self_attn.q_proj | 110.22110748291016, 109.42473602294922, 0.12223678827285767, 0.07883540540933609
Layer: model.layers.14.self_attn.q_proj | Similarity with GT Delta: 0.9806
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.14.self_attn.k_proj | 38.50234603881836, 36.7510986328125, 0.0483417846262455, 0.03137435019016266
Layer: model.layers.14.self_attn.k_proj | Similarity with GT Delta: 0.9395
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.14.self_attn.v_proj | 39.9095344543457, 37.969970703125, 0.07270173728466034, 0.031338877975940704
Layer: model.layers.14.self_attn.v_proj | Similarity with GT Delta: 0.9273
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  52%|███████████████████████████████████▋                                 | 102/197 [00:50<00:31,  2.98it/s]

Layer: model.layers.14.self_attn.o_proj | 109.67046356201172, 108.27428436279297, 0.13120463490486145, 0.07248702645301819
Layer: model.layers.14.self_attn.o_proj | Similarity with GT Delta: 0.9780
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  52%|████████████████████████████████████                                 | 103/197 [00:51<00:36,  2.56it/s]

Layer: model.layers.14.mlp.gate_proj | 276.0188903808594, 270.9357604980469, 0.6892342567443848, 0.17299555242061615
Layer: model.layers.14.mlp.gate_proj | Similarity with GT Delta: 0.9879
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  53%|████████████████████████████████████▍                                | 104/197 [00:51<00:41,  2.22it/s]

Layer: model.layers.14.mlp.up_proj | 282.3651428222656, 271.6136779785156, 0.5055112838745117, 0.16590264439582825
Layer: model.layers.14.mlp.up_proj | Similarity with GT Delta: 0.9893
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  53%|████████████████████████████████████▊                                | 105/197 [00:53<01:07,  1.37it/s]

Layer: model.layers.14.mlp.down_proj | 280.7732849121094, 272.6461181640625, 0.27798521518707275, 0.10951904952526093
Layer: model.layers.14.mlp.down_proj | Similarity with GT Delta: 0.9860
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  54%|█████████████████████████████████████▏                               | 106/197 [00:53<00:54,  1.67it/s]

Layer: model.layers.15.self_attn.q_proj | 111.44007873535156, 109.1690902709961, 0.1671949177980423, 0.1364515721797943
Layer: model.layers.15.self_attn.q_proj | Similarity with GT Delta: 0.9826
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.15.self_attn.k_proj | 38.99250411987305, 35.90082931518555, 0.0493631549179554, 0.04275345429778099
Layer: model.layers.15.self_attn.k_proj | Similarity with GT Delta: 0.9420
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.15.self_attn.v_proj | 39.445125579833984, 38.14332962036133, 0.054361630231142044, 0.044958651065826416
Layer: model.layers.15.self_attn.v_proj | Similarity with GT Delta: 0.9395
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  55%|██████████████████████████████████████▏                              | 109/197 [00:54<00:28,  3.05it/s]

Layer: model.layers.15.self_attn.o_proj | 110.67411804199219, 109.66769409179688, 0.1373947113752365, 0.09553930908441544
Layer: model.layers.15.self_attn.o_proj | Similarity with GT Delta: 0.9797
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  56%|██████████████████████████████████████▌                              | 110/197 [00:54<00:35,  2.44it/s]

Layer: model.layers.15.mlp.gate_proj | 275.84625244140625, 275.00445556640625, 0.7775593400001526, 0.1341181993484497
Layer: model.layers.15.mlp.gate_proj | Similarity with GT Delta: 0.9878
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  56%|██████████████████████████████████████▉                              | 111/197 [00:55<00:39,  2.16it/s]

Layer: model.layers.15.mlp.up_proj | 280.5062255859375, 278.74981689453125, 0.5178309679031372, 0.16789256036281586
Layer: model.layers.15.mlp.up_proj | Similarity with GT Delta: 0.9871
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  57%|███████████████████████████████████████▏                             | 112/197 [00:57<01:03,  1.35it/s]

Layer: model.layers.15.mlp.down_proj | 280.8782958984375, 277.8939514160156, 0.2795078158378601, 0.05738886073231697
Layer: model.layers.15.mlp.down_proj | Similarity with GT Delta: 0.9822
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  57%|███████████████████████████████████████▌                             | 113/197 [00:57<00:50,  1.66it/s]

Layer: model.layers.16.self_attn.q_proj | 111.37142944335938, 106.7929916381836, 0.23215050995349884, 0.07539543509483337
Layer: model.layers.16.self_attn.q_proj | Similarity with GT Delta: 0.9770
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.16.self_attn.k_proj | 39.927955627441406, 38.04740905761719, 0.10524144768714905, 0.052171677350997925
Layer: model.layers.16.self_attn.k_proj | Similarity with GT Delta: 0.9333
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.16.self_attn.v_proj | 37.59397506713867, 36.012882232666016, 0.047051310539245605, 0.03621501848101616
Layer: model.layers.16.self_attn.v_proj | Similarity with GT Delta: 0.9376
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  59%|████████████████████████████████████████▋                            | 116/197 [00:57<00:26,  3.07it/s]

Layer: model.layers.16.self_attn.o_proj | 110.1729736328125, 107.65238952636719, 0.12780147790908813, 0.08801715821027756
Layer: model.layers.16.self_attn.o_proj | Similarity with GT Delta: 0.9810
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  59%|████████████████████████████████████████▉                            | 117/197 [00:58<00:33,  2.39it/s]

Layer: model.layers.16.mlp.gate_proj | 281.87213134765625, 277.533203125, 0.9412097930908203, 0.16034838557243347
Layer: model.layers.16.mlp.gate_proj | Similarity with GT Delta: 0.9916
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  60%|█████████████████████████████████████████▎                           | 118/197 [00:59<00:40,  1.97it/s]

Layer: model.layers.16.mlp.up_proj | 275.3270568847656, 270.7093200683594, 0.5499832034111023, 0.25733333826065063
Layer: model.layers.16.mlp.up_proj | Similarity with GT Delta: 0.9873
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  60%|█████████████████████████████████████████▋                           | 119/197 [01:00<01:01,  1.28it/s]

Layer: model.layers.16.mlp.down_proj | 276.1710510253906, 275.00067138671875, 0.38797059655189514, 0.044239919632673264
Layer: model.layers.16.mlp.down_proj | Similarity with GT Delta: 0.9868
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  61%|██████████████████████████████████████████                           | 120/197 [01:00<00:49,  1.57it/s]

Layer: model.layers.17.self_attn.q_proj | 112.30655670166016, 108.8755111694336, 0.2530466616153717, 0.06904374063014984
Layer: model.layers.17.self_attn.q_proj | Similarity with GT Delta: 0.9817
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.17.self_attn.k_proj | 39.828651428222656, 37.739776611328125, 0.10788936913013458, 0.049015626311302185
Layer: model.layers.17.self_attn.k_proj | Similarity with GT Delta: 0.9449
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.17.self_attn.v_proj | 40.16897964477539, 35.471858978271484, 0.11596398800611496, 0.06181235983967781
Layer: model.layers.17.self_attn.v_proj | Similarity with GT Delta: 0.9363
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  62%|███████████████████████████████████████████                          | 123/197 [01:01<00:25,  2.92it/s]

Layer: model.layers.17.self_attn.o_proj | 111.37178802490234, 107.47724914550781, 0.17080168426036835, 0.11719631403684616
Layer: model.layers.17.self_attn.o_proj | Similarity with GT Delta: 0.9837
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  63%|███████████████████████████████████████████▍                         | 124/197 [01:01<00:29,  2.48it/s]

Layer: model.layers.17.mlp.gate_proj | 274.6238708496094, 270.4223937988281, 1.1046768426895142, 0.15394844114780426
Layer: model.layers.17.mlp.gate_proj | Similarity with GT Delta: 0.9884
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  63%|███████████████████████████████████████████▊                         | 125/197 [01:02<00:32,  2.23it/s]

Layer: model.layers.17.mlp.up_proj | 277.0458984375, 272.542236328125, 0.6274974346160889, 0.29970690608024597
Layer: model.layers.17.mlp.up_proj | Similarity with GT Delta: 0.9916
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  64%|████████████████████████████████████████████▏                        | 126/197 [01:04<00:53,  1.32it/s]

Layer: model.layers.17.mlp.down_proj | 275.9671630859375, 271.9403076171875, 0.3176044821739197, 0.035258788615465164
Layer: model.layers.17.mlp.down_proj | Similarity with GT Delta: 0.9874
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  64%|████████████████████████████████████████████▍                        | 127/197 [01:04<00:43,  1.59it/s]

Layer: model.layers.18.self_attn.q_proj | 110.42918395996094, 107.10454559326172, 0.3243634104728699, 0.06465329974889755
Layer: model.layers.18.self_attn.q_proj | Similarity with GT Delta: 0.9834
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.18.self_attn.k_proj | 39.03596496582031, 37.54548645019531, 0.07657285034656525, 0.044542912393808365
Layer: model.layers.18.self_attn.k_proj | Similarity with GT Delta: 0.9500
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.18.self_attn.v_proj | 39.14024353027344, 38.33680725097656, 0.0455276258289814, 0.033524930477142334
Layer: model.layers.18.self_attn.v_proj | Similarity with GT Delta: 0.9463
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  66%|█████████████████████████████████████████████▌                       | 130/197 [01:04<00:22,  2.92it/s]

Layer: model.layers.18.self_attn.o_proj | 112.39617919921875, 109.28357696533203, 0.18361587822437286, 0.10685648024082184
Layer: model.layers.18.self_attn.o_proj | Similarity with GT Delta: 0.9800
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  66%|█████████████████████████████████████████████▉                       | 131/197 [01:05<00:27,  2.43it/s]

Layer: model.layers.18.mlp.gate_proj | 273.6048889160156, 268.865478515625, 0.7916801571846008, 0.16358104348182678
Layer: model.layers.18.mlp.gate_proj | Similarity with GT Delta: 0.9906
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  67%|██████████████████████████████████████████████▏                      | 132/197 [01:05<00:30,  2.15it/s]

Layer: model.layers.18.mlp.up_proj | 277.2304382324219, 276.5451354980469, 0.573366105556488, 0.3060220181941986
Layer: model.layers.18.mlp.up_proj | Similarity with GT Delta: 0.9887
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  68%|██████████████████████████████████████████████▌                      | 133/197 [01:07<00:50,  1.26it/s]

Layer: model.layers.18.mlp.down_proj | 276.2782897949219, 271.5544128417969, 0.3607039451599121, 0.03558184206485748
Layer: model.layers.18.mlp.down_proj | Similarity with GT Delta: 0.9864
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  68%|██████████████████████████████████████████████▉                      | 134/197 [01:08<00:40,  1.55it/s]

Layer: model.layers.19.self_attn.q_proj | 110.83521270751953, 109.043701171875, 0.23147790133953094, 0.06263015419244766
Layer: model.layers.19.self_attn.q_proj | Similarity with GT Delta: 0.9816
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.19.self_attn.k_proj | 38.7314338684082, 37.795623779296875, 0.04403253272175789, 0.04068448767066002
Layer: model.layers.19.self_attn.k_proj | Similarity with GT Delta: 0.9461
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.19.self_attn.v_proj | 38.230987548828125, 37.469329833984375, 0.0877838209271431, 0.04421813786029816
Layer: model.layers.19.self_attn.v_proj | Similarity with GT Delta: 0.9411
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  70%|███████████████████████████████████████████████▉                     | 137/197 [01:08<00:21,  2.86it/s]

Layer: model.layers.19.self_attn.o_proj | 111.51698303222656, 108.58216857910156, 0.1483129858970642, 0.0721948966383934
Layer: model.layers.19.self_attn.o_proj | Similarity with GT Delta: 0.9814
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  70%|████████████████████████████████████████████████▎                    | 138/197 [01:08<00:23,  2.46it/s]

Layer: model.layers.19.mlp.gate_proj | 281.2154235839844, 271.5481872558594, 0.6487441658973694, 0.17094583809375763
Layer: model.layers.19.mlp.gate_proj | Similarity with GT Delta: 0.9879
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  71%|████████████████████████████████████████████████▋                    | 139/197 [01:09<00:26,  2.17it/s]

Layer: model.layers.19.mlp.up_proj | 276.48541259765625, 274.5344543457031, 0.44392529129981995, 0.26541244983673096
Layer: model.layers.19.mlp.up_proj | Similarity with GT Delta: 0.9826
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  71%|█████████████████████████████████████████████████                    | 140/197 [01:11<00:43,  1.31it/s]

Layer: model.layers.19.mlp.down_proj | 273.21099853515625, 272.003662109375, 0.32191917300224304, 0.03453708440065384
Layer: model.layers.19.mlp.down_proj | Similarity with GT Delta: 0.9861
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  72%|█████████████████████████████████████████████████▍                   | 141/197 [01:11<00:34,  1.61it/s]

Layer: model.layers.20.self_attn.q_proj | 111.53251647949219, 110.16461181640625, 0.20509123802185059, 0.10240726172924042
Layer: model.layers.20.self_attn.q_proj | Similarity with GT Delta: 0.9793
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.20.self_attn.k_proj | 37.318031311035156, 36.10966110229492, 0.058422066271305084, 0.03639904037117958
Layer: model.layers.20.self_attn.k_proj | Similarity with GT Delta: 0.9392
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.20.self_attn.v_proj | 38.784847259521484, 36.743255615234375, 0.04552547261118889, 0.03282822668552399
Layer: model.layers.20.self_attn.v_proj | Similarity with GT Delta: 0.9392
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  73%|██████████████████████████████████████████████████▍                  | 144/197 [01:11<00:17,  2.97it/s]

Layer: model.layers.20.self_attn.o_proj | 109.88546752929688, 109.56369018554688, 0.10263765603303909, 0.07127542793750763
Layer: model.layers.20.self_attn.o_proj | Similarity with GT Delta: 0.9790
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  74%|██████████████████████████████████████████████████▊                  | 145/197 [01:12<00:20,  2.53it/s]

Layer: model.layers.20.mlp.gate_proj | 275.4870910644531, 271.4277038574219, 1.004343032836914, 0.1571260392665863
Layer: model.layers.20.mlp.gate_proj | Similarity with GT Delta: 0.9921
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  74%|███████████████████████████████████████████████████▏                 | 146/197 [01:12<00:22,  2.22it/s]

Layer: model.layers.20.mlp.up_proj | 275.2750244140625, 270.6521911621094, 0.6343724131584167, 0.1608044058084488
Layer: model.layers.20.mlp.up_proj | Similarity with GT Delta: 0.9932
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  75%|███████████████████████████████████████████████████▍                 | 147/197 [01:14<00:36,  1.38it/s]

Layer: model.layers.20.mlp.down_proj | 281.25933837890625, 272.65576171875, 0.32248273491859436, 0.03319647163152695
Layer: model.layers.20.mlp.down_proj | Similarity with GT Delta: 0.9844
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  75%|███████████████████████████████████████████████████▊                 | 148/197 [01:14<00:29,  1.66it/s]

Layer: model.layers.21.self_attn.q_proj | 111.31095886230469, 108.9306640625, 0.10022396594285965, 0.08656185120344162
Layer: model.layers.21.self_attn.q_proj | Similarity with GT Delta: 0.9792
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.21.self_attn.k_proj | 40.199951171875, 37.93290328979492, 0.0512516088783741, 0.03330208733677864
Layer: model.layers.21.self_attn.k_proj | Similarity with GT Delta: 0.9448
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.21.self_attn.v_proj | 37.76042938232422, 34.69292068481445, 0.08171819150447845, 0.03603323549032211
Layer: model.layers.21.self_attn.v_proj | Similarity with GT Delta: 0.9479
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  77%|████████████████████████████████████████████████████▉                | 151/197 [01:15<00:15,  3.05it/s]

Layer: model.layers.21.self_attn.o_proj | 111.63408660888672, 108.21355438232422, 0.16806884109973907, 0.05325572192668915
Layer: model.layers.21.self_attn.o_proj | Similarity with GT Delta: 0.9749
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.21.mlp.gate_proj | 274.2074279785156, 265.13458251953125, 0.9907698035240173, 0.13417832553386688


Recovering:  77%|█████████████████████████████████████████████████████▏               | 152/197 [01:15<00:19,  2.31it/s]

Layer: model.layers.21.mlp.gate_proj | Similarity with GT Delta: 0.9909
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  78%|█████████████████████████████████████████████████████▌               | 153/197 [01:16<00:22,  1.98it/s]

Layer: model.layers.21.mlp.up_proj | 278.81695556640625, 270.4696350097656, 0.5482696890830994, 0.1700267195701599
Layer: model.layers.21.mlp.up_proj | Similarity with GT Delta: 0.9889
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  78%|█████████████████████████████████████████████████████▉               | 154/197 [01:18<00:34,  1.24it/s]

Layer: model.layers.21.mlp.down_proj | 279.2121276855469, 275.1603088378906, 0.3674505650997162, 0.03502563014626503
Layer: model.layers.21.mlp.down_proj | Similarity with GT Delta: 0.9913
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  79%|██████████████████████████████████████████████████████▎              | 155/197 [01:18<00:27,  1.52it/s]

Layer: model.layers.22.self_attn.q_proj | 112.00166320800781, 110.05059051513672, 0.18571554124355316, 0.0583159476518631
Layer: model.layers.22.self_attn.q_proj | Similarity with GT Delta: 0.9796
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.22.self_attn.k_proj | 39.07984161376953, 37.925296783447266, 0.05204858258366585, 0.034946613013744354
Layer: model.layers.22.self_attn.k_proj | Similarity with GT Delta: 0.9432
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.22.self_attn.v_proj | 38.80064392089844, 36.38239288330078, 0.06109244376420975, 0.03575604781508446
Layer: model.layers.22.self_attn.v_proj | Similarity with GT Delta: 0.9447
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  80%|███████████████████████████████████████████████████████▎             | 158/197 [01:18<00:13,  2.82it/s]

Layer: model.layers.22.self_attn.o_proj | 111.63626861572266, 108.78975677490234, 0.12012165784835815, 0.07909326255321503
Layer: model.layers.22.self_attn.o_proj | Similarity with GT Delta: 0.9792
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  81%|███████████████████████████████████████████████████████▋             | 159/197 [01:19<00:15,  2.38it/s]

Layer: model.layers.22.mlp.gate_proj | 276.8620910644531, 270.01019287109375, 0.7707730531692505, 0.16903343796730042
Layer: model.layers.22.mlp.gate_proj | Similarity with GT Delta: 0.9911
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  81%|████████████████████████████████████████████████████████             | 160/197 [01:20<00:18,  1.98it/s]

Layer: model.layers.22.mlp.up_proj | 277.35650634765625, 269.172607421875, 0.6120890378952026, 0.15810653567314148
Layer: model.layers.22.mlp.up_proj | Similarity with GT Delta: 0.9921
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  82%|████████████████████████████████████████████████████████▍            | 161/197 [01:22<00:34,  1.05it/s]

Layer: model.layers.22.mlp.down_proj | 278.2409362792969, 271.94940185546875, 0.4274173378944397, 0.038949593901634216
Layer: model.layers.22.mlp.down_proj | Similarity with GT Delta: 0.9884
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  82%|████████████████████████████████████████████████████████▋            | 162/197 [01:22<00:26,  1.30it/s]

Layer: model.layers.23.self_attn.q_proj | 112.37100982666016, 109.91889953613281, 0.1740429401397705, 0.06292832642793655
Layer: model.layers.23.self_attn.q_proj | Similarity with GT Delta: 0.9836
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.23.self_attn.k_proj | 39.00136184692383, 38.489864349365234, 0.038923971354961395, 0.032140713185071945
Layer: model.layers.23.self_attn.k_proj | Similarity with GT Delta: 0.9407
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.23.self_attn.v_proj | 38.70315170288086, 36.034996032714844, 0.05201216787099838, 0.02805844508111477
Layer: model.layers.23.self_attn.v_proj | Similarity with GT Delta: 0.9437
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  84%|█████████████████████████████████████████████████████████▊           | 165/197 [01:23<00:13,  2.46it/s]

Layer: model.layers.23.self_attn.o_proj | 109.77008056640625, 107.47990417480469, 0.1486213356256485, 0.06894455850124359
Layer: model.layers.23.self_attn.o_proj | Similarity with GT Delta: 0.9802
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.23.mlp.gate_proj | 278.0340270996094, 273.5116882324219, 0.841010332107544, 0.1258513182401657


Recovering:  84%|██████████████████████████████████████████████████████████▏          | 166/197 [01:24<00:15,  1.97it/s]

Layer: model.layers.23.mlp.gate_proj | Similarity with GT Delta: 0.9905
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  85%|██████████████████████████████████████████████████████████▍          | 167/197 [01:24<00:17,  1.76it/s]

Layer: model.layers.23.mlp.up_proj | 276.887939453125, 261.5813293457031, 0.5560832023620605, 0.1395244002342224
Layer: model.layers.23.mlp.up_proj | Similarity with GT Delta: 0.9904
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  85%|██████████████████████████████████████████████████████████▊          | 168/197 [01:26<00:24,  1.20it/s]

Layer: model.layers.23.mlp.down_proj | 277.0162658691406, 265.4448547363281, 0.4004605710506439, 0.038935258984565735
Layer: model.layers.23.mlp.down_proj | Similarity with GT Delta: 0.9884
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  86%|███████████████████████████████████████████████████████████▏         | 169/197 [01:26<00:19,  1.46it/s]

Layer: model.layers.24.self_attn.q_proj | 112.3567886352539, 106.86404418945312, 0.2242582142353058, 0.06560062617063522
Layer: model.layers.24.self_attn.q_proj | Similarity with GT Delta: 0.9839
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.24.self_attn.k_proj | 38.85023880004883, 37.70838928222656, 0.05851459130644798, 0.03212250769138336
Layer: model.layers.24.self_attn.k_proj | Similarity with GT Delta: 0.9479
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.24.self_attn.v_proj | 38.714576721191406, 38.1502571105957, 0.16076987981796265, 0.024743111804127693
Layer: model.layers.24.self_attn.v_proj | Similarity with GT Delta: 0.9493
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  87%|████████████████████████████████████████████████████████████▏        | 172/197 [01:27<00:09,  2.69it/s]

Layer: model.layers.24.self_attn.o_proj | 111.43502807617188, 110.87969970703125, 0.18136820197105408, 0.06980206072330475
Layer: model.layers.24.self_attn.o_proj | Similarity with GT Delta: 0.9738
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  88%|████████████████████████████████████████████████████████████▌        | 173/197 [01:27<00:10,  2.34it/s]

Layer: model.layers.24.mlp.gate_proj | 279.1400146484375, 272.120361328125, 0.5838091373443604, 0.1239049881696701
Layer: model.layers.24.mlp.gate_proj | Similarity with GT Delta: 0.9898
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  88%|████████████████████████████████████████████████████████████▉        | 174/197 [01:28<00:10,  2.12it/s]

Layer: model.layers.24.mlp.up_proj | 277.0525207519531, 274.2684326171875, 0.5944727063179016, 0.10929163545370102
Layer: model.layers.24.mlp.up_proj | Similarity with GT Delta: 0.9882
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  89%|█████████████████████████████████████████████████████████████▎       | 175/197 [01:30<00:17,  1.26it/s]

Layer: model.layers.24.mlp.down_proj | 274.1455383300781, 270.7792053222656, 0.44574862718582153, 0.03146762773394585
Layer: model.layers.24.mlp.down_proj | Similarity with GT Delta: 0.9920
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  89%|█████████████████████████████████████████████████████████████▋       | 176/197 [01:30<00:13,  1.52it/s]

Layer: model.layers.25.self_attn.q_proj | 113.21033477783203, 110.73108673095703, 0.2139054536819458, 0.07609104365110397
Layer: model.layers.25.self_attn.q_proj | Similarity with GT Delta: 0.9783
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.25.self_attn.k_proj | 38.81414794921875, 36.44108581542969, 0.06476050615310669, 0.03043142333626747
Layer: model.layers.25.self_attn.k_proj | Similarity with GT Delta: 0.9442
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.25.self_attn.v_proj | 39.05497360229492, 38.3491096496582, 0.13970328867435455, 0.03442889451980591
Layer: model.layers.25.self_attn.v_proj | Similarity with GT Delta: 0.9138
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  91%|██████████████████████████████████████████████████████████████▋      | 179/197 [01:30<00:06,  2.74it/s]

Layer: model.layers.25.self_attn.o_proj | 110.38140106201172, 108.28736114501953, 0.24067933857440948, 0.03806107118725777
Layer: model.layers.25.self_attn.o_proj | Similarity with GT Delta: 0.9872
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  91%|███████████████████████████████████████████████████████████████      | 180/197 [01:31<00:07,  2.26it/s]

Layer: model.layers.25.mlp.gate_proj | 279.3456726074219, 276.4491271972656, 1.1169087886810303, 0.09005490690469742
Layer: model.layers.25.mlp.gate_proj | Similarity with GT Delta: 0.9931
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  92%|███████████████████████████████████████████████████████████████▍     | 181/197 [01:32<00:08,  1.99it/s]

Layer: model.layers.25.mlp.up_proj | 278.8735656738281, 276.3610534667969, 1.0454851388931274, 0.10543873906135559
Layer: model.layers.25.mlp.up_proj | Similarity with GT Delta: 0.9951
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  92%|███████████████████████████████████████████████████████████████▋     | 182/197 [01:34<00:13,  1.11it/s]

Layer: model.layers.25.mlp.down_proj | 276.4455871582031, 273.4921569824219, 0.30411496758461, 0.049024228006601334
Layer: model.layers.25.mlp.down_proj | Similarity with GT Delta: 0.9862
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  93%|████████████████████████████████████████████████████████████████     | 183/197 [01:34<00:10,  1.37it/s]

Layer: model.layers.26.self_attn.q_proj | 110.07878875732422, 109.00446319580078, 0.16488775610923767, 0.058232903480529785
Layer: model.layers.26.self_attn.q_proj | Similarity with GT Delta: 0.9779
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.26.self_attn.k_proj | 39.457115173339844, 38.325042724609375, 0.03399258106946945, 0.028621802106499672
Layer: model.layers.26.self_attn.k_proj | Similarity with GT Delta: 0.9436
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.26.self_attn.v_proj | 40.214012145996094, 37.67263412475586, 0.09901975840330124, 0.02945864386856556
Layer: model.layers.26.self_attn.v_proj | Similarity with GT Delta: 0.9443
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  94%|█████████████████████████████████████████████████████████████████▏   | 186/197 [01:34<00:04,  2.55it/s]

Layer: model.layers.26.self_attn.o_proj | 111.42637634277344, 108.66690063476562, 0.19790475070476532, 0.07407218217849731
Layer: model.layers.26.self_attn.o_proj | Similarity with GT Delta: 0.9764
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  95%|█████████████████████████████████████████████████████████████████▍   | 187/197 [01:35<00:04,  2.20it/s]

Layer: model.layers.26.mlp.gate_proj | 274.0673828125, 268.7582702636719, 1.405515432357788, 0.07168006151914597
Layer: model.layers.26.mlp.gate_proj | Similarity with GT Delta: 0.9943
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  95%|█████████████████████████████████████████████████████████████████▊   | 188/197 [01:36<00:04,  2.01it/s]

Layer: model.layers.26.mlp.up_proj | 278.0096740722656, 276.13232421875, 1.4271153211593628, 0.08147498965263367
Layer: model.layers.26.mlp.up_proj | Similarity with GT Delta: 0.9897
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  96%|██████████████████████████████████████████████████████████████████▏  | 189/197 [01:38<00:06,  1.23it/s]

Layer: model.layers.26.mlp.down_proj | 277.1803894042969, 273.0318603515625, 0.6283296346664429, 0.1640027016401291
Layer: model.layers.26.mlp.down_proj | Similarity with GT Delta: 0.9907
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  96%|██████████████████████████████████████████████████████████████████▌  | 190/197 [01:38<00:04,  1.51it/s]

Layer: model.layers.27.self_attn.q_proj | 111.46566772460938, 109.5405044555664, 0.1684560775756836, 0.08194519579410553
Layer: model.layers.27.self_attn.q_proj | Similarity with GT Delta: 0.9809
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.27.self_attn.k_proj | 39.51285934448242, 38.94816207885742, 0.044218774884939194, 0.0297615434974432
Layer: model.layers.27.self_attn.k_proj | Similarity with GT Delta: 0.9434
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.27.self_attn.v_proj | 40.16179275512695, 37.19783020019531, 0.3939391076564789, 0.016476113349199295
Layer: model.layers.27.self_attn.v_proj | Similarity with GT Delta: 0.9366
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  98%|███████████████████████████████████████████████████████████████████▌ | 193/197 [01:38<00:01,  2.81it/s]

Layer: model.layers.27.self_attn.o_proj | 110.674072265625, 109.43048095703125, 0.4649595618247986, 0.01790720969438553
Layer: model.layers.27.self_attn.o_proj | Similarity with GT Delta: 0.9820
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  98%|███████████████████████████████████████████████████████████████████▉ | 194/197 [01:39<00:01,  2.43it/s]

Layer: model.layers.27.mlp.gate_proj | 276.4203186035156, 274.726806640625, 1.3655744791030884, 0.12721987068653107
Layer: model.layers.27.mlp.gate_proj | Similarity with GT Delta: 0.9900
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  99%|████████████████████████████████████████████████████████████████████▎| 195/197 [01:39<00:00,  2.12it/s]

Layer: model.layers.27.mlp.up_proj | 279.9798889160156, 273.4003601074219, 1.3477942943572998, 0.10458143055438995
Layer: model.layers.27.mlp.up_proj | Similarity with GT Delta: 0.9919
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering: 100%|█████████████████████████████████████████████████████████████████████| 197/197 [01:41<00:00,  1.94it/s]

Layer: model.layers.27.mlp.down_proj | 275.9679260253906, 273.4568786621094, 0.17545060813426971, 0.03926514461636543
Layer: model.layers.27.mlp.down_proj | Similarity with GT Delta: 0.9711
Keeping Base Weights for lm_head
Saving recovered model to /mnt/e/untitled folder/codebase/LoRO_attack/recovered_qwen_1_5B_model...





tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Saved.


In [2]:
import torch
from transformers import AutoModelForCausalLM
import pandas as pd
import os

# ==========================================
# 配置：对比 Ground Truth (Target) 与 恢复模型 (Recovered)
# ==========================================

target_model_id = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct" 
recovered_model_path = "/mnt/e/untitled folder/codebase/LoRO_attack/recovered_qwen_1_5B_model"

device = "cpu" # 对比只需 CPU 即可，省显存

print(f"Loading Target Model (GT): {target_model_id}...")
try:
    model_gt = AutoModelForCausalLM.from_pretrained(target_model_id, trust_remote_code=True).to(device)
except Exception as e:
    print(f"Error loading GT model: {e}")
    # 如果显存不够，可以尝试加 device_map="cpu" 或 torch_dtype=torch.float16

print(f"Loading Recovered Model: {recovered_model_path}...")

try:
    model_recovered = AutoModelForCausalLM.from_pretrained(recovered_model_path, trust_remote_code=True).to(device)
except Exception as e:
    print(f"Error loading Recovered model: {e}")
    raise e

print("\nStarting Comparison (Target GT vs. Recovered)...")
print("*" * 90)
print(f"{'Layer Name':<50} | {'Cos Sim':<10} | {'Diff Norm':<12} | {'Rel Diff (%)':<12}")
print("-" * 90)

results = []
modules_recovered = dict(model_recovered.named_modules())

# 遍历 GT 模型的层
for name, module_gt in model_gt.named_modules():
    if isinstance(module_gt, torch.nn.Linear):
        if name in modules_recovered:
            module_rec = modules_recovered[name]
            
            # 获取权重
            w_gt = module_gt.weight.detach()
            w_rec = module_rec.weight.detach()
            
            # 检查形状
            if w_gt.shape != w_rec.shape:
                continue
                
            # 1. 计算余弦相似度
            cos_sim = torch.nn.functional.cosine_similarity(
                w_gt.flatten(), w_rec.flatten(), dim=0
            ).item()
            
            # 2. 计算差异 (Diff = GT - Recovered)
            # 如果攻击完美，Diff 应该全是 0
            diff = w_gt - w_rec
            norm_diff = torch.norm(diff).item()
            norm_gt = torch.norm(w_gt).item()
            
            # 相对差异
            rel_diff = norm_diff / norm_gt if norm_gt > 0 else 0.0
            
            # 打印结果 (只打印部分层，或者差异较大的层)
            # 如果 Cos Sim < 0.99 或者 Diff 比较大，说明恢复有问题
            print(f"{name} | {cos_sim:.6f}   | {norm_diff:.4f}       | {rel_diff*100:.4f}%")
            
            results.append({
                "Layer": name, 
                "Cos_Sim": cos_sim, 
                "Diff_Norm": norm_diff, 
                "Rel_Diff": rel_diff
            })

df = pd.DataFrame(results)
print("-" * 90)
print(f"Summary (Target vs Recovered):")
print(f"Avg Cosine Similarity: {df['Cos_Sim'].mean():.6f} (Target: 1.0)")
print(f"Avg Relative Diff:     {df['Rel_Diff'].mean()*100:.6f}% (Target: 0.0%)")
print("-" * 90)


Loading Target Model (GT): zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct...
Loading Recovered Model: /mnt/e/untitled folder/codebase/LoRO_attack/recovered_qwen_1_5B_model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Starting Comparison (Target GT vs. Recovered)...
******************************************************************************************
Layer Name                                         | Cos Sim    | Diff Norm    | Rel Diff (%)
------------------------------------------------------------------------------------------
model.layers.0.self_attn.q_proj | 1.000144   | 0.0425       | 0.0569%
model.layers.0.self_attn.k_proj | 0.999990   | 0.0259       | 0.0739%
model.layers.0.self_attn.v_proj | 0.999983   | 0.0297       | 0.2532%
model.layers.0.self_attn.o_proj | 1.000018   | 0.0487       | 0.1383%
model.layers.0.mlp.gate_proj | 1.000890   | 0.1093       | 0.0958%
model.layers.0.mlp.up_proj | 1.000911   | 0.1012       | 0.1069%
model.layers.0.mlp.down_proj | 1.000916   | 0.0669       | 0.0673%
model.layers.1.self_attn.q_proj | 0.999997   | 0.0480       | 0.0939%
model.layers.1.self_attn.k_proj | 0.999992   | 0.0344       | 0.1236%
model.layers.1.self_attn.v_proj | 0.999981   | 0.0496  

In [None]:
# full-FT with Knockoff (skipped)
# then test accuracy

In [4]:
import torch
import tqdm
import collections
import string
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset

# ==========================================
# 0. 辅助函数：SQuAD 标准评估指标
# ==========================================
def normalize_answer(s):
    """标准化答案：小写、去除标点、去除冠词、去除空白"""
    def remove_articles(text):
        regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
        return re.sub(regex, ' ', text)
    def white_space_fix(text):
        return ' '.join(text.split())
    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)
    def lower(text):
        return text.lower()
    return white_space_fix(remove_articles(remove_punc(lower(s))))

def compute_exact(a_gold, a_pred):
    return int(normalize_answer(a_gold) == normalize_answer(a_pred))

def compute_f1(a_gold, a_pred):
    gold_toks = normalize_answer(a_gold).split()
    pred_toks = normalize_answer(a_pred).split()
    common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
    num_same = sum(common.values())
    if len(gold_toks) == 0 or len(pred_toks) == 0:
        # 如果其中一个是空的，只有当两个都空时 F1 为 1
        return int(gold_toks == pred_toks)
    if num_same == 0:
        return 0
    precision = 1.0 * num_same / len(pred_toks)
    recall = 1.0 * num_same / len(gold_toks)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1

def get_max_metrics(predictions, gold_answers):
    """计算预测值与所有标准答案（可能有多个）之间的最大EM和F1"""
    if not gold_answers: # 无答案情况 (SQuAD v2)
        # 假设微调时让模型输出 "unanswerable" 代表无解
        # 注意：这里需要根据你微调时的设置调整。如果你的模型输出 "" 代表无解，请修改这里。
        is_unanswerable = normalize_answer(predictions) == "unanswerable" 
        return (1, 1) if is_unanswerable else (0, 0)

    exact_scores = [compute_exact(a, predictions) for a in gold_answers]
    f1_scores = [compute_f1(a, predictions) for a in gold_answers]
    return max(exact_scores), max(f1_scores)

# ==========================================
# 1. 配置路径与设备
# ==========================================
recovered_model_path = "/mnt/e/untitled folder/codebase/LoRO_attack/recovered_qwen_1_5B_model" 
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading Recovered Model from: {recovered_model_path} ...")

# ==========================================
# 2. 加载模型
# ==========================================
try:
    tokenizer = AutoTokenizer.from_pretrained(recovered_model_path, trust_remote_code=True)
    # 确保 pad_token 存在
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        
    model = AutoModelForCausalLM.from_pretrained(
        recovered_model_path, 
        torch_dtype=torch.float16, 
        trust_remote_code=True,
        device_map=device
    ).eval()
except Exception as e:
    print(f"Error loading model: {e}")
    raise e

# ==========================================
# 3. 准备数据 (SQuAD v2)
# ==========================================
print("Loading SQuAD v2 validation dataset...")
dataset = load_dataset("squad_v2", split="validation")

# 调试用：只跑前 50 个样本，正式跑时请注释掉下面这行
# dataset = dataset.select(range(50)) 

print(f"Start evaluating on {len(dataset)} samples...")

# ==========================================
# 4. 执行评估
# ==========================================
total_em = 0.0
total_f1 = 0.0
total = 0

progress_bar = tqdm.tqdm(range(len(dataset)))

for i in progress_bar:
    total += 1
    
    item = dataset[i]
    context = item['context']
    question = item['question']
    
    # 获取标准答案列表 (SQuAD v2 的 answers 是一个包含 text 列表的字典)
    # 如果 answers['text'] 为空，说明是不可回答的问题
    gold_answers = item['answers']['text']
    
    # --- 关键：Prompt 构建 ---
    # 这里必须和你微调时的 Prompt 格式保持一致！
    # 假设格式为: Context: ... Question: ... Answer:
    # 并且如果无解，你的模型被训练输出 "unanswerable"
    prompt_content = f"Context: {context}\nQuestion: {question}\nAnswer:"
    
    # 如果你使用了 chat template 微调，请保留下面的 messages 结构，并把 prompt_content 放进去
    # 如果微调时有 System Prompt 提示遇到无解怎么做，请在这里加上
    system_prompt = "Answer the question based on the context. If the question cannot be answered from the context, say 'unanswerable'."
    
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt_content}
    ]
    
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=64, # SQuAD 答案通常比较短，不需要 256
                do_sample=False,   # 评估通常使用贪婪搜索
                pad_token_id=tokenizer.eos_token_id
            )
        
        # 解码并去掉 Prompt 部分
        generated_text = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True).strip()
        
        # 计算分数
        em, f1 = get_max_metrics(generated_text, gold_answers)
        
        total_em += em
        total_f1 += f1
        
        # 打印部分结果用于调试
        if i < 3:
            print(f"\n--- Sample {i} ---")
            print(f"Pred: {generated_text}")
            print(f"Gold: {gold_answers if gold_answers else 'unanswerable'}")

    except Exception as e:
        print(f"Error index:{i} {e}")
        # 出错算 0 分
        pass

    # 实时更新进度条
    progress_bar.set_postfix({'EM': f"{total_em/total:.2%}", 'F1': f"{total_f1/total:.2%}"})

final_em = total_em / total
final_f1 = total_f1 / total

print("\n" + "="*30)
print(f"Final Results on SQuAD v2 ({len(dataset)} samples):")
print(f"Exact Match (EM): {final_em:.4f} ({final_em:.2%})")
print(f"F1 Score:         {final_f1:.4f} ({final_f1:.2%})")
print("="*30)

Loading Recovered Model from: /mnt/e/untitled folder/codebase/LoRO_attack/recovered_qwen_1_5B_model ...


The tokenizer you are loading from '/mnt/e/untitled folder/codebase/LoRO_attack/recovered_qwen_1_5B_model' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.
`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading SQuAD v2 validation dataset...


README.md: 0.00B [00:00, ?B/s]

squad_v2/train-00000-of-00001.parquet:   0%|          | 0.00/16.4M [00:00<?, ?B/s]

squad_v2/validation-00000-of-00001.parqu(…):   0%|          | 0.00/1.35M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/130319 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/11873 [00:00<?, ? examples/s]

Start evaluating on 11873 samples...


  0%|                                                                                         | 0/11873 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  0%|                                                      | 1/11873 [00:03<11:38:01,  3.53s/it, EM=100.00%, F1=100.00%]


--- Sample 0 ---
Pred: France
Gold: ['France', 'France', 'France', 'France']


  0%|                                                      | 2/11873 [00:08<13:28:09,  4.08s/it, EM=100.00%, F1=100.00%]


--- Sample 1 ---
Pred: the 10th and 11th centuries
Gold: ['10th and 11th centuries', 'in the 10th and 11th centuries', '10th and 11th centuries', '10th and 11th centuries']


  0%|                                                      | 3/11873 [00:11<13:12:43,  4.01s/it, EM=100.00%, F1=100.00%]


--- Sample 2 ---
Pred: Denmark, Iceland and Norway
Gold: ['Denmark, Iceland and Norway', 'Denmark, Iceland and Norway', 'Denmark, Iceland and Norway', 'Denmark, Iceland and Norway']


  1%|▋                                                     | 154/11873 [09:18<11:48:44,  3.63s/it, EM=38.31%, F1=41.75%]


KeyboardInterrupt: 