
sudo su
pip uninstall tensorflow-gpu tensorflow

pip install transformers datasets 'accelerate>=0.26.0' -i https://mirrors.aliyun.com/pypi/simple
pip install numpy -i https://mirrors.aliyun.com/pypi/simple --upgrade


In [9]:
import os

os.environ["HF_ENDPOINT"] = "http://223.109.239.18:20281/#browse/browse:hf"
os.environ["HF_HOME"] = "/home/vipuser/.cache/huggingface"
os.environ["HF_HUB_CACHE"] = "/home/vipuser/.cache/huggingface/hub"
os.environ["TRANSFORMERS_CACHE"] = "/home/vipuser/.cache/huggingface/transformers"
os.makedirs(os.environ["HF_HUB_CACHE"], exist_ok=True)
os.makedirs(os.environ["TRANSFORMERS_CACHE"], exist_ok=True)


# obfuscation

In [4]:
import torch
import sys
import os
from transformers import AutoTokenizer, AutoModelForCausalLM

repo_path = "/home/vipuser/LoRO/LoRO/"  
if os.path.exists(repo_path) and repo_path not in sys.path:
    sys.path.append(repo_path)

try:
    from utils import model_obfuscation
except ImportError as e:
    print('wrong repo_path')
    sys.exit(1)

# ==========================================
# 1. 加载目标模型 (Private Model)
# ==========================================
model_id = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct"
device = "cpu"
save_path = "/home/vipuser/LoRO/LoRO_attack/loro_qwen_1_5B.pt"

print(f"正在加载模型: {model_id} ...")
# Qwen 需要 trust_remote_code=True
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# 使用 AutoModelForCausalLM 因为这是一个生成任务 (SQuAD)
# New line: Add force_download=True
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    trust_remote_code=True, 
    force_download=False  # <--- Forces a fresh download
).to(device)
print("模型加载完成。准备进行 LoRO 混淆...")

# ==========================================
# 2. 执行混淆 (调用仓库代码)
# ==========================================
noise_magnitude = 1

print(f"开始混淆 (Noise Magnitude: {noise_magnitude})...")
obfuscated_model = model_obfuscation(model, device=device, noise_mag=noise_magnitude, r=24)

# ==========================================
# 4. 保存混淆后的 Checkpoint
# ==========================================
print(f"正在保存混淆后的模型至: {save_path} ...")
torch.save(obfuscated_model.state_dict(), save_path)

print(f"Checkpoint Path: {os.path.abspath(save_path)}")


正在加载模型: zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct ...


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

模型加载完成。准备进行 LoRO 混淆...
开始混淆 (Noise Magnitude: 1)...
Obfuscating: model.layers.0.self_attn.q_proj
Obfuscating: model.layers.0.self_attn.k_proj
Obfuscating: model.layers.0.self_attn.v_proj
Obfuscating: model.layers.0.self_attn.o_proj
Obfuscating: model.layers.0.mlp.gate_proj
Obfuscating: model.layers.0.mlp.up_proj
Obfuscating: model.layers.0.mlp.down_proj
Obfuscating: model.layers.1.self_attn.q_proj
Obfuscating: model.layers.1.self_attn.k_proj
Obfuscating: model.layers.1.self_attn.v_proj
Obfuscating: model.layers.1.self_attn.o_proj
Obfuscating: model.layers.1.mlp.gate_proj
Obfuscating: model.layers.1.mlp.up_proj
Obfuscating: model.layers.1.mlp.down_proj
Obfuscating: model.layers.2.self_attn.q_proj
Obfuscating: model.layers.2.self_attn.k_proj
Obfuscating: model.layers.2.self_attn.v_proj
Obfuscating: model.layers.2.self_attn.o_proj
Obfuscating: model.layers.2.mlp.gate_proj
Obfuscating: model.layers.2.mlp.up_proj
Obfuscating: model.layers.2.mlp.down_proj
Obfuscating: model.layers.3.self_att

# cmp base & ft

In [5]:
import torch
from transformers import AutoModelForCausalLM
import pandas as pd

# ==========================================
# 配置
# ==========================================
model_id_ft = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct"   # Target
model_id_base = "Qwen/Qwen2.5-1.5B-Instruct"    # Prior (Base)
device = "cpu" # 对比只需 CPU 即可，省显存

print(f"Loading Fine-Tuned Model: {model_id_ft}...")
model_ft = AutoModelForCausalLM.from_pretrained(model_id_ft, trust_remote_code=True).to(device)

print(f"Loading Base Model: {model_id_base}...")
model_base = AutoModelForCausalLM.from_pretrained(model_id_base, trust_remote_code=True).to(device)

print("\nStarting Comparison (FT vs. Base)...")
print("-" * 90)
print(f"{'Layer Name':<50} | {'Cos Sim':<10} | {'Delta Norm':<12} | {'Rel Diff (%)':<12}")
print("-" * 90)

results = []
modules_base = dict(model_base.named_modules())

for name, module_ft in model_ft.named_modules():
    if isinstance(module_ft, torch.nn.Linear):
        if name in modules_base:
            module_base = modules_base[name]
            w_ft = module_ft.weight.detach()
            w_base = module_base.weight.detach()
            
            if w_ft.shape != w_base.shape:
                continue
                
            # 1. Cosine Similarity
            cos_sim = torch.nn.functional.cosine_similarity(
                w_ft.flatten(), w_base.flatten(), dim=0
            ).item()
            
            # 2. Delta (FT - Base)
            delta = w_ft - w_base
            norm_delta = torch.norm(delta).item()
            norm_base = torch.norm(w_base).item()
            
            rel_diff = norm_delta / norm_base if norm_base > 0 else 0.0
            
            print(f"{name:<50} | {cos_sim:.6f}   | {norm_delta:.4f}       | {rel_diff*100:.4f}%")
            
            results.append({
                "Layer": name, "Cos_Sim": cos_sim, 
                "Delta_Norm": norm_delta, "Rel_Diff": rel_diff
            })

df = pd.DataFrame(results)
print("-" * 90)
print(f"Avg Cos Sim: {df['Cos_Sim'].mean():.4f} | Avg Delta Norm: {df['Delta_Norm'].mean():.4f}")

Loading Fine-Tuned Model: zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct...
Loading Base Model: Qwen/Qwen2.5-1.5B-Instruct...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]


Starting Comparison (FT vs. Base)...
------------------------------------------------------------------------------------------
Layer Name                                         | Cos Sim    | Delta Norm   | Rel Diff (%)
------------------------------------------------------------------------------------------
model.layers.0.self_attn.q_proj                    | 0.999997   | 0.1955       | 0.2621%
model.layers.0.self_attn.k_proj                    | 0.999997   | 0.0789       | 0.2254%
model.layers.0.self_attn.v_proj                    | 0.999970   | 0.0915       | 0.7792%
model.layers.0.self_attn.o_proj                    | 0.999977   | 0.2367       | 0.6725%
model.layers.0.mlp.gate_proj                       | 0.999986   | 0.6117       | 0.5364%
model.layers.0.mlp.up_proj                         | 0.999980   | 0.5940       | 0.6271%
model.layers.0.mlp.down_proj                       | 0.999997   | 0.2366       | 0.2383%
model.layers.1.self_attn.q_proj                    | 0.999991  

# SVD

In [6]:
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
import copy
from tqdm import tqdm

# ==========================================
# 1. 配置
# ==========================================
# 你的 Base 模型 (攻击者先验)
base_model_id = "Qwen/Qwen2.5-1.5B-Instruct"
# 你的 GT 模型 (仅用于验证攻击结果)
target_model_id = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct"
# 混淆后的 Checkpoint 路径
obfuscated_checkpoint = "/home/vipuser/LoRO/LoRO_attack/loro_qwen_1_5B.pt"
# 恢复模型的保存路径
save_path_recovered = "/home/vipuser/LoRO/LoRO_attack/recovered_qwen_1_5B_model"

device = "cuda" if torch.cuda.is_available() else "cpu"

# === 攻击关键参数 ===
REMOVE_RANK = 24

# ==========================================
# 2. 模型加载
# ==========================================
print(f"1. Loading Base Model (Prior): {base_model_id}...")
# 必须加 trust_remote_code=True 以支持 Qwen
recovered_model = AutoModelForCausalLM.from_pretrained(base_model_id, trust_remote_code=True).to(device)

print(f"2. Loading Obfuscated Checkpoint: {obfuscated_checkpoint}...")
if not os.path.exists(obfuscated_checkpoint):
    raise FileNotFoundError("混淆 Checkpoint 未找到，请先运行 Cell 1 生成。")
obfus_state_dict = torch.load(obfuscated_checkpoint, map_location=device)

print(f"3. Loading Ground Truth (Validation): {target_model_id}...")
gt_model = AutoModelForCausalLM.from_pretrained(target_model_id, trust_remote_code=True).to(device)

# ==========================================
# 3. 执行 SVD 去噪攻击 (SVD Denoising)
# ==========================================
print(f"\nSTARTING RECOVERY (Removing Top-{REMOVE_RANK} Singular Values as Noise)...")
print("="*60)

similarities = []
relative_errors = []

# 遍历 Base 模型的每一层
# 我们需要修改 recovered_model 的权重
all_modules = list(recovered_model.named_modules())
linear_layers = [(n, m) for n, m in all_modules if isinstance(m, nn.Linear)]

progress_bar = tqdm(linear_layers, desc="Recovering")

for name, module in progress_bar:

    if "lm_head" in name:
        print(f"Keeping Base Weights for {name}")
        continue

    # 1. 构造 LoRO 保存的 key
    # 根据 LoroLinear 代码，权重保存在 "obfus_linear.weight
    obfus_key = f"{name}.obfus_linear.weight"
    
    # 检查该层是否被混淆 (如果没在 checkpoint 里找到，说明该层未混淆，保持 Base 原样)
    if obfus_key not in obfus_state_dict:
        continue
        
    # 2. 获取数据
    W_base = module.weight.detach() # [out, in]
    W_obfus = obfus_state_dict[obfus_key].detach() # [out, in]
    
    # 3. 计算 Diff (包含 Fine-tuning 更新 + LoRO 噪声)
    # Diff = (W_base + Delta_FT + Noise) - W_base = Delta_FT + Noise
    print("size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}")
    Diff = W_obfus - W_base
    
    # 4. SVD 分解
    # 使用 float32 保证精度，Diff 形状通常是 [out_features, in_features]
    U, S, Vh = torch.linalg.svd(Diff.float(), full_matrices=False)

    print(f"Layer: {name} | {S[REMOVE_RANK - 2]}, {S[REMOVE_RANK - 1]}, {S[REMOVE_RANK]}, {S[REMOVE_RANK + 1]}")
    
    # --- 攻击核心逻辑 ---
    # 根据 LoroLinear 定义，噪声是 (B@A).T，其秩为 r (REMOVE_RANK)。
    # 且因为 noise_mag 很大，这 r 个奇异值会非常大，位于 S 的最前端。
    # 我们将前 r 个奇异值置零，即移除了 LoRO 噪声。
    S_clean = S.clone()
    S_clean[:REMOVE_RANK] = 0.0 
    
    # 5. 重构纯净的 Delta (即估算的 Delta_FT)
    Delta_Recovered = (U @ torch.diag(S_clean) @ Vh).to(W_base.dtype)
    
    # 6. 恢复权重并写入模型
    # W_rec = W_base + Delta_Recovered
    module.weight.data = W_base + Delta_Recovered
    
    # 7. 恢复 Bias
    # LoroLinear 代码中: self.obfus_linear.bias = original_linear.bias
    # Bias 没有加噪声，直接从 checkpoint 读取覆盖即可
    obfus_bias_key = f"{name}.obfus_linear.bias"
    if obfus_bias_key in obfus_state_dict and module.bias is not None:
        module.bias.data = obfus_state_dict[obfus_bias_key].detach()
    
    # ==========================
    # 验证环节 (对比 GT)
    # ==========================
    if name in dict(gt_model.named_modules()):
        W_gt = dict(gt_model.named_modules())[name].weight.detach()
        Delta_True = W_gt - W_base
        
        # 计算相似度: 我们的恢复结果 vs 真实的微调增量
        # 如果 sim 接近 1.0，说明我们成功剥离了噪声，保留了微调增量
        sim = torch.nn.functional.cosine_similarity(Delta_True.flatten(), Delta_Recovered.flatten(), dim=0).item()
        
        print(f"Layer: {name} | Similarity with GT Delta: {sim:.4f}")
        
        similarities.append(sim)
        # progress_bar.set_postfix({"Avg Sim": f"{sum(similarities)/len(similarities):.4f}", "Last Sim": f"{sim:.4f}"})

# ==========================================
# 4. 结果保存
# ==========================================

print(f"Saving recovered model to {save_path_recovered}...")
recovered_model.save_pretrained(save_path_recovered)
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
tokenizer.save_pretrained(save_path_recovered)
print("Saved.")

1. Loading Base Model (Prior): Qwen/Qwen2.5-1.5B-Instruct...
2. Loading Obfuscated Checkpoint: /home/vipuser/LoRO/LoRO_attack/loro_qwen_1_5B.pt...
3. Loading Ground Truth (Validation): zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct...

STARTING RECOVERY (Removing Top-24 Singular Values as Noise)...


Recovering:   0%|          | 0/197 [00:00<?, ?it/s]

size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.0.self_attn.q_proj | 110.98078918457031, 105.23827362060547, 0.1035633310675621, 0.07278374582529068


Recovering:   1%|          | 1/197 [00:00<02:40,  1.22it/s]

Layer: model.layers.0.self_attn.q_proj | Similarity with GT Delta: 0.9816
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.0.self_attn.k_proj | 37.55204772949219, 35.60569381713867, 0.036296095699071884, 0.030306849628686905
Layer: model.layers.0.self_attn.k_proj | Similarity with GT Delta: 0.9438
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.0.self_attn.v_proj | 38.228233337402344, 37.28694152832031, 0.048876505345106125, 0.02879672311246395
Layer: model.layers.0.self_attn.v_proj | Similarity with GT Delta: 0.9415
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   3%|▎         | 5/197 [00:01<00:42,  4.55it/s]

Layer: model.layers.0.self_attn.o_proj | 110.03752136230469, 108.81861114501953, 0.17466913163661957, 0.0653194710612297
Layer: model.layers.0.self_attn.o_proj | Similarity with GT Delta: 0.9826
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.0.mlp.gate_proj | 280.0085754394531, 276.9113464355469, 0.36379697918891907, 0.18084977567195892
Layer: model.layers.0.mlp.gate_proj | Similarity with GT Delta: 0.9915
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   3%|▎         | 6/197 [00:01<00:36,  5.28it/s]

Layer: model.layers.0.mlp.up_proj | 279.65887451171875, 272.3178405761719, 0.36319661140441895, 0.1578933149576187
Layer: model.layers.0.mlp.up_proj | Similarity with GT Delta: 0.9913
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   4%|▎         | 7/197 [00:01<00:40,  4.73it/s]

Layer: model.layers.0.mlp.down_proj | 277.9854736328125, 272.8697204589844, 0.12345138937234879, 0.07651571184396744
Layer: model.layers.0.mlp.down_proj | Similarity with GT Delta: 0.9892
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   4%|▍         | 8/197 [00:01<00:42,  4.46it/s]

Layer: model.layers.1.self_attn.q_proj | 112.57417297363281, 109.13138580322266, 0.10775455832481384, 0.0776289775967598
Layer: model.layers.1.self_attn.q_proj | Similarity with GT Delta: 0.9826
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.1.self_attn.k_proj | 38.740657806396484, 35.97294998168945, 0.06045853719115257, 0.03486429527401924
Layer: model.layers.1.self_attn.k_proj | Similarity with GT Delta: 0.9549
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.1.self_attn.v_proj | 38.317230224609375, 36.90126037597656, 0.1248622015118599, 0.024942416697740555
Layer: model.layers.1.self_attn.v_proj | Similarity with GT Delta: 0.9436
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   6%|▌         | 12/197 [00:02<00:26,  6.88it/s]

Layer: model.layers.1.self_attn.o_proj | 111.47602844238281, 107.43460845947266, 0.28220334649086, 0.07381781190633774
Layer: model.layers.1.self_attn.o_proj | Similarity with GT Delta: 0.9812
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.1.mlp.gate_proj | 281.3736267089844, 276.68084716796875, 0.6134381294250488, 0.1456758677959442
Layer: model.layers.1.mlp.gate_proj | Similarity with GT Delta: 0.9942
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   7%|▋         | 13/197 [00:02<00:25,  7.28it/s]

Layer: model.layers.1.mlp.up_proj | 277.9227600097656, 268.14593505859375, 0.7099881768226624, 0.11262436211109161
Layer: model.layers.1.mlp.up_proj | Similarity with GT Delta: 0.9918
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   7%|▋         | 14/197 [00:02<00:28,  6.45it/s]

Layer: model.layers.1.mlp.down_proj | 278.0538024902344, 275.0688781738281, 1.8823033571243286, 0.30406683683395386
Layer: model.layers.1.mlp.down_proj | Similarity with GT Delta: 0.9873
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:   8%|▊         | 15/197 [00:02<00:33,  5.42it/s]

Layer: model.layers.2.self_attn.q_proj | 110.20111083984375, 105.1439208984375, 0.15339034795761108, 0.060257066041231155
Layer: model.layers.2.self_attn.q_proj | Similarity with GT Delta: 0.9843
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.2.self_attn.k_proj | 38.788490295410156, 36.28425598144531, 0.06753206998109818, 0.024141600355505943
Layer: model.layers.2.self_attn.k_proj | Similarity with GT Delta: 0.9527
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.2.self_attn.v_proj | 38.54840850830078, 38.06975555419922, 0.11038986593484879, 0.045020151883363724
Layer: model.layers.2.self_attn.v_proj | Similarity with GT Delta: 0.9394
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  10%|▉         | 19/197 [00:03<00:23,  7.56it/s]

Layer: model.layers.2.self_attn.o_proj | 110.2904052734375, 107.25997161865234, 0.15106846392154694, 0.09805967658758163
Layer: model.layers.2.self_attn.o_proj | Similarity with GT Delta: 0.9845
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.2.mlp.gate_proj | 275.03790283203125, 271.2618408203125, 0.5854828357696533, 0.13134022057056427
Layer: model.layers.2.mlp.gate_proj | Similarity with GT Delta: 0.9881
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  10%|█         | 20/197 [00:03<00:23,  7.63it/s]

Layer: model.layers.2.mlp.up_proj | 277.83050537109375, 275.51141357421875, 0.520643413066864, 0.18129587173461914
Layer: model.layers.2.mlp.up_proj | Similarity with GT Delta: 0.9889
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  11%|█         | 21/197 [00:03<00:28,  6.28it/s]

Layer: model.layers.2.mlp.down_proj | 276.080322265625, 274.6892395019531, 1.5817570686340332, 0.1733468621969223
Layer: model.layers.2.mlp.down_proj | Similarity with GT Delta: 0.9915
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  11%|█         | 22/197 [00:03<00:32,  5.35it/s]

Layer: model.layers.3.self_attn.q_proj | 111.16463470458984, 108.75109100341797, 0.14742453396320343, 0.07128917425870895
Layer: model.layers.3.self_attn.q_proj | Similarity with GT Delta: 0.9862
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.3.self_attn.k_proj | 38.80263137817383, 37.88518524169922, 0.08172918111085892, 0.028295183554291725
Layer: model.layers.3.self_attn.k_proj | Similarity with GT Delta: 0.9534
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.3.self_attn.v_proj | 39.852684020996094, 37.24416732788086, 0.08457780629396439, 0.04561980068683624
Layer: model.layers.3.self_attn.v_proj | Similarity with GT Delta: 0.9131
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  13%|█▎        | 26/197 [00:04<00:22,  7.55it/s]

Layer: model.layers.3.self_attn.o_proj | 111.19471740722656, 110.0712890625, 0.1485937088727951, 0.07823088020086288
Layer: model.layers.3.self_attn.o_proj | Similarity with GT Delta: 0.9834
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.3.mlp.gate_proj | 272.58123779296875, 270.1867980957031, 0.5994001030921936, 0.11875618994235992
Layer: model.layers.3.mlp.gate_proj | Similarity with GT Delta: 0.9908
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  14%|█▎        | 27/197 [00:04<00:22,  7.41it/s]

Layer: model.layers.3.mlp.up_proj | 277.1925354003906, 266.6879577636719, 0.5289862155914307, 0.1585133671760559
Layer: model.layers.3.mlp.up_proj | Similarity with GT Delta: 0.9925
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  14%|█▍        | 28/197 [00:04<00:28,  5.86it/s]

Layer: model.layers.3.mlp.down_proj | 280.4674987792969, 275.01995849609375, 0.13305018842220306, 0.09193535149097443
Layer: model.layers.3.mlp.down_proj | Similarity with GT Delta: 0.9892
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  15%|█▍        | 29/197 [00:05<00:32,  5.13it/s]

Layer: model.layers.4.self_attn.q_proj | 112.32093811035156, 106.97827911376953, 0.11555297672748566, 0.0769529789686203
Layer: model.layers.4.self_attn.q_proj | Similarity with GT Delta: 0.9830
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.4.self_attn.k_proj | 37.466922760009766, 34.66061782836914, 0.06545480340719223, 0.026296328753232956
Layer: model.layers.4.self_attn.k_proj | Similarity with GT Delta: 0.9471
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.4.self_attn.v_proj | 37.86470413208008, 36.796417236328125, 0.06991111487150192, 0.043633341789245605
Layer: model.layers.4.self_attn.v_proj | Similarity with GT Delta: 0.9422
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  17%|█▋        | 33/197 [00:05<00:22,  7.41it/s]

Layer: model.layers.4.self_attn.o_proj | 112.11506652832031, 111.77375030517578, 0.1365637630224228, 0.10004007816314697
Layer: model.layers.4.self_attn.o_proj | Similarity with GT Delta: 0.9840
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.4.mlp.gate_proj | 280.8729553222656, 270.2525329589844, 0.46352875232696533, 0.1581215262413025
Layer: model.layers.4.mlp.gate_proj | Similarity with GT Delta: 0.9891
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  17%|█▋        | 34/197 [00:05<00:21,  7.60it/s]

Layer: model.layers.4.mlp.up_proj | 274.10870361328125, 267.5647277832031, 0.5422540307044983, 0.19013746082782745
Layer: model.layers.4.mlp.up_proj | Similarity with GT Delta: 0.9925
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  18%|█▊        | 35/197 [00:05<00:26,  6.10it/s]

Layer: model.layers.4.mlp.down_proj | 278.4333190917969, 275.6853332519531, 0.1681465059518814, 0.08735774457454681
Layer: model.layers.4.mlp.down_proj | Similarity with GT Delta: 0.9896
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  18%|█▊        | 36/197 [00:06<00:30,  5.26it/s]

Layer: model.layers.5.self_attn.q_proj | 111.10389709472656, 108.19242858886719, 0.18280243873596191, 0.07201457768678665
Layer: model.layers.5.self_attn.q_proj | Similarity with GT Delta: 0.9853
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.5.self_attn.k_proj | 39.774166107177734, 35.68755340576172, 0.06251809000968933, 0.025950739160180092
Layer: model.layers.5.self_attn.k_proj | Similarity with GT Delta: 0.9396
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.5.self_attn.v_proj | 39.37656784057617, 36.635623931884766, 0.04998132959008217, 0.03966127708554268
Layer: model.layers.5.self_attn.v_proj | Similarity with GT Delta: 0.9460
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  20%|██        | 40/197 [00:06<00:20,  7.49it/s]

Layer: model.layers.5.self_attn.o_proj | 112.10228729248047, 110.09545135498047, 0.11146636307239532, 0.10805504024028778
Layer: model.layers.5.self_attn.o_proj | Similarity with GT Delta: 0.9834
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.5.mlp.gate_proj | 276.53857421875, 273.72186279296875, 0.7049494981765747, 0.14616194367408752
Layer: model.layers.5.mlp.gate_proj | Similarity with GT Delta: 0.9901
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  21%|██        | 41/197 [00:06<00:20,  7.58it/s]

Layer: model.layers.5.mlp.up_proj | 280.4115295410156, 272.425048828125, 0.5900365710258484, 0.17935216426849365
Layer: model.layers.5.mlp.up_proj | Similarity with GT Delta: 0.9926
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  21%|██▏       | 42/197 [00:06<00:25,  6.14it/s]

Layer: model.layers.5.mlp.down_proj | 278.45379638671875, 275.9224853515625, 0.23516780138015747, 0.10065660625696182
Layer: model.layers.5.mlp.down_proj | Similarity with GT Delta: 0.9909
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  22%|██▏       | 43/197 [00:07<00:29,  5.17it/s]

Layer: model.layers.6.self_attn.q_proj | 112.39586639404297, 108.23966979980469, 0.1613522171974182, 0.07677987962961197
Layer: model.layers.6.self_attn.q_proj | Similarity with GT Delta: 0.9863
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.6.self_attn.k_proj | 39.072853088378906, 37.451812744140625, 0.04601570591330528, 0.03334080055356026
Layer: model.layers.6.self_attn.k_proj | Similarity with GT Delta: 0.9471
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.6.self_attn.v_proj | 39.44113540649414, 37.25504684448242, 0.17996934056282043, 0.04139714688062668
Layer: model.layers.6.self_attn.v_proj | Similarity with GT Delta: 0.9381
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  24%|██▍       | 47/197 [00:07<00:20,  7.34it/s]

Layer: model.layers.6.self_attn.o_proj | 111.11296081542969, 109.50444030761719, 0.20450611412525177, 0.09070871025323868
Layer: model.layers.6.self_attn.o_proj | Similarity with GT Delta: 0.9806
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.6.mlp.gate_proj | 280.1599426269531, 272.4292297363281, 0.6773059964179993, 0.15047356486320496
Layer: model.layers.6.mlp.gate_proj | Similarity with GT Delta: 0.9908
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  24%|██▍       | 48/197 [00:07<00:19,  7.63it/s]

Layer: model.layers.6.mlp.up_proj | 278.4384460449219, 275.2301940917969, 0.4795072376728058, 0.21217119693756104
Layer: model.layers.6.mlp.up_proj | Similarity with GT Delta: 0.9906
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  25%|██▍       | 49/197 [00:07<00:23,  6.37it/s]

Layer: model.layers.6.mlp.down_proj | 277.9036560058594, 275.1754150390625, 0.2563674747943878, 0.08660831302404404
Layer: model.layers.6.mlp.down_proj | Similarity with GT Delta: 0.9906
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  25%|██▌       | 50/197 [00:08<00:27,  5.41it/s]

Layer: model.layers.7.self_attn.q_proj | 111.50897979736328, 109.18106842041016, 0.2072249948978424, 0.09881081432104111
Layer: model.layers.7.self_attn.q_proj | Similarity with GT Delta: 0.9826
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.7.self_attn.k_proj | 39.886844635009766, 38.00356674194336, 0.06625542789697647, 0.03846638649702072
Layer: model.layers.7.self_attn.k_proj | Similarity with GT Delta: 0.9322
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.7.self_attn.v_proj | 40.4210205078125, 39.681297302246094, 0.08744807541370392, 0.05986041948199272
Layer: model.layers.7.self_attn.v_proj | Similarity with GT Delta: 0.9465
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  27%|██▋       | 54/197 [00:08<00:18,  7.54it/s]

Layer: model.layers.7.self_attn.o_proj | 109.44834899902344, 108.26105499267578, 0.1376214325428009, 0.1272502988576889
Layer: model.layers.7.self_attn.o_proj | Similarity with GT Delta: 0.9853
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.7.mlp.gate_proj | 277.7898254394531, 273.6128845214844, 0.4754007160663605, 0.2124662697315216
Layer: model.layers.7.mlp.gate_proj | Similarity with GT Delta: 0.9904
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  28%|██▊       | 55/197 [00:08<00:18,  7.85it/s]

Layer: model.layers.7.mlp.up_proj | 277.5631408691406, 272.6833801269531, 0.4726930856704712, 0.24099567532539368
Layer: model.layers.7.mlp.up_proj | Similarity with GT Delta: 0.9903
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  28%|██▊       | 56/197 [00:08<00:22,  6.29it/s]

Layer: model.layers.7.mlp.down_proj | 279.2666320800781, 268.4475402832031, 0.2621288597583771, 0.0539575070142746
Layer: model.layers.7.mlp.down_proj | Similarity with GT Delta: 0.9899
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  29%|██▉       | 57/197 [00:09<00:26,  5.24it/s]

Layer: model.layers.8.self_attn.q_proj | 110.98957061767578, 110.5540542602539, 0.11373678594827652, 0.09073730558156967
Layer: model.layers.8.self_attn.q_proj | Similarity with GT Delta: 0.9842
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.8.self_attn.k_proj | 39.04302978515625, 35.5965461730957, 0.06293310225009918, 0.03219949081540108
Layer: model.layers.8.self_attn.k_proj | Similarity with GT Delta: 0.9503
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.8.self_attn.v_proj | 38.68339157104492, 36.62922286987305, 0.06829643994569778, 0.024776654317975044
Layer: model.layers.8.self_attn.v_proj | Similarity with GT Delta: 0.9390
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  31%|███       | 61/197 [00:09<00:18,  7.43it/s]

Layer: model.layers.8.self_attn.o_proj | 110.05795288085938, 108.0231704711914, 0.18738745152950287, 0.10972505807876587
Layer: model.layers.8.self_attn.o_proj | Similarity with GT Delta: 0.9841
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.8.mlp.gate_proj | 277.1349792480469, 274.3558654785156, 0.45462939143180847, 0.1890462338924408
Layer: model.layers.8.mlp.gate_proj | Similarity with GT Delta: 0.9888
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  31%|███▏      | 62/197 [00:09<00:17,  7.60it/s]

Layer: model.layers.8.mlp.up_proj | 276.3960266113281, 270.1527404785156, 0.49751099944114685, 0.2472037971019745
Layer: model.layers.8.mlp.up_proj | Similarity with GT Delta: 0.9906
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  32%|███▏      | 63/197 [00:09<00:20,  6.42it/s]

Layer: model.layers.8.mlp.down_proj | 279.6515808105469, 278.1280212402344, 0.3212216794490814, 0.053754858672618866
Layer: model.layers.8.mlp.down_proj | Similarity with GT Delta: 0.9893
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  32%|███▏      | 64/197 [00:10<00:24,  5.44it/s]

Layer: model.layers.9.self_attn.q_proj | 111.73670959472656, 106.24783325195312, 0.1669161468744278, 0.08363757282495499
Layer: model.layers.9.self_attn.q_proj | Similarity with GT Delta: 0.9806
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.9.self_attn.k_proj | 41.194095611572266, 39.19271469116211, 0.06280642002820969, 0.02775820903480053
Layer: model.layers.9.self_attn.k_proj | Similarity with GT Delta: 0.9335
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.9.self_attn.v_proj | 38.55953598022461, 37.74781799316406, 0.07881481945514679, 0.04480361565947533
Layer: model.layers.9.self_attn.v_proj | Similarity with GT Delta: 0.9380
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  35%|███▍      | 68/197 [00:10<00:17,  7.55it/s]

Layer: model.layers.9.self_attn.o_proj | 112.15997314453125, 107.88070678710938, 0.1646399050951004, 0.14208661019802094
Layer: model.layers.9.self_attn.o_proj | Similarity with GT Delta: 0.9821
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.9.mlp.gate_proj | 276.8326721191406, 269.3277587890625, 0.48508554697036743, 0.23518121242523193
Layer: model.layers.9.mlp.gate_proj | Similarity with GT Delta: 0.9911
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  35%|███▌      | 69/197 [00:10<00:16,  7.85it/s]

Layer: model.layers.9.mlp.up_proj | 279.47955322265625, 271.86322021484375, 0.4330814778804779, 0.2201453149318695
Layer: model.layers.9.mlp.up_proj | Similarity with GT Delta: 0.9919
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  36%|███▌      | 70/197 [00:10<00:19,  6.47it/s]

Layer: model.layers.9.mlp.down_proj | 279.7005920410156, 271.3500061035156, 0.28128141164779663, 0.04112273082137108
Layer: model.layers.9.mlp.down_proj | Similarity with GT Delta: 0.9901
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  36%|███▌      | 71/197 [00:11<00:23,  5.45it/s]

Layer: model.layers.10.self_attn.q_proj | 110.90461730957031, 105.18756866455078, 0.15482580661773682, 0.12142777442932129
Layer: model.layers.10.self_attn.q_proj | Similarity with GT Delta: 0.9843
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.10.self_attn.k_proj | 39.386260986328125, 37.48392105102539, 0.06837116926908493, 0.03267559036612511
Layer: model.layers.10.self_attn.k_proj | Similarity with GT Delta: 0.9428
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.10.self_attn.v_proj | 40.10633850097656, 37.38716125488281, 0.053352538496255875, 0.04507841542363167
Layer: model.layers.10.self_attn.v_proj | Similarity with GT Delta: 0.9405
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  38%|███▊      | 75/197 [00:11<00:16,  7.54it/s]

Layer: model.layers.10.self_attn.o_proj | 112.76038360595703, 109.44987487792969, 0.14025366306304932, 0.08649859577417374
Layer: model.layers.10.self_attn.o_proj | Similarity with GT Delta: 0.9853
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.10.mlp.gate_proj | 275.6543884277344, 273.3283386230469, 0.627883791923523, 0.2404365986585617
Layer: model.layers.10.mlp.gate_proj | Similarity with GT Delta: 0.9913
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  39%|███▊      | 76/197 [00:11<00:15,  7.74it/s]

Layer: model.layers.10.mlp.up_proj | 276.2366638183594, 269.59405517578125, 0.44350746273994446, 0.2644316554069519
Layer: model.layers.10.mlp.up_proj | Similarity with GT Delta: 0.9903
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  39%|███▉      | 77/197 [00:12<00:18,  6.38it/s]

Layer: model.layers.10.mlp.down_proj | 278.5015563964844, 275.3201904296875, 0.30283084511756897, 0.044093746691942215
Layer: model.layers.10.mlp.down_proj | Similarity with GT Delta: 0.9897
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  40%|███▉      | 78/197 [00:12<00:22,  5.39it/s]

Layer: model.layers.11.self_attn.q_proj | 111.05519104003906, 109.21478271484375, 0.1797584444284439, 0.10444812476634979
Layer: model.layers.11.self_attn.q_proj | Similarity with GT Delta: 0.9844
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.11.self_attn.k_proj | 39.23973083496094, 35.96892166137695, 0.05093378201127052, 0.041095372289419174
Layer: model.layers.11.self_attn.k_proj | Similarity with GT Delta: 0.9470
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.11.self_attn.v_proj | 39.147396087646484, 36.315948486328125, 0.10990356653928757, 0.03559860214591026
Layer: model.layers.11.self_attn.v_proj | Similarity with GT Delta: 0.9375
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  42%|████▏     | 82/197 [00:12<00:15,  7.56it/s]

Layer: model.layers.11.self_attn.o_proj | 112.47373962402344, 103.7884750366211, 0.18982049822807312, 0.0896807312965393
Layer: model.layers.11.self_attn.o_proj | Similarity with GT Delta: 0.9830
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.11.mlp.gate_proj | 278.3233947753906, 269.901123046875, 0.8082972764968872, 0.1803496778011322
Layer: model.layers.11.mlp.gate_proj | Similarity with GT Delta: 0.9918
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  42%|████▏     | 83/197 [00:12<00:14,  7.83it/s]

Layer: model.layers.11.mlp.up_proj | 277.1507568359375, 275.25518798828125, 0.7192672491073608, 0.18622468411922455
Layer: model.layers.11.mlp.up_proj | Similarity with GT Delta: 0.9889
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  43%|████▎     | 84/197 [00:13<00:17,  6.56it/s]

Layer: model.layers.11.mlp.down_proj | 276.5962829589844, 269.1596984863281, 0.39460039138793945, 0.036752231419086456
Layer: model.layers.11.mlp.down_proj | Similarity with GT Delta: 0.9872
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  43%|████▎     | 85/197 [00:13<00:20,  5.50it/s]

Layer: model.layers.12.self_attn.q_proj | 112.3255615234375, 109.75990295410156, 0.14429156482219696, 0.09471697360277176
Layer: model.layers.12.self_attn.q_proj | Similarity with GT Delta: 0.9829
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.12.self_attn.k_proj | 40.500274658203125, 37.4954719543457, 0.047009728848934174, 0.03557189926505089
Layer: model.layers.12.self_attn.k_proj | Similarity with GT Delta: 0.9417
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.12.self_attn.v_proj | 40.272003173828125, 39.20977020263672, 0.055459074676036835, 0.049975354224443436
Layer: model.layers.12.self_attn.v_proj | Similarity with GT Delta: 0.9528
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  45%|████▌     | 89/197 [00:13<00:14,  7.56it/s]

Layer: model.layers.12.self_attn.o_proj | 110.9658432006836, 109.34784698486328, 0.1937333345413208, 0.10611188411712646
Layer: model.layers.12.self_attn.o_proj | Similarity with GT Delta: 0.9831
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.12.mlp.gate_proj | 275.84576416015625, 274.33416748046875, 0.9333129525184631, 0.17386889457702637
Layer: model.layers.12.mlp.gate_proj | Similarity with GT Delta: 0.9960
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  46%|████▌     | 90/197 [00:13<00:13,  7.75it/s]

Layer: model.layers.12.mlp.up_proj | 279.8199768066406, 269.9688720703125, 0.5549047589302063, 0.21903032064437866
Layer: model.layers.12.mlp.up_proj | Similarity with GT Delta: 0.9881
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  46%|████▌     | 91/197 [00:14<00:16,  6.39it/s]

Layer: model.layers.12.mlp.down_proj | 276.3956298828125, 273.30084228515625, 0.26346370577812195, 0.06739028543233871
Layer: model.layers.12.mlp.down_proj | Similarity with GT Delta: 0.9912
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  47%|████▋     | 92/197 [00:14<00:19,  5.38it/s]

Layer: model.layers.13.self_attn.q_proj | 110.80218505859375, 108.72124481201172, 0.20626623928546906, 0.09551344066858292
Layer: model.layers.13.self_attn.q_proj | Similarity with GT Delta: 0.9818
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.13.self_attn.k_proj | 39.19993591308594, 37.121028900146484, 0.08542566001415253, 0.05247912555932999
Layer: model.layers.13.self_attn.k_proj | Similarity with GT Delta: 0.9502
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.13.self_attn.v_proj | 38.888397216796875, 37.37350845336914, 0.04870423302054405, 0.03845490515232086
Layer: model.layers.13.self_attn.v_proj | Similarity with GT Delta: 0.9457
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  49%|████▊     | 96/197 [00:14<00:13,  7.45it/s]

Layer: model.layers.13.self_attn.o_proj | 109.31913757324219, 107.41810607910156, 0.14539845287799835, 0.12315641343593597
Layer: model.layers.13.self_attn.o_proj | Similarity with GT Delta: 0.9826
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.13.mlp.gate_proj | 273.3419189453125, 269.71881103515625, 0.6770800948143005, 0.18816789984703064
Layer: model.layers.13.mlp.gate_proj | Similarity with GT Delta: 0.9922
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  49%|████▉     | 97/197 [00:14<00:12,  7.72it/s]

Layer: model.layers.13.mlp.up_proj | 279.4855041503906, 274.063232421875, 0.594994843006134, 0.21131102740764618
Layer: model.layers.13.mlp.up_proj | Similarity with GT Delta: 0.9864
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  50%|████▉     | 98/197 [00:15<00:16,  5.99it/s]

Layer: model.layers.13.mlp.down_proj | 278.41162109375, 272.1075439453125, 0.25655147433280945, 0.07562057673931122
Layer: model.layers.13.mlp.down_proj | Similarity with GT Delta: 0.9915
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  50%|█████     | 99/197 [00:15<00:19,  5.15it/s]

Layer: model.layers.14.self_attn.q_proj | 111.3343276977539, 110.60650634765625, 0.12214227020740509, 0.07850272208452225
Layer: model.layers.14.self_attn.q_proj | Similarity with GT Delta: 0.9851
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.14.self_attn.k_proj | 39.125274658203125, 38.76437759399414, 0.04827074334025383, 0.03132173791527748
Layer: model.layers.14.self_attn.k_proj | Similarity with GT Delta: 0.9461
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.14.self_attn.v_proj | 37.528770446777344, 36.832889556884766, 0.07311539351940155, 0.03186201676726341
Layer: model.layers.14.self_attn.v_proj | Similarity with GT Delta: 0.9371
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  52%|█████▏    | 103/197 [00:15<00:13,  7.22it/s]

Layer: model.layers.14.self_attn.o_proj | 110.52412414550781, 108.72035217285156, 0.1311618834733963, 0.07225178927183151
Layer: model.layers.14.self_attn.o_proj | Similarity with GT Delta: 0.9827
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.14.mlp.gate_proj | 278.975341796875, 275.1033020019531, 0.6887536644935608, 0.16879767179489136
Layer: model.layers.14.mlp.gate_proj | Similarity with GT Delta: 0.9892
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  53%|█████▎    | 104/197 [00:15<00:12,  7.51it/s]

Layer: model.layers.14.mlp.up_proj | 279.7752685546875, 271.12774658203125, 0.5067522525787354, 0.16578015685081482
Layer: model.layers.14.mlp.up_proj | Similarity with GT Delta: 0.9917
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  53%|█████▎    | 105/197 [00:16<00:14,  6.38it/s]

Layer: model.layers.14.mlp.down_proj | 276.9728698730469, 270.98687744140625, 0.27707958221435547, 0.1085611954331398
Layer: model.layers.14.mlp.down_proj | Similarity with GT Delta: 0.9892
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  54%|█████▍    | 106/197 [00:16<00:16,  5.38it/s]

Layer: model.layers.15.self_attn.q_proj | 109.1517105102539, 108.36956787109375, 0.16739536821842194, 0.13666784763336182
Layer: model.layers.15.self_attn.q_proj | Similarity with GT Delta: 0.9852
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.15.self_attn.k_proj | 39.25312805175781, 34.92120361328125, 0.0496094785630703, 0.04307844862341881
Layer: model.layers.15.self_attn.k_proj | Similarity with GT Delta: 0.9480
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.15.self_attn.v_proj | 36.13047790527344, 35.306034088134766, 0.054030124098062515, 0.04477859288454056
Layer: model.layers.15.self_attn.v_proj | Similarity with GT Delta: 0.9450
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  56%|█████▌    | 110/197 [00:16<00:11,  7.52it/s]

Layer: model.layers.15.self_attn.o_proj | 112.6502685546875, 110.8962631225586, 0.1373884379863739, 0.09485211968421936
Layer: model.layers.15.self_attn.o_proj | Similarity with GT Delta: 0.9824
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.15.mlp.gate_proj | 280.7475891113281, 275.8232116699219, 0.7795434594154358, 0.13510450720787048
Layer: model.layers.15.mlp.gate_proj | Similarity with GT Delta: 0.9903
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  56%|█████▋    | 111/197 [00:16<00:10,  7.83it/s]

Layer: model.layers.15.mlp.up_proj | 276.2549743652344, 273.62786865234375, 0.5166424512863159, 0.16720189154148102
Layer: model.layers.15.mlp.up_proj | Similarity with GT Delta: 0.9897
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  57%|█████▋    | 112/197 [00:17<00:13,  6.29it/s]

Layer: model.layers.15.mlp.down_proj | 274.95623779296875, 271.64874267578125, 0.28117305040359497, 0.05610518530011177
Layer: model.layers.15.mlp.down_proj | Similarity with GT Delta: 0.9937
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  57%|█████▋    | 113/197 [00:17<00:15,  5.34it/s]

Layer: model.layers.16.self_attn.q_proj | 111.75212860107422, 107.37976837158203, 0.23285412788391113, 0.07521775364875793
Layer: model.layers.16.self_attn.q_proj | Similarity with GT Delta: 0.9820
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.16.self_attn.k_proj | 39.43228530883789, 36.89759063720703, 0.10708083212375641, 0.05174442380666733
Layer: model.layers.16.self_attn.k_proj | Similarity with GT Delta: 0.9432
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.16.self_attn.v_proj | 38.221405029296875, 35.480010986328125, 0.04851817712187767, 0.0366310179233551
Layer: model.layers.16.self_attn.v_proj | Similarity with GT Delta: 0.9499
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  59%|█████▉    | 117/197 [00:17<00:10,  7.50it/s]

Layer: model.layers.16.self_attn.o_proj | 110.60539245605469, 108.76566314697266, 0.12688672542572021, 0.0873032659292221
Layer: model.layers.16.self_attn.o_proj | Similarity with GT Delta: 0.9818
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.16.mlp.gate_proj | 275.074462890625, 267.5951232910156, 0.9387905597686768, 0.15728159248828888
Layer: model.layers.16.mlp.gate_proj | Similarity with GT Delta: 0.9891
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  60%|█████▉    | 118/197 [00:17<00:10,  7.75it/s]

Layer: model.layers.16.mlp.up_proj | 279.2716064453125, 272.6142578125, 0.5517556667327881, 0.2582285404205322
Layer: model.layers.16.mlp.up_proj | Similarity with GT Delta: 0.9919
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  60%|██████    | 119/197 [00:18<00:12,  6.38it/s]

Layer: model.layers.16.mlp.down_proj | 278.4672546386719, 271.1632995605469, 0.3878919184207916, 0.0442362055182457
Layer: model.layers.16.mlp.down_proj | Similarity with GT Delta: 0.9889
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  61%|██████    | 120/197 [00:18<00:14,  5.31it/s]

Layer: model.layers.17.self_attn.q_proj | 111.87751770019531, 110.5001220703125, 0.2536766231060028, 0.06880209594964981
Layer: model.layers.17.self_attn.q_proj | Similarity with GT Delta: 0.9859
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.17.self_attn.k_proj | 39.009796142578125, 37.971553802490234, 0.10829957574605942, 0.04954354092478752
Layer: model.layers.17.self_attn.k_proj | Similarity with GT Delta: 0.9522
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.17.self_attn.v_proj | 39.04343795776367, 37.792327880859375, 0.11701055616140366, 0.0625770092010498
Layer: model.layers.17.self_attn.v_proj | Similarity with GT Delta: 0.9459
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  63%|██████▎   | 124/197 [00:18<00:10,  7.09it/s]

Layer: model.layers.17.self_attn.o_proj | 110.15851593017578, 106.03646850585938, 0.16969850659370422, 0.11733952909708023
Layer: model.layers.17.self_attn.o_proj | Similarity with GT Delta: 0.9842
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.17.mlp.gate_proj | 277.9660339355469, 271.7121276855469, 1.1094744205474854, 0.15335576236248016
Layer: model.layers.17.mlp.gate_proj | Similarity with GT Delta: 0.9911
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  63%|██████▎   | 125/197 [00:19<00:09,  7.44it/s]

Layer: model.layers.17.mlp.up_proj | 277.8580017089844, 276.1395568847656, 0.6240566372871399, 0.3009384274482727
Layer: model.layers.17.mlp.up_proj | Similarity with GT Delta: 0.9887
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  64%|██████▍   | 126/197 [00:19<00:11,  6.22it/s]

Layer: model.layers.17.mlp.down_proj | 273.0070495605469, 270.9225769042969, 0.31824856996536255, 0.035350583493709564
Layer: model.layers.17.mlp.down_proj | Similarity with GT Delta: 0.9934
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  64%|██████▍   | 127/197 [00:19<00:13,  5.28it/s]

Layer: model.layers.18.self_attn.q_proj | 111.35987091064453, 108.82923889160156, 0.32265788316726685, 0.06273431330919266
Layer: model.layers.18.self_attn.q_proj | Similarity with GT Delta: 0.9813
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.18.self_attn.k_proj | 38.00732421875, 35.53916549682617, 0.07390885055065155, 0.04513796046376228
Layer: model.layers.18.self_attn.k_proj | Similarity with GT Delta: 0.9409
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.18.self_attn.v_proj | 39.38682174682617, 38.546424865722656, 0.044421326369047165, 0.033663805574178696
Layer: model.layers.18.self_attn.v_proj | Similarity with GT Delta: 0.9465
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  66%|██████▋   | 131/197 [00:19<00:08,  7.45it/s]

Layer: model.layers.18.self_attn.o_proj | 112.0829849243164, 109.77069091796875, 0.1836722493171692, 0.10745830833911896
Layer: model.layers.18.self_attn.o_proj | Similarity with GT Delta: 0.9842
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.18.mlp.gate_proj | 280.5700378417969, 273.2180480957031, 0.786409854888916, 0.16311508417129517
Layer: model.layers.18.mlp.gate_proj | Similarity with GT Delta: 0.9859
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  67%|██████▋   | 132/197 [00:20<00:08,  7.73it/s]

Layer: model.layers.18.mlp.up_proj | 276.2742004394531, 274.8344421386719, 0.5737383961677551, 0.30634158849716187
Layer: model.layers.18.mlp.up_proj | Similarity with GT Delta: 0.9904
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  68%|██████▊   | 133/197 [00:20<00:10,  6.00it/s]

Layer: model.layers.18.mlp.down_proj | 280.4700012207031, 277.306884765625, 0.3614811897277832, 0.031204065307974815
Layer: model.layers.18.mlp.down_proj | Similarity with GT Delta: 0.9908
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  68%|██████▊   | 134/197 [00:20<00:12,  5.12it/s]

Layer: model.layers.19.self_attn.q_proj | 110.00013732910156, 108.09893035888672, 0.23197652399539948, 0.06257102638483047
Layer: model.layers.19.self_attn.q_proj | Similarity with GT Delta: 0.9858
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.19.self_attn.k_proj | 39.24997329711914, 37.23493576049805, 0.043459467589855194, 0.04059946537017822
Layer: model.layers.19.self_attn.k_proj | Similarity with GT Delta: 0.9425
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.19.self_attn.v_proj | 39.20296859741211, 38.33533477783203, 0.08951642364263535, 0.04417267441749573
Layer: model.layers.19.self_attn.v_proj | Similarity with GT Delta: 0.9527
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  70%|███████   | 138/197 [00:21<00:08,  7.04it/s]

Layer: model.layers.19.self_attn.o_proj | 109.99250793457031, 107.01251983642578, 0.14814390242099762, 0.07194811850786209
Layer: model.layers.19.self_attn.o_proj | Similarity with GT Delta: 0.9848
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.19.mlp.gate_proj | 277.12432861328125, 272.72637939453125, 0.6496210098266602, 0.16929735243320465
Layer: model.layers.19.mlp.gate_proj | Similarity with GT Delta: 0.9904
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  71%|███████   | 139/197 [00:21<00:07,  7.41it/s]

Layer: model.layers.19.mlp.up_proj | 278.1795654296875, 274.9462890625, 0.448664128780365, 0.26565301418304443
Layer: model.layers.19.mlp.up_proj | Similarity with GT Delta: 0.9914
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  71%|███████   | 140/197 [00:21<00:08,  6.34it/s]

Layer: model.layers.19.mlp.down_proj | 275.7076416015625, 274.1269226074219, 0.32137924432754517, 0.03427375853061676
Layer: model.layers.19.mlp.down_proj | Similarity with GT Delta: 0.9895
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  72%|███████▏  | 141/197 [00:21<00:10,  5.34it/s]

Layer: model.layers.20.self_attn.q_proj | 110.62801361083984, 106.4109878540039, 0.2048167884349823, 0.10184140503406525
Layer: model.layers.20.self_attn.q_proj | Similarity with GT Delta: 0.9819
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.20.self_attn.k_proj | 38.774539947509766, 37.309181213378906, 0.06080770865082741, 0.03638624772429466
Layer: model.layers.20.self_attn.k_proj | Similarity with GT Delta: 0.9530
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.20.self_attn.v_proj | 38.86406707763672, 36.51040267944336, 0.045435212552547455, 0.032232414931058884
Layer: model.layers.20.self_attn.v_proj | Similarity with GT Delta: 0.9358
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  74%|███████▎  | 145/197 [00:22<00:06,  7.49it/s]

Layer: model.layers.20.self_attn.o_proj | 110.21961975097656, 109.0828628540039, 0.10255459696054459, 0.07082681357860565
Layer: model.layers.20.self_attn.o_proj | Similarity with GT Delta: 0.9838
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.20.mlp.gate_proj | 280.180908203125, 271.46099853515625, 1.0081440210342407, 0.15437307953834534
Layer: model.layers.20.mlp.gate_proj | Similarity with GT Delta: 0.9946
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  74%|███████▍  | 146/197 [00:22<00:06,  7.74it/s]

Layer: model.layers.20.mlp.up_proj | 274.5717468261719, 268.6419372558594, 0.6313381791114807, 0.16076625883579254
Layer: model.layers.20.mlp.up_proj | Similarity with GT Delta: 0.9902
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  75%|███████▍  | 147/197 [00:22<00:07,  6.58it/s]

Layer: model.layers.20.mlp.down_proj | 276.9649963378906, 274.38604736328125, 0.32315993309020996, 0.031129788607358932
Layer: model.layers.20.mlp.down_proj | Similarity with GT Delta: 0.9919
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  75%|███████▌  | 148/197 [00:22<00:08,  5.48it/s]

Layer: model.layers.21.self_attn.q_proj | 109.11366271972656, 108.75871276855469, 0.10004691034555435, 0.08686111867427826
Layer: model.layers.21.self_attn.q_proj | Similarity with GT Delta: 0.9840
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.21.self_attn.k_proj | 40.219425201416016, 38.124019622802734, 0.0484439916908741, 0.033340029418468475
Layer: model.layers.21.self_attn.k_proj | Similarity with GT Delta: 0.9359
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.21.self_attn.v_proj | 39.152099609375, 36.057777404785156, 0.08182350546121597, 0.036438506096601486
Layer: model.layers.21.self_attn.v_proj | Similarity with GT Delta: 0.9506
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  77%|███████▋  | 152/197 [00:23<00:06,  7.29it/s]

Layer: model.layers.21.self_attn.o_proj | 110.47563934326172, 108.43946838378906, 0.16914086043834686, 0.053393617272377014
Layer: model.layers.21.self_attn.o_proj | Similarity with GT Delta: 0.9823
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.21.mlp.gate_proj | 278.2161865234375, 269.7365417480469, 0.9937229752540588, 0.13297820091247559
Layer: model.layers.21.mlp.gate_proj | Similarity with GT Delta: 0.9931
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  78%|███████▊  | 153/197 [00:23<00:05,  7.62it/s]

Layer: model.layers.21.mlp.up_proj | 279.4344177246094, 278.63458251953125, 0.5479369759559631, 0.16835571825504303
Layer: model.layers.21.mlp.up_proj | Similarity with GT Delta: 0.9914
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  78%|███████▊  | 154/197 [00:23<00:06,  6.46it/s]

Layer: model.layers.21.mlp.down_proj | 275.03692626953125, 268.8459167480469, 0.3650072515010834, 0.03306533396244049
Layer: model.layers.21.mlp.down_proj | Similarity with GT Delta: 0.9885
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  79%|███████▊  | 155/197 [00:23<00:07,  5.43it/s]

Layer: model.layers.22.self_attn.q_proj | 110.61255645751953, 108.55276489257812, 0.18617409467697144, 0.05831151455640793
Layer: model.layers.22.self_attn.q_proj | Similarity with GT Delta: 0.9844
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.22.self_attn.k_proj | 37.408267974853516, 35.83738327026367, 0.051542017608881, 0.03427329659461975
Layer: model.layers.22.self_attn.k_proj | Similarity with GT Delta: 0.9425
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.22.self_attn.v_proj | 39.29792022705078, 37.329124450683594, 0.05922490358352661, 0.03623379394412041
Layer: model.layers.22.self_attn.v_proj | Similarity with GT Delta: 0.9384
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  81%|████████  | 159/197 [00:24<00:05,  7.37it/s]

Layer: model.layers.22.self_attn.o_proj | 111.46406555175781, 107.80645751953125, 0.11999335139989853, 0.07869158685207367
Layer: model.layers.22.self_attn.o_proj | Similarity with GT Delta: 0.9841
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.22.mlp.gate_proj | 274.9178771972656, 272.5155334472656, 0.7720838785171509, 0.16870172321796417
Layer: model.layers.22.mlp.gate_proj | Similarity with GT Delta: 0.9923
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  81%|████████  | 160/197 [00:24<00:04,  7.63it/s]

Layer: model.layers.22.mlp.up_proj | 280.3311462402344, 276.91668701171875, 0.6072982549667358, 0.15776905417442322
Layer: model.layers.22.mlp.up_proj | Similarity with GT Delta: 0.9884
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  82%|████████▏ | 161/197 [00:24<00:05,  6.18it/s]

Layer: model.layers.22.mlp.down_proj | 277.94073486328125, 275.38348388671875, 0.4273982048034668, 0.03898359090089798
Layer: model.layers.22.mlp.down_proj | Similarity with GT Delta: 0.9903
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  82%|████████▏ | 162/197 [00:24<00:06,  5.26it/s]

Layer: model.layers.23.self_attn.q_proj | 110.74349975585938, 109.42030334472656, 0.17349261045455933, 0.06263601779937744
Layer: model.layers.23.self_attn.q_proj | Similarity with GT Delta: 0.9838
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.23.self_attn.k_proj | 39.887847900390625, 37.56516647338867, 0.038965631276369095, 0.03272984176874161
Layer: model.layers.23.self_attn.k_proj | Similarity with GT Delta: 0.9440
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.23.self_attn.v_proj | 38.56898880004883, 36.577049255371094, 0.05114026740193367, 0.027725407853722572
Layer: model.layers.23.self_attn.v_proj | Similarity with GT Delta: 0.9459
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  84%|████████▍ | 166/197 [00:25<00:04,  7.46it/s]

Layer: model.layers.23.self_attn.o_proj | 109.32189178466797, 106.80742645263672, 0.14870566129684448, 0.06858038902282715
Layer: model.layers.23.self_attn.o_proj | Similarity with GT Delta: 0.9844
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.23.mlp.gate_proj | 279.3120422363281, 274.65740966796875, 0.8400897979736328, 0.1258927285671234
Layer: model.layers.23.mlp.gate_proj | Similarity with GT Delta: 0.9888
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  85%|████████▍ | 167/197 [00:25<00:03,  7.72it/s]

Layer: model.layers.23.mlp.up_proj | 280.92156982421875, 274.5994567871094, 0.555292010307312, 0.13920123875141144
Layer: model.layers.23.mlp.up_proj | Similarity with GT Delta: 0.9915
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  85%|████████▌ | 168/197 [00:25<00:04,  6.24it/s]

Layer: model.layers.23.mlp.down_proj | 278.4997863769531, 276.1903991699219, 0.3996932804584503, 0.04118383675813675
Layer: model.layers.23.mlp.down_proj | Similarity with GT Delta: 0.9889
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  86%|████████▌ | 169/197 [00:25<00:05,  5.28it/s]

Layer: model.layers.24.self_attn.q_proj | 112.77378845214844, 109.0782470703125, 0.22295765578746796, 0.061661750078201294
Layer: model.layers.24.self_attn.q_proj | Similarity with GT Delta: 0.9844
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.24.self_attn.k_proj | 39.353736877441406, 37.546722412109375, 0.05848987028002739, 0.031676892191171646
Layer: model.layers.24.self_attn.k_proj | Similarity with GT Delta: 0.9470
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.24.self_attn.v_proj | 37.88851547241211, 37.48055648803711, 0.15393804013729095, 0.024781588464975357
Layer: model.layers.24.self_attn.v_proj | Similarity with GT Delta: 0.9142
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  88%|████████▊ | 173/197 [00:26<00:03,  7.50it/s]

Layer: model.layers.24.self_attn.o_proj | 109.76716613769531, 108.31819152832031, 0.18126922845840454, 0.06891296058893204
Layer: model.layers.24.self_attn.o_proj | Similarity with GT Delta: 0.9812
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.24.mlp.gate_proj | 276.11248779296875, 270.1579895019531, 0.5847508907318115, 0.11949782073497772
Layer: model.layers.24.mlp.gate_proj | Similarity with GT Delta: 0.9923
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  88%|████████▊ | 174/197 [00:26<00:02,  7.77it/s]

Layer: model.layers.24.mlp.up_proj | 279.7680969238281, 271.3803405761719, 0.5917863249778748, 0.10649051517248154
Layer: model.layers.24.mlp.up_proj | Similarity with GT Delta: 0.9861
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  89%|████████▉ | 175/197 [00:26<00:03,  6.65it/s]

Layer: model.layers.24.mlp.down_proj | 277.3555603027344, 269.4430236816406, 0.44515737891197205, 0.02735939435660839
Layer: model.layers.24.mlp.down_proj | Similarity with GT Delta: 0.9916
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  89%|████████▉ | 176/197 [00:26<00:03,  5.52it/s]

Layer: model.layers.25.self_attn.q_proj | 112.21598815917969, 107.86141204833984, 0.21448974311351776, 0.07548310607671738
Layer: model.layers.25.self_attn.q_proj | Similarity with GT Delta: 0.9824
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.25.self_attn.k_proj | 38.46683883666992, 36.847537994384766, 0.06334950029850006, 0.029803674668073654
Layer: model.layers.25.self_attn.k_proj | Similarity with GT Delta: 0.9379
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.25.self_attn.v_proj | 39.8803825378418, 36.86144256591797, 0.14444506168365479, 0.036481644958257675
Layer: model.layers.25.self_attn.v_proj | Similarity with GT Delta: 0.9430
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  91%|█████████▏| 180/197 [00:27<00:02,  7.44it/s]

Layer: model.layers.25.self_attn.o_proj | 112.25563049316406, 108.50438690185547, 0.23983412981033325, 0.03809541463851929
Layer: model.layers.25.self_attn.o_proj | Similarity with GT Delta: 0.9867
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.25.mlp.gate_proj | 277.56201171875, 268.9229736328125, 1.1153361797332764, 0.08960343897342682
Layer: model.layers.25.mlp.gate_proj | Similarity with GT Delta: 0.9909
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  92%|█████████▏| 181/197 [00:27<00:02,  7.77it/s]

Layer: model.layers.25.mlp.up_proj | 271.2192687988281, 269.1214599609375, 1.0414912700653076, 0.10482468456029892
Layer: model.layers.25.mlp.up_proj | Similarity with GT Delta: 0.9906
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  92%|█████████▏| 182/197 [00:27<00:02,  6.72it/s]

Layer: model.layers.25.mlp.down_proj | 279.69097900390625, 276.55328369140625, 0.30327892303466797, 0.048965223133563995
Layer: model.layers.25.mlp.down_proj | Similarity with GT Delta: 0.9896
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  93%|█████████▎| 183/197 [00:27<00:02,  5.45it/s]

Layer: model.layers.26.self_attn.q_proj | 111.14830017089844, 104.51370239257812, 0.16549065709114075, 0.05810912325978279
Layer: model.layers.26.self_attn.q_proj | Similarity with GT Delta: 0.9841
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.26.self_attn.k_proj | 39.55620574951172, 36.68348693847656, 0.034836720675230026, 0.028289277106523514
Layer: model.layers.26.self_attn.k_proj | Similarity with GT Delta: 0.9445
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.26.self_attn.v_proj | 37.85851287841797, 37.05033493041992, 0.10055659711360931, 0.02887018956243992
Layer: model.layers.26.self_attn.v_proj | Similarity with GT Delta: 0.9549
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  95%|█████████▍| 187/197 [00:28<00:01,  7.17it/s]

Layer: model.layers.26.self_attn.o_proj | 110.66165161132812, 107.09095001220703, 0.198807030916214, 0.07294337451457977
Layer: model.layers.26.self_attn.o_proj | Similarity with GT Delta: 0.9841
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.26.mlp.gate_proj | 277.3244934082031, 267.40478515625, 1.4003190994262695, 0.07002976536750793
Layer: model.layers.26.mlp.gate_proj | Similarity with GT Delta: 0.9893
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  95%|█████████▌| 188/197 [00:28<00:01,  7.02it/s]

Layer: model.layers.26.mlp.up_proj | 280.43817138671875, 272.71490478515625, 1.4320975542068481, 0.08183415234088898
Layer: model.layers.26.mlp.up_proj | Similarity with GT Delta: 0.9914
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  96%|█████████▌| 189/197 [00:28<00:01,  6.22it/s]

Layer: model.layers.26.mlp.down_proj | 280.30670166015625, 270.4360656738281, 0.6272664666175842, 0.16388453543186188
Layer: model.layers.26.mlp.down_proj | Similarity with GT Delta: 0.9898
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  96%|█████████▋| 190/197 [00:28<00:01,  5.26it/s]

Layer: model.layers.27.self_attn.q_proj | 110.14762878417969, 108.73107147216797, 0.16813933849334717, 0.08140960335731506
Layer: model.layers.27.self_attn.q_proj | Similarity with GT Delta: 0.9830
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.27.self_attn.k_proj | 38.37486267089844, 36.41543960571289, 0.0447414293885231, 0.030015340074896812
Layer: model.layers.27.self_attn.k_proj | Similarity with GT Delta: 0.9460
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.27.self_attn.v_proj | 39.864749908447266, 38.925106048583984, 0.4053271412849426, 0.016726145520806313
Layer: model.layers.27.self_attn.v_proj | Similarity with GT Delta: 0.9632
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  98%|█████████▊| 194/197 [00:29<00:00,  6.97it/s]

Layer: model.layers.27.self_attn.o_proj | 111.72465515136719, 108.77156829833984, 0.46761804819107056, 0.017074961215257645
Layer: model.layers.27.self_attn.o_proj | Similarity with GT Delta: 0.9880
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}
Layer: model.layers.27.mlp.gate_proj | 275.3987121582031, 270.34576416015625, 1.3686476945877075, 0.12671266496181488
Layer: model.layers.27.mlp.gate_proj | Similarity with GT Delta: 0.9914
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering:  99%|█████████▉| 195/197 [00:29<00:00,  7.10it/s]

Layer: model.layers.27.mlp.up_proj | 278.4645690917969, 276.0065002441406, 1.3479621410369873, 0.10379988700151443
Layer: model.layers.27.mlp.up_proj | Similarity with GT Delta: 0.9909
size of W_obfus: {W_obfus.shape} | size of W_base: {W_base.shape}


Recovering: 100%|██████████| 197/197 [00:29<00:00,  6.62it/s]

Layer: model.layers.27.mlp.down_proj | 280.0889587402344, 269.974609375, 0.1753903031349182, 0.03446785360574722
Layer: model.layers.27.mlp.down_proj | Similarity with GT Delta: 0.9912
Keeping Base Weights for lm_head
Saving recovered model to /home/vipuser/LoRO/LoRO_attack/recovered_qwen_1_5B_model...





tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Saved.


# cmp ft & recovered

In [7]:
import torch
from transformers import AutoModelForCausalLM
import pandas as pd
import os

# ==========================================
# 配置：对比 Ground Truth (Target) 与 恢复模型 (Recovered)
# ==========================================

target_model_id = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct" 
recovered_model_path = "/home/vipuser/LoRO/LoRO_attack/recovered_qwen_1_5B_model"

device = "cpu" # 对比只需 CPU 即可，省显存

print(f"Loading Target Model (GT): {target_model_id}...")
try:
    model_gt = AutoModelForCausalLM.from_pretrained(target_model_id, trust_remote_code=True).to(device)
except Exception as e:
    print(f"Error loading GT model: {e}")
    # 如果显存不够，可以尝试加 device_map="cpu" 或 torch_dtype=torch.float16

print(f"Loading Recovered Model: {recovered_model_path}...")

try:
    model_recovered = AutoModelForCausalLM.from_pretrained(recovered_model_path, trust_remote_code=True).to(device)
except Exception as e:
    print(f"Error loading Recovered model: {e}")
    raise e

print("\nStarting Comparison (Target GT vs. Recovered)...")
print("*" * 90)
print(f"{'Layer Name':<50} | {'Cos Sim':<10} | {'Diff Norm':<12} | {'Rel Diff (%)':<12}")
print("-" * 90)

results = []
modules_recovered = dict(model_recovered.named_modules())

# 遍历 GT 模型的层
for name, module_gt in model_gt.named_modules():
    if isinstance(module_gt, torch.nn.Linear):
        if name in modules_recovered:
            module_rec = modules_recovered[name]
            
            # 获取权重
            w_gt = module_gt.weight.detach()
            w_rec = module_rec.weight.detach()
            
            # 检查形状
            if w_gt.shape != w_rec.shape:
                continue
                
            # 1. 计算余弦相似度
            cos_sim = torch.nn.functional.cosine_similarity(
                w_gt.flatten(), w_rec.flatten(), dim=0
            ).item()
            
            # 2. 计算差异 (Diff = GT - Recovered)
            # 如果攻击完美，Diff 应该全是 0
            diff = w_gt - w_rec
            norm_diff = torch.norm(diff).item()
            norm_gt = torch.norm(w_gt).item()
            
            # 相对差异
            rel_diff = norm_diff / norm_gt if norm_gt > 0 else 0.0
            
            # 打印结果 (只打印部分层，或者差异较大的层)
            # 如果 Cos Sim < 0.99 或者 Diff 比较大，说明恢复有问题
            print(f"{name} | {cos_sim:.6f}   | {norm_diff:.4f}       | {rel_diff*100:.4f}%")
            
            results.append({
                "Layer": name, 
                "Cos_Sim": cos_sim, 
                "Diff_Norm": norm_diff, 
                "Rel_Diff": rel_diff
            })

df = pd.DataFrame(results)
print("-" * 90)
print(f"Summary (Target vs Recovered):")
print(f"Avg Cosine Similarity: {df['Cos_Sim'].mean():.6f} (Target: 1.0)")
print(f"Avg Relative Diff:     {df['Rel_Diff'].mean()*100:.6f}% (Target: 0.0%)")
print("-" * 90)


Loading Target Model (GT): zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct...
Loading Recovered Model: /home/vipuser/LoRO/LoRO_attack/recovered_qwen_1_5B_model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Starting Comparison (Target GT vs. Recovered)...
******************************************************************************************
Layer Name                                         | Cos Sim    | Diff Norm    | Rel Diff (%)
------------------------------------------------------------------------------------------
model.layers.0.self_attn.q_proj | 1.000000   | 0.0373       | 0.0500%
model.layers.0.self_attn.k_proj | 1.000000   | 0.0261       | 0.0745%
model.layers.0.self_attn.v_proj | 0.999997   | 0.0308       | 0.2626%
model.layers.0.self_attn.o_proj | 0.999999   | 0.0439       | 0.1248%
model.layers.0.mlp.gate_proj | 1.000000   | 0.0794       | 0.0696%
model.layers.0.mlp.up_proj | 1.000000   | 0.0781       | 0.0824%
model.layers.0.mlp.down_proj | 1.000000   | 0.0347       | 0.0349%
model.layers.1.self_attn.q_proj | 1.000000   | 0.0405       | 0.0793%
model.layers.1.self_attn.k_proj | 0.999999   | 0.0309       | 0.1111%
model.layers.1.self_attn.v_proj | 0.999993   | 0.0473  

In [None]:
# full-FT with Knockoff (skipped)
# BUT I think we need LoRA due to its mathematical property
# the delta we lost is truncated SVD values (top 24)
# then test accuracy

# test

In [1]:
import os

# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com/"
os.environ["HF_HOME"] = "/home/vipuser/.cache/huggingface"
os.environ["HF_HUB_CACHE"] = "/home/vipuser/.cache/huggingface/hub"
os.environ["TRANSFORMERS_CACHE"] = "/home/vipuser/.cache/huggingface/transformers"
os.makedirs(os.environ["HF_HUB_CACHE"], exist_ok=True)
os.makedirs(os.environ["TRANSFORMERS_CACHE"], exist_ok=True)


## recovered

In [3]:
# rec
import torch
import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset

# ==========================================
# 1. 类似 notebook 中的加载部分
# ==========================================
# 你的模型路径
model_path = "/home/vipuser/LoRO/LoRO_attack/recovered_qwen_1_5B_model"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model from {model_path}...")

try:
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map=device
    ).eval()
except Exception as e:
    print(f"Error: {e}")
    # 如果加载本地失败，演示用可以注释掉上面，打开下面这行
    # raise e

# ==========================================
# 2. 加载数据 (对应 notebook cell 8)
# ==========================================
print("Loading SQuAD v2 dataset...")
# 注意：你现在的任务是 SQuAD v2，比 v1 多了无解的情况
dataset = load_dataset("squad_v2", split="validation")

# dataset = dataset.select(range(100)) # 调试用，跑全量时注释掉

print(f"Start evaluating on {len(dataset)} samples...")

# ==========================================
# 3. 核心评估循环 (复刻 notebook cell 9)
# ==========================================
correct = 0
total = 0

# 进度条
progress_bar = tqdm.tqdm(range(len(dataset)))

for i in progress_bar:
    # 1. 获取数据
    item = dataset[i]
    context = item['context']
    question = item['question']
    gold_answers = item['answers']['text'] # 标准答案列表

    # 2. 构建 Prompt (因为是生成模型，不能用 QA pipeline，要用 Prompt 模拟)
    # 格式严格对应你微调时的格式
    prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"

    messages = [
        # 如果微调时有 system prompt 请加上，没有就删掉这行
        {"role": "system", "content": "Answer the question exactly based on the context. If unanswerable, say 'unanswerable'."},
        {"role": "user", "content": prompt}
    ]

    # 3. 模型推理 (代替 pipeline)
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50, # 答案通常很短
                do_sample=False,   # 贪婪搜索，保证结果确定性
                pad_token_id=tokenizer.eos_token_id
            )

        # 提取生成的答案
        generated_answer = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True).strip()

    except Exception as e:
        print(f"Error at index {i}: {e}")
        generated_answer = ""

    # 4. 判分逻辑 (完全复刻 notebook 的 exact match 逻辑)
    # 逻辑：只要生成的答案 在 标准答案列表里，就算对

    is_correct = False

    # 清洗一下字符串（忽略大小写和首尾空格）
    pred_clean = generated_answer.lower().strip()

    if len(gold_answers) > 0:
        # 情况 A: 有标准答案 (和 SQuAD v1 一样)
        # notebook 里的逻辑: if answer == result['answer']
        for answer in gold_answers:
            if answer.lower().strip() == pred_clean: # 只要命中一个就算对
                is_correct = True
                break
    else:
        # 情况 B: 无解 (SQuAD v2 特有)
        # 假设微调时教过模型输出 'unanswerable'
        if "unanswerable" in pred_clean:
            is_correct = True

    if is_correct:
        correct += 1

    total += 1

    # 实时更新显示 acc
    progress_bar.set_postfix({'acc': f"{correct/total:.2%}"})

# ==========================================
# 4. 打印最终结果
# ==========================================
print("\n" + "="*30)
print("correct:{}, total:{}, accuracy:{}".format(correct, total, correct/total))
print("="*30)

Loading model from /home/vipuser/LoRO/LoRO_attack/recovered_qwen_1_5B_model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading SQuAD v2 dataset...


Using the latest cached version of the dataset since squad_v2 couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'squad_v2' at /home/vipuser/.cache/huggingface/datasets/squad_v2/squad_v2/0.0.0/3ffb306f725f7d2ce8394bc1873b24868140c412 (last modified on Thu Dec 25 18:00:45 2025).


Start evaluating on 11873 samples...


100%|██████████| 11873/11873 [45:38<00:00,  4.34it/s, acc=36.94%] 


correct:4386, total:11873, accuracy:0.3694095847721722





## final

## base

In [None]:
# base
import torch
import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset

# ==========================================
# 1. 类似 notebook 中的加载部分
# ==========================================
# 你的模型路径
model_path = "Qwen/Qwen2.5-1.5B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model from {model_path}...")

try:
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map=device
    ).eval()
except Exception as e:
    print(f"Error: {e}")
    # 如果加载本地失败，演示用可以注释掉上面，打开下面这行
    # raise e

# ==========================================
# 2. 加载数据 (对应 notebook cell 8)
# ==========================================
print("Loading SQuAD v2 dataset...")
# 注意：你现在的任务是 SQuAD v2，比 v1 多了无解的情况
dataset = load_dataset("squad_v2", split="validation")

# dataset = dataset.select(range(100)) # 调试用，跑全量时注释掉

print(f"Start evaluating on {len(dataset)} samples...")

# ==========================================
# 3. 核心评估循环 (复刻 notebook cell 9)
# ==========================================
correct = 0
total = 0

# 进度条
progress_bar = tqdm.tqdm(range(len(dataset)))

for i in progress_bar:
    # 1. 获取数据
    item = dataset[i]
    context = item['context']
    question = item['question']
    gold_answers = item['answers']['text'] # 标准答案列表

    # 2. 构建 Prompt (因为是生成模型，不能用 QA pipeline，要用 Prompt 模拟)
    # 格式严格对应你微调时的格式
    prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"

    messages = [
        # 如果微调时有 system prompt 请加上，没有就删掉这行
        {"role": "system", "content": "Answer the question exactly based on the context. If unanswerable, say 'unanswerable'."},
        {"role": "user", "content": prompt}
    ]

    # 3. 模型推理 (代替 pipeline)
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50, # 答案通常很短
                do_sample=False,   # 贪婪搜索，保证结果确定性
                pad_token_id=tokenizer.eos_token_id
            )

        # 提取生成的答案
        generated_answer = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True).strip()

    except Exception as e:
        print(f"Error at index {i}: {e}")
        generated_answer = ""

    # 4. 判分逻辑 (完全复刻 notebook 的 exact match 逻辑)
    # 逻辑：只要生成的答案 在 标准答案列表里，就算对

    is_correct = False

    # 清洗一下字符串（忽略大小写和首尾空格）
    pred_clean = generated_answer.lower().strip()

    if len(gold_answers) > 0:
        # 情况 A: 有标准答案 (和 SQuAD v1 一样)
        # notebook 里的逻辑: if answer == result['answer']
        for answer in gold_answers:
            if answer.lower().strip() == pred_clean: # 只要命中一个就算对
                is_correct = True
                break
    else:
        # 情况 B: 无解 (SQuAD v2 特有)
        # 假设微调时教过模型输出 'unanswerable'
        if "unanswerable" in pred_clean:
            is_correct = True

    if is_correct:
        correct += 1

    total += 1

    # 实时更新显示 acc
    progress_bar.set_postfix({'acc': f"{correct/total:.2%}"})

# ==========================================
# 4. 打印最终结果
# ==========================================
print("\n" + "="*30)
print("correct:{}, total:{}, accuracy:{}".format(correct, total, correct/total))
print("="*30)

Loading model from Qwen/Qwen2.5-1.5B-Instruct...
Loading SQuAD v2 dataset...


Using the latest cached version of the dataset since squad_v2 couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'squad_v2' at /home/vipuser/.cache/huggingface/datasets/squad_v2/squad_v2/0.0.0/3ffb306f725f7d2ce8394bc1873b24868140c412 (last modified on Thu Dec 25 18:00:45 2025).


Start evaluating on 11873 samples...


100%|██████████| 11873/11873 [3:19:14<00:00,  1.01s/it, acc=17.96%] 


correct:2132, total:11873, accuracy:0.17956708498273394





## ft

In [5]:
# ft
import torch
import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset

# ==========================================
# 1. 类似 notebook 中的加载部分
# ==========================================
# 你的模型路径
model_path = "zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading model from {model_path}...")

try:
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map=device
    ).eval()
except Exception as e:
    print(f"Error: {e}")
    # 如果加载本地失败，演示用可以注释掉上面，打开下面这行
    # raise e

# ==========================================
# 2. 加载数据 (对应 notebook cell 8)
# ==========================================
print("Loading SQuAD v2 dataset...")
# 注意：你现在的任务是 SQuAD v2，比 v1 多了无解的情况
dataset = load_dataset("squad_v2", split="validation")

# dataset = dataset.select(range(100)) # 调试用，跑全量时注释掉

print(f"Start evaluating on {len(dataset)} samples...")

# ==========================================
# 3. 核心评估循环 (复刻 notebook cell 9)
# ==========================================
correct = 0
total = 0

# 进度条
progress_bar = tqdm.tqdm(range(len(dataset)))

for i in progress_bar:
    # 1. 获取数据
    item = dataset[i]
    context = item['context']
    question = item['question']
    gold_answers = item['answers']['text'] # 标准答案列表

    # 2. 构建 Prompt (因为是生成模型，不能用 QA pipeline，要用 Prompt 模拟)
    # 格式严格对应你微调时的格式
    prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"

    messages = [
        # 如果微调时有 system prompt 请加上，没有就删掉这行
        {"role": "system", "content": "Answer the question exactly based on the context. If unanswerable, say 'unanswerable'."},
        {"role": "user", "content": prompt}
    ]

    # 3. 模型推理 (代替 pipeline)
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50, # 答案通常很短
                do_sample=False,   # 贪婪搜索，保证结果确定性
                pad_token_id=tokenizer.eos_token_id
            )

        # 提取生成的答案
        generated_answer = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True).strip()

    except Exception as e:
        print(f"Error at index {i}: {e}")
        generated_answer = ""

    # 4. 判分逻辑 (完全复刻 notebook 的 exact match 逻辑)
    # 逻辑：只要生成的答案 在 标准答案列表里，就算对

    is_correct = False

    # 清洗一下字符串（忽略大小写和首尾空格）
    pred_clean = generated_answer.lower().strip()

    if len(gold_answers) > 0:
        # 情况 A: 有标准答案 (和 SQuAD v1 一样)
        # notebook 里的逻辑: if answer == result['answer']
        for answer in gold_answers:
            if answer.lower().strip() == pred_clean: # 只要命中一个就算对
                is_correct = True
                break
    else:
        # 情况 B: 无解 (SQuAD v2 特有)
        # 假设微调时教过模型输出 'unanswerable'
        if "unanswerable" in pred_clean:
            is_correct = True

    if is_correct:
        correct += 1

    total += 1

    # 实时更新显示 acc
    progress_bar.set_postfix({'acc': f"{correct/total:.2%}"})

# ==========================================
# 4. 打印最终结果
# ==========================================
print("\n" + "="*30)
print("correct:{}, total:{}, accuracy:{}".format(correct, total, correct/total))
print("="*30)

Loading model from zfdev/squad_v2-16bit-Qwen2.5-1.5B-Instruct...
Loading SQuAD v2 dataset...


Using the latest cached version of the dataset since squad_v2 couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'squad_v2' at /home/vipuser/.cache/huggingface/datasets/squad_v2/squad_v2/0.0.0/3ffb306f725f7d2ce8394bc1873b24868140c412 (last modified on Thu Dec 25 18:00:45 2025).


Start evaluating on 11873 samples...


100%|██████████| 11873/11873 [37:45<00:00,  5.24it/s, acc=37.08%] 


correct:4403, total:11873, accuracy:0.37084140486818834



