In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F


In [3]:
# Base directory the baseline embedding file is read from.
EMB_DIR = Path(".")

# Load the baseline embeddings onto the CPU and cast them to float32 so the
# distance metrics run in one known dtype regardless of how they were saved.
# NOTE(review): torch.load unpickles the file -- only load trusted artifacts.
clean = torch.load(EMB_DIR / "baseline_embeddings_mpnet.pt", map_location="cpu")
clean = clean.float()

print("Clean embeddings shape:", clean.shape)


Clean embeddings shape: torch.Size([169343, 768])


In [13]:
# Attack name -> file name of the embedding tensor loaded for that attack.
# Insertion order is preserved, so it is also the iteration order of the
# mapping wherever it is looped over.
attack_embeddings = dict(
    [
        ("paraphrase_1step", "embeddings_mpnet_paraphrase_1step.pt"),
        ("paraphrase_2step", "embeddings_mpnet_paraphrase_2step.pt"),
        ("bt_zh_1step", "embeddings_mpnet_bt_zh_1step.pt"),
        ("bt_hi_1step", "embeddings_mpnet_bt_hi_1step.pt"),
        ("bt_de_1step", "embeddings_mpnet_bt_de_1step.pt"),
    ]
)


In [8]:
# Read the attack metadata. JSON is UTF-8 by specification, so the encoding is
# pinned instead of relying on the platform default used by open().
with open("paraphrase_attack_results_mpnet.json", encoding="utf-8") as f:
    attack_meta = json.load(f)

# The "test_subset" entry becomes an index tensor; elsewhere in this notebook
# it selects rows of the baseline embedding matrix.
attack_idx = torch.tensor(attack_meta["test_subset"])
print(len(attack_idx))

1000


In [5]:
def mse(a, b):
    """Return the mean squared difference between ``a`` and ``b`` as a float.

    The average is taken over every element of ``a - b``, not per row.
    """
    squared_error = (a - b).pow(2)
    return squared_error.mean().item()

def l2(a, b):
    """Return the mean Euclidean distance between matching rows of ``a`` and ``b``.

    The 2-norm of ``a - b`` is taken along ``dim=1`` (one distance per row),
    then averaged over rows and returned as a Python float.
    """
    # torch.norm is deprecated; torch.linalg.vector_norm is its documented
    # replacement and yields the same 2-norm along a single dimension.
    row_distances = torch.linalg.vector_norm(a - b, ord=2, dim=1)
    return row_distances.mean().item()

def cosine_stats(a, b):
    """Summarise the row-wise cosine similarity between ``a`` and ``b``.

    Returns a dict with the keys ``cosine_mean``, ``cosine_median`` and
    ``cosine_max`` (in that order), each a Python float.

    Note: with an even number of rows ``torch.median`` returns the lower of
    the two middle values rather than their average.
    """
    similarity = F.cosine_similarity(a, b, dim=1)
    reducers = (
        ("cosine_mean", torch.mean),
        ("cosine_median", torch.median),
        ("cosine_max", torch.max),
    )
    return {key: reduce(similarity).item() for key, reduce in reducers}


In [19]:
# Reload the full baseline matrix on the CPU. map_location="cpu" matches the
# other loads in this notebook: without it a tensor saved from a GPU would
# fail to load on a CPU-only machine, or end up on a different device from
# the attacked embeddings it is compared against.
clean_full = torch.load(
    EMB_DIR / "baseline_embeddings_mpnet.pt", map_location="cpu"
).float()

# Keep only the rows selected by attack_idx. NOTE: this rebinds `clean` from
# the full matrix to the subset; the metric loop expects the subset.
clean = clean_full[attack_idx]
print(clean.shape)  # (1000, 768)


torch.Size([1000, 768])


In [14]:
# One row of shift metrics per attack: clean subset vs. attacked embeddings.
results = []

for attack, fname in attack_embeddings.items():
    attacked = torch.load(EMB_DIR / fname, map_location="cpu").float()

    # Rows are compared pairwise, so both tensors must have the same shape.
    # `clean` is bound twice in this notebook (full matrix, then subset); the
    # messages make a stale full-matrix `clean` easy to diagnose.
    assert attacked.shape[1] == clean.shape[1], (
        f"{attack}: embedding dim {attacked.shape[1]} != clean dim {clean.shape[1]}"
    )
    assert attacked.shape[0] == clean.shape[0], (
        f"{attack}: {attacked.shape[0]} attacked rows vs {clean.shape[0]} clean rows "
        "(is `clean` the attack subset?)"
    )

    res = {
        "attack": attack,
        "mse": mse(clean, attacked),
        "l2_distance": l2(clean, attacked),
    }
    res.update(cosine_stats(clean, attacked))

    results.append(res)

# Build the frame once from the list of records, then display it.
df = pd.DataFrame(results)
df


Unnamed: 0,attack,mse,l2_distance,cosine_mean,cosine_median,cosine_max
0,paraphrase_1step,0.000994,0.801528,0.618184,0.763509,0.992049
1,paraphrase_2step,0.001423,0.992204,0.453538,0.52902,0.970316
2,bt_zh_1step,0.000715,0.71243,0.725599,0.75519,0.981183
3,bt_hi_1step,0.001672,1.120851,0.358025,0.361594,0.841727
4,bt_de_1step,0.000203,0.352435,0.922151,0.946422,0.998618


In [18]:
# Ascending order: the attack with the lowest mean cosine similarity to the
# clean embeddings comes first.
df_sorted = df.sort_values(by="cosine_mean", ascending=True)
df_sorted

Unnamed: 0,attack,mse,l2_distance,cosine_mean,cosine_median,cosine_max
3,bt_hi_1step,0.001672,1.120851,0.358025,0.361594,0.841727
1,paraphrase_2step,0.001423,0.992204,0.453538,0.52902,0.970316
0,paraphrase_1step,0.000994,0.801528,0.618184,0.763509,0.992049
2,bt_zh_1step,0.000715,0.71243,0.725599,0.75519,0.981183
4,bt_de_1step,0.000203,0.352435,0.922151,0.946422,0.998618


In [16]:
# Write the sorted metrics table to disk; index=False leaves the DataFrame
# index out of the file.
df_sorted.to_csv(path_or_buf="paraphrase_embedding_shift_mpnet.csv", index=False)
