In [1]:
!pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1
!pip install -q pandas numpy transformers sentence-transformers ogb torch-geometric tqdm matplotlib scikit-learn

Collecting torch==2.5.1
  Downloading torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision==0.20.1
  Downloading torchvision-0.20.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio==2.5.1
  Downloading torchaudio-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.5.1)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.5.1)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.5.1)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.5.1)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12

In [1]:

import os, json, time
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from ogb.nodeproppred import PygNodePropPredDataset

from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    MarianMTModel,
    MarianTokenizer,
)
from sentence_transformers import SentenceTransformer


In [2]:
os.makedirs("data/ogbn-arxiv/raw", exist_ok=True)

if not os.path.exists("data/ogbn-arxiv/raw/titleabs.tsv"):
    !wget -q -O data/ogbn-arxiv/raw/titleabs.tsv https://snap.stanford.edu/ogb/data/misc/ogbn_arxiv/titleabs.tsv


In [3]:
dataset = PygNodePropPredDataset(name="ogbn-arxiv", root="data/ogbn-arxiv")
data = dataset[0]

num_nodes = data.num_nodes
num_classes = int(data.y.max().item() + 1)

print(data)

Downloading http://snap.stanford.edu/ogb/data/nodeproppred/arxiv.zip


Downloaded 0.08 GB: 100%|██████████| 81/81 [00:09<00:00,  8.39it/s]
Processing...


Extracting data/ogbn-arxiv/arxiv.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:00<00:00, 13025.79it/s]


Converting graphs into PyG objects...


100%|██████████| 1/1 [00:00<00:00, 2340.57it/s]

Saving...
Data(num_nodes=169343, edge_index=[2, 1166243], x=[169343, 128], node_year=[169343, 1], y=[169343, 1])



Done!
  self.data, self.slices = torch.load(self.processed_paths[0])


In [4]:
split_idx = dataset.get_idx_split()
train_idx = split_idx["train"]
val_idx   = split_idx["valid"]
test_idx  = split_idx["test"]


In [5]:
mapping_file = "data/ogbn-arxiv/ogbn_arxiv/mapping/nodeidx2paperid.csv.gz"
ogb_map = pd.read_csv(mapping_file)
ogb_map.columns = ["ogb_id", "mag_id"]

raw_texts = pd.read_csv(
    "data/ogbn-arxiv/raw/titleabs.tsv",
    sep="\t",
    header=None,
    names=["mag_id", "title", "abstract"],
    on_bad_lines="skip"
)

merged = pd.merge(ogb_map, raw_texts, on="mag_id", how="left")
merged = merged.sort_values("ogb_id")

merged["full_text"] = merged["title"].fillna("") + " " + merged["abstract"].fillna("")
texts = merged["full_text"].tolist()

assert len(texts) == num_nodes
print("Loaded texts:", len(texts))


Loaded texts: 169343


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data.edge_index = data.edge_index.to(device)
data.y = data.y.squeeze().to(device)


In [None]:
encoder = SentenceTransformer("all-mpnet-base-v2", device=device)
encoder.eval()

emb_path = "baseline_embeddings_mpnet.pt"

print("Computing MPNet embeddings")

batch_size = 64
all_embeddings = []

for i in tqdm(range(0, len(texts), batch_size)):
    batch_texts = texts[i:i + batch_size]
    with torch.no_grad():
        emb = encoder.encode(
            batch_texts,
            convert_to_tensor=True,
            show_progress_bar=False
        )
    all_embeddings.append(emb.cpu())

node_embeddings = torch.cat(all_embeddings, dim=0)

# Save clean tensor
torch.save(node_embeddings, emb_path)

print("Saved embeddings to:", emb_path)
print("Embedding shape:", node_embeddings.shape)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing MPNet embeddings


  3%|▎         | 67/2646 [00:19<12:13,  3.51it/s]

In [9]:
import torch

obj = torch.load("mpnet_model.pt", map_location="cpu")

print(type(obj))

if isinstance(obj, torch.Tensor):
    print("Tensor shape:", obj.shape)

elif isinstance(obj, dict):
    print("Dict keys:", obj.keys())
    for k, v in obj.items():
        if torch.is_tensor(v):
            print(f"{k}: tensor {tuple(v.shape)}")
        else:
            print(f"{k}: {type(v)}")


<class 'collections.OrderedDict'>
Dict keys: odict_keys(['sage_model.convs.0.lin_l.weight', 'sage_model.convs.0.lin_l.bias', 'sage_model.convs.0.lin_r.weight', 'sage_model.convs.1.lin_l.weight', 'sage_model.convs.1.lin_l.bias', 'sage_model.convs.1.lin_r.weight'])
sage_model.convs.0.lin_l.weight: tensor (128, 768)
sage_model.convs.0.lin_l.bias: tensor (128,)
sage_model.convs.0.lin_r.weight: tensor (128, 768)
sage_model.convs.1.lin_l.weight: tensor (40, 128)
sage_model.convs.1.lin_l.bias: tensor (40,)
sage_model.convs.1.lin_r.weight: tensor (40, 128)


  obj = torch.load("mpnet_model.pt", map_location="cpu")


In [10]:

SUBSET_SIZE = 1000
test_subset = test_idx[:SUBSET_SIZE].cpu()
print("Using test subset size:", len(test_subset))


Using test subset size: 1000


In [11]:

RESULTS_PATH = "paraphrase_attack_results_mpnet.json"

if os.path.exists(RESULTS_PATH):
    with open(RESULTS_PATH, "r") as f:
        results = json.load(f)
else:
    results = {}

results["test_subset"] = test_subset.tolist()
with open(RESULTS_PATH, "w") as f:
    json.dump(results, f, indent=2)


In [13]:
import torch
from torch_geometric.nn import GraphSAGE

class SAGEModel(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, num_classes):
        super().__init__()
        self.sage_model = GraphSAGE(
            in_channels=in_channels,
            hidden_channels=hidden_channels,
            num_layers=2,
            out_channels=num_classes,
            dropout=0.5,
            act='relu'
        )

    def forward(self, x, edge_index):
        return self.sage_model(x, edge_index)

model = SAGEModel(
    in_channels=node_embeddings.size(1),
    hidden_channels=128,
    num_classes=int(data.y.max().item() + 1)
).to(device)

state_dict = torch.load("mpnet_model.pt", map_location=device)
model.load_state_dict(state_dict)
model.eval()

print("GraphSAGE model loaded successfully")


GraphSAGE model loaded successfully


  state_dict = torch.load("mpnet_model.pt", map_location=device)


In [14]:

@torch.no_grad()
def get_preds():
    model.eval()
    logits = model(data.x, data.edge_index)
    return logits.argmax(dim=1).cpu()

data.x = node_embeddings.clone().to(device)
baseline_preds = get_preds()

baseline_test_acc = (
    baseline_preds[test_subset] == data.y.cpu()[test_subset]
).float().mean().item()

results["baseline"] = {"test_acc": baseline_test_acc}
with open(RESULTS_PATH, "w") as f:
    json.dump(results, f, indent=2)

print("Baseline test accuracy (MPNet):", baseline_test_acc)


Baseline test accuracy (MPNet): 0.7239999771118164


In [15]:

def save_results(name, test_acc, flip_rate, cos_dist):
    results[name] = {
        "test_acc": float(test_acc),
        "accuracy_drop": float(baseline_test_acc - test_acc),
        "flip_rate": float(flip_rate),
        "cosine_mean": float(cos_dist.mean().item()),
        "cosine_median": float(cos_dist.median().item()),
        "cosine_max": float(cos_dist.max().item()),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    with open(RESULTS_PATH, "w") as f:
        json.dump(results, f, indent=2)


In [16]:

PARA_MODEL = "t5-base"

t5_tokenizer = AutoTokenizer.from_pretrained(PARA_MODEL)
t5_model = AutoModelForSeq2SeqLM.from_pretrained(
    PARA_MODEL,
    use_safetensors=True
).to(device)
t5_model.eval()


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [17]:

@torch.no_grad()
def paraphrase_once(text):
    inputs = t5_tokenizer(
        "paraphrase: " + text,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    ).to(device)

    outputs = t5_model.generate(
        **inputs,
        max_length=256,
        do_sample=True,
        temperature=1.3,
        top_p=0.95,
        num_beams=4
    )
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

def paraphrase_two(text):
    return paraphrase_once(paraphrase_once(text))


In [18]:

BT_LANGS = {
    "zh": ("Helsinki-NLP/opus-mt-en-zh", "Helsinki-NLP/opus-mt-zh-en"),
    "hi": ("Helsinki-NLP/opus-mt-en-hi", "Helsinki-NLP/opus-mt-hi-en"),
    "de": ("Helsinki-NLP/opus-mt-en-de", "Helsinki-NLP/opus-mt-de-en"),
}

TRANSLATORS = {}

def load_translator(name):
    tok = MarianTokenizer.from_pretrained(name)
    mdl = MarianMTModel.from_pretrained(
        name,
        weights_only=False
    ).to(device)
    mdl.eval()
    return tok, mdl

for lang, (en_x, x_en) in BT_LANGS.items():
    TRANSLATORS[lang] = {
        "en_x": load_translator(en_x),
        "x_en": load_translator(x_en),
    }


tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/806k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/805k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]



pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/805k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/807k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/312M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/306M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/813k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/304M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/304M [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/768k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/797k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/298M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/298M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/797k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/768k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/298M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

In [19]:

@torch.no_grad()
def translate(text, tok, mdl):
    inputs = tok(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    out = mdl.generate(**inputs, num_beams=4)
    return tok.decode(out[0], skip_special_tokens=True)

def backtranslate_once(text, lang):
    tok1, mdl1 = TRANSLATORS[lang]["en_x"]
    tok2, mdl2 = TRANSLATORS[lang]["x_en"]
    return translate(translate(text, tok1, mdl1), tok2, mdl2)

def backtranslate_two(text, lang):
    return backtranslate_once(backtranslate_once(text, lang), lang)


In [20]:

def run_attack(name, attack_fn):
    print(f"Running {name}")
    data.x = node_embeddings.clone().to(device)

    attacked = texts.copy()
    for idx in tqdm(test_subset, desc=name):
        attacked[idx] = attack_fn(texts[idx])

    embs = []
    for i in range(0, len(texts), 64):
        with torch.no_grad():
            embs.append(encoder.encode(attacked[i:i+64], convert_to_tensor=True))

    data.x = torch.cat(embs).to(device)

    preds = get_preds()
    test_preds = preds[test_subset]
    base_preds = baseline_preds[test_subset]

    test_acc = (test_preds == data.y.cpu()[test_subset]).float().mean().item()
    flip_rate = (test_preds != base_preds).float().mean().item()

    cos_dist = 1 - F.cosine_similarity(
        node_embeddings[test_subset].cpu(),
        data.x[test_subset].cpu(),
        dim=1
    )

    save_results(name, test_acc, flip_rate, cos_dist)

    torch.save(data.x[test_subset].cpu(), f"embeddings_mpnet_{name}.pt")


In [21]:

#run_attack("paraphrase_1step", paraphrase_once)
#run_attack("paraphrase_2step", paraphrase_two)

for lang in ["zh", "hi", "de"]:
    run_attack(f"bt_{lang}_1step", lambda t, l=lang: backtranslate_once(t, l))

print("All MPNet attacks complete")


Running paraphrase_1step


paraphrase_1step: 100%|██████████| 1000/1000 [26:08<00:00,  1.57s/it]


Running paraphrase_2step


paraphrase_2step: 100%|██████████| 1000/1000 [46:14<00:00,  2.77s/it]


Running bt_zh_1step


bt_zh_1step: 100%|██████████| 1000/1000 [51:13<00:00,  3.07s/it]


Running bt_hi_1step


bt_hi_1step: 100%|██████████| 1000/1000 [35:01<00:00,  2.10s/it]


Running bt_de_1step


bt_de_1step: 100%|██████████| 1000/1000 [56:42<00:00,  3.40s/it]


All MPNet attacks complete


In [None]:

with open(RESULTS_PATH, "r") as f:
    results = json.load(f)

rows = []
for k, v in results.items():
    if k in ["baseline", "test_subset"]:
        continue
    rows.append({
        "Attack": k,
        "Test Accuracy": v["test_acc"],
        "Accuracy Drop": v["accuracy_drop"],
        "Flip Rate": v["flip_rate"],
        "Cosine Mean": v["cosine_mean"],
    })

df = pd.DataFrame(rows).sort_values("Accuracy Drop", ascending=False)
df
