# Clone repo

In [1]:
import getpass
import subprocess
from urllib.parse import quote

username = input("GitHub username: ")
token = getpass.getpass("GitHub token: ")
repo_url = "https://github.com/ISE-Lab-AI4LIFE/SANNER_2025.git"

auth_url = repo_url.replace("https://", f"https://{quote(username)}:{quote(token)}@")

try:
    subprocess.run(["git", "clone", auth_url], check=True)
    print("✅ Repo cloned successfully!")
except subprocess.CalledProcessError as e:
    print("❌ Clone failed. Check error message below:")
    print(e.stderr)

GitHub username: hieunguyen-cyber
GitHub token: ··········
✅ Repo cloned successfully!


# Merge Hotflip result and dvide into pools

In [None]:
import pandas as pd
import numpy as np
import torch
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

# --------- Load files ---------
merged_hotflip_results = pd.read_csv('/content/SANNER_2025/data/hotflip_result/merged_hotflip_results.csv')
pool = pd.read_csv('/content/SANNER_2025/data/pool.csv')
test = pd.read_csv('/content/SANNER_2025/data/test.csv')

# --------- Basic checks ---------
print("Basic shapes:")
print("merged_hotflip_results:", merged_hotflip_results.shape)
print("pool:", pool.shape)
print("test:", test.shape)

# Convert ids to string to avoid mismatch
merged_hotflip_results['document_id'] = merged_hotflip_results['document_id'].astype(str)
pool['document_id'] = pool['document_id'].astype(str)

# Keep only intersecting ids
merged = pool.merge(merged_hotflip_results, on='document_id', how='inner')
print("Number of matched pairs (target <-> poisoned):", len(merged))
if len(merged) == 0:
    raise ValueError("No matched document_id pairs between pool and merged_hotflip_results. Check formats.")

# --------- Prepare pools ---------
clean_ids = set(pool['document_id']) - set(merged['document_id'])
clean_docs = pool[pool['document_id'].isin(clean_ids)].copy()
target_docs = merged[['document_id', 'document']].copy()
poison_docs = merged[['document_id', 'final_poisoned_doc']].rename(columns={'final_poisoned_doc': 'document'}).copy()

print(f"Counts -> clean: {len(clean_docs)}, target: {len(target_docs)}, poison: {len(poison_docs)}")
queries = test['queries'].dropna().tolist()
print("n_queries:", len(queries))

# --------- Model & embedding function ---------
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2", device=device)

def embed_long_texts(texts, chunk_size=512):
    """Return tensor embeddings (normalized) for list of texts, using chunk-average for long texts."""
    out_embs = []
    for t in texts:
        words = t.split()
        if len(words) <= chunk_size:
            emb = model.encode([t], convert_to_tensor=True, normalize_embeddings=True)
            out_embs.append(emb[0])
        else:
            chunks = [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
            embs = model.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
            out_embs.append(embs.mean(dim=0))
    return torch.stack(out_embs)

# --------- Encode ---------
print("Encoding queries and doc pools (this may take a while)...")
query_embs = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True, show_progress_bar=True)
clean_embs = embed_long_texts(clean_docs['document'].astype(str).tolist())
target_embs = embed_long_texts(target_docs['document'].astype(str).tolist())
poison_embs = embed_long_texts(poison_docs['document'].astype(str).tolist())

# --------- Build pools ---------
clean_ids_list = list(clean_docs['document_id'].astype(str).tolist())
target_ids_list = list(target_docs['document_id'].astype(str).tolist())
poison_ids_list = list(poison_docs['document_id'].astype(str).tolist())

# Pools
pool_target_embs = torch.cat([clean_embs, target_embs], dim=0)
pool_target_ids = clean_ids_list + target_ids_list
pool_target_types = ['clean'] * len(clean_ids_list) + ['target'] * len(target_ids_list)

pool_poison_embs = torch.cat([clean_embs, poison_embs], dim=0)
pool_poison_ids = clean_ids_list + poison_ids_list
pool_poison_types = ['clean'] * len(clean_ids_list) + ['poisoned'] * len(poison_ids_list)

print("Pool sizes -> target-pool:", len(pool_target_ids), " poison-pool:", len(pool_poison_ids))

# --------- Function compute hit counts per doc ---------
def hits_and_rd(query_embs, pool_embs, pool_ids, pool_types, compare_ids, compare_embs_other=None):
    sim = util.cos_sim(query_embs, pool_embs)  # [n_q, n_docs]
    n_q, n_d = sim.shape
    hits = torch.zeros(n_d)
    K = 5
    for i in range(n_q):
        topk = torch.topk(sim[i], k=K).indices.tolist()
        for idx in topk:
            hits[idx] += 1
    hits_ratio = hits / n_q
    df_pool = pd.DataFrame({
        'document_id': pool_ids,
        'type': pool_types,
        f'hit@{K}': hits_ratio.cpu().numpy(),
        'hit_count': hits.cpu().numpy().astype(int)
    })
    rd_dict = {}
    if compare_embs_other is not None:
        sim_other = util.cos_sim(query_embs, compare_embs_other)
        id_to_pool_idx = {pid: idx for idx, pid in enumerate(pool_ids)}
        for j, cid in enumerate(compare_ids):
            if cid in id_to_pool_idx:
                pidx = id_to_pool_idx[cid]
                sim_pool_col = sim[:, pidx]
                sim_other_col = sim_other[:, j]
                rd = torch.mean(torch.abs(sim_pool_col - sim_other_col)).item()
                rd_dict[cid] = rd
            else:
                rd_dict[cid] = np.nan
    return df_pool, rd_dict

# --------- Compute stats for both pools ---------
K = 5
df_target_pool, _ = hits_and_rd(query_embs, pool_target_embs, pool_target_ids, pool_target_types, target_ids_list, compare_embs_other=None)
df_poison_pool, _ = hits_and_rd(query_embs, pool_poison_embs, pool_poison_ids, pool_poison_types, poison_ids_list, compare_embs_other=None)

# Extract only target rows and poison rows
df_target_only = df_target_pool[df_target_pool['type'] == 'target'].copy().reset_index(drop=True)
df_poison_only = df_poison_pool[df_poison_pool['type'] == 'poisoned'].copy().reset_index(drop=True)

mean_hit_target = df_target_only[f'hit@{K}'].mean()
mean_hit_poison = df_poison_only[f'hit@{K}'].mean()

print(f"\nMean Hit@{K} (target in pool clean+target): {mean_hit_target:.6f}")
print(f"Mean Hit@{K} (poison in pool clean+poison): {mean_hit_poison:.6f}")
print("Ratio (poison/target):", mean_hit_poison / (mean_hit_target + 1e-12))

# --------- Show top examples where poison increased most (we need RD per id and per-id differences):
merged_stats = pd.DataFrame({
    'document_id': target_ids_list,
    'hit_target': df_target_only[f'hit@{K}'].values,
    'hit_poison': df_poison_only[f'hit@{K}'].values
})
merged_stats['delta'] = merged_stats['hit_poison'] - merged_stats['hit_target']
merged_stats_sorted = merged_stats.sort_values('delta', ascending=False).reset_index(drop=True)

print("\nTop 10 docs with largest Hit@K increase after poisoning:")
print(merged_stats_sorted.head(10))

# Save detailed per-doc
merged_stats.to_csv('/content/compare_hit_target_vs_poison_per_doc.csv', index=False)
print("\nSaved per-doc compare file to /content/compare_hit_target_vs_poison_per_doc.csv")


Basic shapes:
merged_hotflip_results: (157, 3)
pool: (5446, 3)
test: (15232, 2)
Number of matched pairs (target <-> poisoned): 157
Counts -> clean: 5289, target: 157, poison: 157
n_queries: 15232
Encoding queries and doc pools (this may take a while)...


Batches:   0%|          | 0/476 [00:00<?, ?it/s]

Pool sizes -> target-pool: 5446  poison-pool: 5446

Mean Hit@5 (target in pool clean+target): 0.001105
Mean Hit@5 (poison in pool clean+poison): 0.001671
Ratio (poison/target): 1.5122966

Top 10 docs with largest Hit@K increase after poisoning:
                                         document_id  hit_target  hit_poison  \
0                 EvolInstructCode80k_document_62427    0.002232    0.007025   
1       RustInstr_train-00000-of-00001_document_3010    0.001313    0.005121   
2                              BuzzJS_document_32104    0.001510    0.005055   
3        RustInstr_train-00000-of-00001_document_944    0.001641    0.004464   
4  secalignDBGHaikuJS_train-00000-of-00001_docume...    0.000985    0.003611   
5                 EvolInstructCode80k_document_74796    0.001576    0.004005   
6                 EvolInstructCode80k_document_13036    0.002166    0.004530   
7                 EvolInstructCode80k_document_46286    0.002035    0.004399   
8                               Buz

In [3]:
import torch
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# --------- Load files ---------
merged_hotflip_results = pd.read_csv('/content/SANNER_2025/data/hotflip_result/merged_hotflip_results.csv')
pool = pd.read_csv('/content/SANNER_2025/data/pool.csv')
test = pd.read_csv('/content/SANNER_2025/data/test.csv')

# --------- Basic checks ---------
print("Basic shapes:")
print("merged_hotflip_results:", merged_hotflip_results.shape)
print("pool:", pool.shape)
print("test:", test.shape)

# Convert ids to string to avoid mismatch
merged_hotflip_results['document_id'] = merged_hotflip_results['document_id'].astype(str)
pool['document_id'] = pool['document_id'].astype(str)

# Keep only intersecting ids
merged = pool.merge(merged_hotflip_results, on='document_id', how='inner')
print("Number of matched pairs (target <-> poisoned):", len(merged))
if len(merged) == 0:
    raise ValueError("No matched document_id pairs between pool and merged_hotflip_results. Check formats.")

# --------- Prepare pools ---------
clean_ids = set(pool['document_id']) - set(merged['document_id'])
clean_docs = pool[pool['document_id'].isin(clean_ids)].copy()
target_docs = merged[['document_id', 'document']].copy()
poison_docs = merged[['document_id', 'final_poisoned_doc']].rename(columns={'final_poisoned_doc': 'document'}).copy()

print(f"Counts -> clean: {len(clean_docs)}, target: {len(target_docs)}, poison: {len(poison_docs)}")
queries = test['queries'].dropna().tolist()
print("n_queries:", len(queries))

# --------- Model & embedding function ---------
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2", device=device)

def embed_long_texts(texts, chunk_size=512):
    """Return tensor embeddings (normalized) for list of texts, using chunk-average for long texts."""
    out_embs = []
    for t in texts:
        words = t.split()
        if len(words) <= chunk_size:
            emb = model.encode([t], convert_to_tensor=True, normalize_embeddings=True)
            out_embs.append(emb[0])
        else:
            chunks = [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
            embs = model.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
            out_embs.append(embs.mean(dim=0))
    return torch.stack(out_embs)

# --------- Encode ---------
print("Encoding queries and doc pools (this may take a while)...")
query_embs = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True, show_progress_bar=True)
clean_embs = embed_long_texts(clean_docs['document'].astype(str).tolist())
target_embs = embed_long_texts(target_docs['document'].astype(str).tolist())
poison_embs = embed_long_texts(poison_docs['document'].astype(str).tolist())

# --------- Build pools ---------
clean_ids_list = list(clean_docs['document_id'].astype(str).tolist())
target_ids_list = list(target_docs['document_id'].astype(str).tolist())
poison_ids_list = list(poison_docs['document_id'].astype(str).tolist())

# Pools
pool_target_embs = torch.cat([clean_embs, target_embs], dim=0)
pool_target_ids = clean_ids_list + target_ids_list
pool_target_types = ['clean'] * len(clean_ids_list) + ['target'] * len(target_ids_list)

pool_poison_embs = torch.cat([clean_embs, poison_embs], dim=0)
pool_poison_ids = clean_ids_list + poison_ids_list
pool_poison_types = ['clean'] * len(clean_ids_list) + ['poisoned'] * len(poison_ids_list)

print("Pool sizes -> target-pool:", len(pool_target_ids), " poison-pool:", len(pool_poison_ids))

# ----------------- Custom Precision@K -----------------
def custom_precision_at_k_poison(query_embeds, pool_embs, pool_ids, poison_ids, K=10):
    """
    Tính Custom Precision@K cho poisoned docs theo kiểu bài báo.
    - total_hits / K -> có thể >1
    """
    # similarity matrix
    sim = torch.matmul(query_embeds, pool_embs.T)

    # top-K indices cho mỗi query
    topk_idx = torch.topk(sim, k=K, dim=1).indices

    total_hits = 0
    n_queries = sim.shape[0]

    for i in range(n_queries):
        topk_docs = [pool_ids[idx] for idx in topk_idx[i].tolist()]
        total_hits += sum(d in poison_ids for d in topk_docs)

    # custom Precision@K kiểu bài báo (có thể >1)
    custom_precision = total_hits / K

    # normalized Precision@K (trung bình per query, ≤1)
    normalized_precision = total_hits / (K * n_queries)

    return custom_precision, normalized_precision

# ----------------- Custom Precision@K for Target -----------------
def custom_precision_at_k_target(query_embeds, pool_embs, pool_ids, target_ids, K=10):
    """
    Tính Custom Precision@K cho target docs theo kiểu bài báo.
    - total_hits / K -> có thể >1
    """
    # similarity matrix
    sim = torch.matmul(query_embeds, pool_embs.T)

    # top-K indices cho mỗi query
    topk_idx = torch.topk(sim, k=K, dim=1).indices

    total_hits = 0
    n_queries = sim.shape[0]

    for i in range(n_queries):
        topk_docs = [pool_ids[idx] for idx in topk_idx[i].tolist()]
        total_hits += sum(d in target_ids for d in topk_docs)

    # custom Precision@K kiểu bài báo (có thể >1)
    custom_precision = total_hits / K

    # normalized Precision@K (trung bình per query, ≤1)
    normalized_precision = total_hits / (K * n_queries)

    return custom_precision, normalized_precision

# ----------------- Example usage -----------------
K_eval = 10  # Top-10 retrievals

# Calculate Precision@K for Poisoned docs
custom_p, normalized_p = custom_precision_at_k_poison(
    query_embs, pool_poison_embs, pool_poison_ids, poison_ids_list, K=K_eval
)

print(f"Custom Precision@{K_eval} (poisoned docs, bài báo style): {custom_p/100:.3f}")

# Calculate Precision@K for Target docs
custom_p_target, normalized_p_target = custom_precision_at_k_target(
    query_embs, pool_target_embs, pool_target_ids, target_ids_list, K=K_eval
)

print(f"Custom Precision@{K_eval} (target docs, bài báo style): {custom_p_target/100:.3f}")

Basic shapes:
merged_hotflip_results: (157, 3)
pool: (5446, 3)
test: (15232, 2)
Number of matched pairs (target <-> poisoned): 157
Counts -> clean: 5289, target: 157, poison: 157
n_queries: 15232
Encoding queries and doc pools (this may take a while)...


Batches:   0%|          | 0/476 [00:00<?, ?it/s]

Pool sizes -> target-pool: 5446  poison-pool: 5446
Custom Precision@10 (poisoned docs, bài báo style): 7.665
Custom Precision@10 (target docs, bài báo style): 5.014
