## Device Setup

In [1]:
# Shell escape: list the GPUs on this machine and their current memory/utilisation.
!nvidia-smi

Sun Dec 21 08:03:40 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.54.03              Driver Version: 535.54.03    CUDA Version: 12.5     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          Off | 00000000:17:00.0 Off |                    0 |
| N/A   51C    P0              46W / 300W |      7MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A100 80GB PCIe          Off | 00000000:31:00.0 Off |  

In [2]:
import os

# CUDA_VISIBLE_DEVICES is only honoured if it is set before CUDA is initialised in
# this process, so it is exported before torch is imported. If torch has already
# touched the GPU in this kernel, restart the kernel for the mask to take effect.
# With the mask in place the selected physical GPU is exposed to PyTorch as cuda:0.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

import torch

# Fall back to the CPU when no CUDA device is usable.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
# Report the installed PyTorch build, whether CUDA is usable and, if it is,
# the CUDA version reported by torch.version.cuda.
version_lines = [
    f"Current PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
]
if torch.cuda.is_available():
    version_lines.append(f"CUDA version: {torch.version.cuda}")
print("\n".join(version_lines))

Current PyTorch version: 2.9.1+cu128
CUDA available: True
CUDA version: 12.8


## Passage dataset

***
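Percentage of spammers added: 10, 20, 40, 60, 80

10 random seeds, `range(20, 30)`,
are used to create the data for each type of spammer

For every (percentage, seed) pair, invoke factor BARP

Report BARP accuracy (mean ± std dev over the seeds), weighted accuracy (wacc), and Kendall's tau

Save the results in `results/<spammer_type>/barp.csv`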
***

In [4]:
import sys
# Put the parent and grandparent directories at the front of the module search path so
# the project modules imported below can be found. The paths are relative, so they
# resolve against the kernel's working directory.
sys.path.insert(0, "../")
sys.path.insert(1, "../../")

# NOTE(review): the two wildcard imports hide where names come from. Later cells use
# add_random_spammer, add_anti_personas, add_position_biased_spammers,
# add_equal_proportion_of_all_spammers, df_to_pickle and np without importing them
# in this notebook -- presumably they are supplied by these modules; confirm and
# replace with explicit imports.
from spammer_types import *
from util import *
import opt_fair
from distribution_utils import crowd_bt_dist, logistic_preference_dist, comparisons_to_df, safe_kendalltau, to_numpy
from metrics import compute_acc, compute_weighted_acc

### Get the original df of passage dataset

In [5]:
# Location of the cleaned passage comparison CSV (relative to the kernel's working directory); read in the next cell.
df_path = "../../real_data/passage/data/passage_cleaned.csv"

In [6]:
import pandas as pd
# Load the comparison table; the columns previewed below are left, right, label and performer.
df = pd.read_csv(df_path)
def sort_df(df, column_name, ascending=True):
    """Return a new DataFrame with the rows of ``df`` ordered by ``column_name``.

    Args:
        df: DataFrame to sort; it is not modified.
        column_name: Column label (or list of labels) to sort by.
        ascending: Sort direction, forwarded to ``DataFrame.sort_values``.
            Defaults to ``True`` (smallest first), which is the behaviour
            existing two-argument calls rely on.

    Returns:
        The sorted DataFrame. The original index labels are kept, so the index
        is generally no longer monotonic after sorting.
    """
    return df.sort_values(by=column_name, ascending=ascending)
# Order the comparisons by the `performer` column (ascending).
df = sort_df(df, 'performer')
# Preview the first rows of the four columns shown in the output below.
df[['left', 'right', 'label', 'performer']].head()

Unnamed: 0,left,right,label,performer
0,been wicked. They believed that the end of the...,lichen Sect Content Linking Artid A snake coil...,lichen Sect Content Linking Artid A snake coil...,5
20,"school, you noticed that all the clocks read a...",man in the seat with me and the two women acro...,man in the seat with me and the two women acro...,5
19,foreign animals or plants may be taken into th...,adhere to the wall. Using sizing that has been...,foreign animals or plants may be taken into th...,5
18,many pennies did each child get Which computat...,the picture on its cover. Write two or three s...,many pennies did each child get Which computat...,5
17,direction of the Sun. The length in meters and...,"the ground going into the cave. Oh, my he said...","the ground going into the cave. Oh, my he said...",5


In [7]:
# Spammer percentages swept by every experiment loop below.
percents = [10, 20, 40, 60, 80]
# For a quick smoke test, restrict the sweep to a single level: percents = [10]

In [8]:
import pickle

# NOTE: pickle.load can execute arbitrary code stored in the file -- only load pickles from a trusted source.
with open("../../real_data/passage/data/PassageDF.pickle", "rb") as handle:
    df_passage = pickle.load(handle)
# Display the loaded table; the output below shows a `label` (passage text) and a `score` column.
df_passage

Unnamed: 0,label,score
0,"a star. Our planet, Earth, orbits, or circles,...",1
1,"Adam, We did not have plastic toys. I played w...",1
2,Who said the little owl. Who wants to hunt wit...,1
3,dead leaf. This is a mole. Moles burrow underg...,1
4,ereaddatagradepsenvironcomp.html Environment r...,1
...,...,...
467,work over the summer on any changes they wish ...,12
468,between January and December plunged the Unite...,12
469,into a newly opened bank account. I was amazed...,12
470,"occurring phenomenon, manmade by products are ...",12


In [9]:
# Number of rows in the passage table; passed to opt_fair._alternate_optim_torch in the loops below.
size = len(df_passage)
print(size)
# One class label per row, all set to 0; passed to opt_fair.BARP as `classes`.
classes = [0] * size
# for faceage it would be classes = df_passage['gender']

472


In [10]:
# Alias (not a copy): the passage table is also the ground truth; its `score` column is what the estimated scores are compared against.
gt_df = df_passage

### Addition of Random Guessers

In [11]:
# Spammer family for this section; selects the results/<spammer_type>/ output folder.
spammer_type = "random"

In [12]:
# Summary CSV for this spammer type: the header is written below, then one row is appended per percentage.
# NOTE: csv_file is rebound in every section, so run the sections in order -- a loop cell
# appends to whichever file was set last.
csv_file = f"results/{spammer_type}/barp.csv"

In [13]:
import os
# Make sure the output folder exists; exist_ok=True keeps this cell safe to re-run.
os.makedirs(f"results/{spammer_type}", exist_ok=True)

In [14]:
import csv

# Column layout of the summary file: the spammer percentage, then a (mean, std)
# pair for accuracy, weighted accuracy and tau.
header = [
    "percent",
    "BARP_acc_mean", "BARP_acc_std",
    "BARP_wacc_mean", "BARP_wacc_std",
    "BARP_tau_mean", "BARP_tau_std",
]

# Opening in write mode truncates any earlier results file, so the file
# restarts with just the header row.
with open(csv_file, "w", newline="") as out_file:
    csv.writer(out_file).writerow(header)

In [None]:
import csv
import numpy as np  # explicit: otherwise `np` is only reachable through a wildcard import

# Robustness sweep for random spammers.
# For every spammer percentage, BARP is fitted on 10 seeded corruptions of `df`
# (seeds 20..29). One summary row -- mean and std over the successful seeds of
# accuracy, weighted accuracy and tau -- is appended to `csv_file`.
for percent in percents:
    # per-seed metrics for this percentage
    BARP_accs, BARP_waccs, BARP_taus = [], [], []
    skipped_seeds = []  # seeds dropped because the fit raised or returned NaN scores

    for sd in range(20, 30):

        # Add the spammers, then convert the comparisons to the structure BARP consumes.
        # (The `faceage` names are presumably carried over from another experiment;
        # the data here is the passage set.)
        random_df, spammer_ids = add_random_spammer(df, percent, seed=sd)
        PC_faceage = df_to_pickle(random_df, df_passage)
        K = len(PC_faceage.keys())
        print(K)

        try:
            FaceAge = opt_fair.BARP(data=PC_faceage, penalty=0, classes=classes, device=device)
            annot_bt_temp, annot_bias = opt_fair._alternate_optim_torch(size, K, FaceAge, iters=100)
            annot_bt_np = to_numpy(annot_bt_temp)
            if np.isnan(annot_bt_np).any():
                # Record the unusable fit instead of dropping the seed silently.
                print(f"BARP returned NaN scores for percent={percent}, seed={sd}; run skipped")
                skipped_seeds.append(sd)
                continue
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
            if BARP_tau < 0:
                # The orientation of the scores is chosen using the ground truth: a
                # negatively correlated solution is negated before it is evaluated.
                annot_bt_np = -annot_bt_np
            BARP_acc = compute_acc(gt_df, annot_bt_np, device)
            BARP_wacc = compute_weighted_acc(gt_df, annot_bt_np, device)
            # Recomputed so the stored tau matches the (possibly negated) scores.
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
        except Exception as e:
            print(f"BARP failed due to {e}")
            skipped_seeds.append(sd)
            continue
        BARP_accs.append(BARP_acc)
        BARP_waccs.append(BARP_wacc)
        BARP_taus.append(BARP_tau)

    # Aggregate once over the seeds that succeeded. With no successful run the
    # statistics are written as NaN directly instead of via np.mean([]), which
    # yields the same NaN but emits a RuntimeWarning.
    summary = []
    for values in (BARP_accs, BARP_waccs, BARP_taus):
        if values:
            summary.extend([np.mean(values), np.std(values)])
        else:
            summary.extend([float("nan"), float("nan")])
    acc_mean, acc_std, wacc_mean, wacc_std, tau_mean, tau_std = summary

    row = [percent, *summary]
    with open(csv_file, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)
    print(
        f"BARP | "
        f"Percent: {percent} |"
        f"Acc: {acc_mean:.4f} ± {acc_std:.4f} | "
        f"WAcc: {wacc_mean:.4f} ± {wacc_std:.4f} | "
        f"Tau: {tau_mean:.4f} ± {tau_std:.4f}")
    if skipped_seeds:
        # Make it visible when the statistics cover fewer than the 10 planned runs.
        print(f"  note: {len(skipped_seeds)} seed(s) skipped {skipped_seeds}; "
              f"statistics cover {len(BARP_accs)} run(s)")

Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:23<00:00,  1.19it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:24<00:00,  1.18it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:21<00:00,  1.22it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:25<00:00,  1.17it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:24<00:00,  1.19it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:30<00:00,  1.11it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:25<00:00,  1.17it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:27<00:00,  1.14it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:27<00:00,  1.14it/s]


Unique performers: 686
686


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:28<00:00,  1.12it/s]


BARP | Percent: 10 |Acc: 0.6869 ± 0.0024 | WAcc: 0.7523 ± 0.0031 | Tau: 0.3528 ± 0.0046
Unique performers: 748
748


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:27<00:00,  1.15it/s]


Unique performers: 748
748


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:22<00:00,  1.21it/s]


Unique performers: 748
748


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:16<00:00,  1.30it/s]


Unique performers: 748
748


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:54<00:00,  1.84it/s]


Unique performers: 748
748


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:01<00:00,  1.62it/s]


Unique performers: 748
748


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:25<00:00,  1.17it/s]


Unique performers: 748
748


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [01:23<00:00,  1.20it/s]


Unique performers: 748
748


 16%|██████████████████▉                                                                                                   | 16/100 [00:13<01:10,  1.19it/s]

### Addition of Anti-Personas

In [None]:
# Spammer family for this section; selects the results/<spammer_type>/ output folder.
spammer_type = "anti"

In [None]:
# Summary CSV for this spammer type: the header is written below, then one row is appended per percentage.
# NOTE: csv_file is rebound in every section, so run the sections in order -- a loop cell
# appends to whichever file was set last.
csv_file = f"results/{spammer_type}/barp.csv"

In [None]:
import os
# Make sure the output folder exists; exist_ok=True keeps this cell safe to re-run.
os.makedirs(f"results/{spammer_type}", exist_ok=True)

In [None]:
import csv

# Column layout of the summary file: the spammer percentage, then a (mean, std)
# pair for accuracy, weighted accuracy and tau.
header = [
    "percent",
    "BARP_acc_mean", "BARP_acc_std",
    "BARP_wacc_mean", "BARP_wacc_std",
    "BARP_tau_mean", "BARP_tau_std",
]

# Opening in write mode truncates any earlier results file, so the file
# restarts with just the header row.
with open(csv_file, "w", newline="") as out_file:
    csv.writer(out_file).writerow(header)

In [None]:
import csv
import numpy as np  # explicit: otherwise `np` is only reachable through a wildcard import

# Robustness sweep for anti-persona spammers.
# For every spammer percentage, BARP is fitted on 10 seeded corruptions of `df`
# (seeds 20..29). One summary row -- mean and std over the successful seeds of
# accuracy, weighted accuracy and tau -- is appended to `csv_file`.
for percent in percents:
    # per-seed metrics for this percentage
    BARP_accs, BARP_waccs, BARP_taus = [], [], []
    skipped_seeds = []  # seeds dropped because the fit raised or returned NaN scores

    for sd in range(20, 30):

        # Add the spammers, then convert the comparisons to the structure BARP consumes.
        # (`random_df` and the `faceage` names are presumably carried over from other
        # cells; here the frame holds anti-persona spammers on the passage set.)
        random_df, spammer_ids = add_anti_personas(df, percent, seed=sd)
        PC_faceage = df_to_pickle(random_df, df_passage)
        K = len(PC_faceage.keys())
        print(K)

        try:
            FaceAge = opt_fair.BARP(data=PC_faceage, penalty=0, classes=classes, device=device)
            annot_bt_temp, annot_bias = opt_fair._alternate_optim_torch(size, K, FaceAge, iters=100)
            annot_bt_np = to_numpy(annot_bt_temp)
            if np.isnan(annot_bt_np).any():
                # Record the unusable fit instead of dropping the seed silently.
                print(f"BARP returned NaN scores for percent={percent}, seed={sd}; run skipped")
                skipped_seeds.append(sd)
                continue
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
            if BARP_tau < 0:
                # The orientation of the scores is chosen using the ground truth: a
                # negatively correlated solution is negated before it is evaluated.
                annot_bt_np = -annot_bt_np
            BARP_acc = compute_acc(gt_df, annot_bt_np, device)
            BARP_wacc = compute_weighted_acc(gt_df, annot_bt_np, device)
            # Recomputed so the stored tau matches the (possibly negated) scores.
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
        except Exception as e:
            print(f"BARP failed due to {e}")
            skipped_seeds.append(sd)
            continue
        BARP_accs.append(BARP_acc)
        BARP_waccs.append(BARP_wacc)
        BARP_taus.append(BARP_tau)

    # Aggregate once over the seeds that succeeded. With no successful run the
    # statistics are written as NaN directly instead of via np.mean([]), which
    # yields the same NaN but emits a RuntimeWarning.
    summary = []
    for values in (BARP_accs, BARP_waccs, BARP_taus):
        if values:
            summary.extend([np.mean(values), np.std(values)])
        else:
            summary.extend([float("nan"), float("nan")])
    acc_mean, acc_std, wacc_mean, wacc_std, tau_mean, tau_std = summary

    row = [percent, *summary]
    with open(csv_file, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)
    print(
        f"BARP | "
        f"Percent: {percent} |"
        f"Acc: {acc_mean:.4f} ± {acc_std:.4f} | "
        f"WAcc: {wacc_mean:.4f} ± {wacc_std:.4f} | "
        f"Tau: {tau_mean:.4f} ± {tau_std:.4f}")
    if skipped_seeds:
        # Make it visible when the statistics cover fewer than the 10 planned runs.
        print(f"  note: {len(skipped_seeds)} seed(s) skipped {skipped_seeds}; "
              f"statistics cover {len(BARP_accs)} run(s)")

### Addition of Left Position Biased Spammers

In [None]:
# Spammer family for this section; selects the results/<spammer_type>/ output folder.
spammer_type = "left"

In [None]:
# Summary CSV for this spammer type: the header is written below, then one row is appended per percentage.
# NOTE: csv_file is rebound in every section, so run the sections in order -- a loop cell
# appends to whichever file was set last.
csv_file = f"results/{spammer_type}/barp.csv"

In [None]:
import os
# Make sure the output folder exists; exist_ok=True keeps this cell safe to re-run.
os.makedirs(f"results/{spammer_type}", exist_ok=True)

In [None]:
import csv

# Column layout of the summary file: the spammer percentage, then a (mean, std)
# pair for accuracy, weighted accuracy and tau.
header = [
    "percent",
    "BARP_acc_mean", "BARP_acc_std",
    "BARP_wacc_mean", "BARP_wacc_std",
    "BARP_tau_mean", "BARP_tau_std",
]

# Opening in write mode truncates any earlier results file, so the file
# restarts with just the header row.
with open(csv_file, "w", newline="") as out_file:
    csv.writer(out_file).writerow(header)

In [None]:
import csv
import numpy as np  # explicit: otherwise `np` is only reachable through a wildcard import

# Robustness sweep for left-position-biased spammers.
# For every spammer percentage, BARP is fitted on 10 seeded corruptions of `df`
# (seeds 20..29). One summary row -- mean and std over the successful seeds of
# accuracy, weighted accuracy and tau -- is appended to `csv_file`.
for percent in percents:
    # per-seed metrics for this percentage
    BARP_accs, BARP_waccs, BARP_taus = [], [], []
    skipped_seeds = []  # seeds dropped because the fit raised or returned NaN scores

    for sd in range(20, 30):

        # Add the spammers, then convert the comparisons to the structure BARP consumes.
        # (`random_df` and the `faceage` names are presumably carried over from other
        # cells; here the frame holds left-biased spammers on the passage set.)
        random_df, spammer_ids = add_position_biased_spammers(df, percent, position_bias="left", seed=sd)
        PC_faceage = df_to_pickle(random_df, df_passage)
        K = len(PC_faceage.keys())
        print(K)

        try:
            FaceAge = opt_fair.BARP(data=PC_faceage, penalty=0, classes=classes, device=device)
            annot_bt_temp, annot_bias = opt_fair._alternate_optim_torch(size, K, FaceAge, iters=100)
            annot_bt_np = to_numpy(annot_bt_temp)
            if np.isnan(annot_bt_np).any():
                # Record the unusable fit instead of dropping the seed silently.
                print(f"BARP returned NaN scores for percent={percent}, seed={sd}; run skipped")
                skipped_seeds.append(sd)
                continue
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
            if BARP_tau < 0:
                # The orientation of the scores is chosen using the ground truth: a
                # negatively correlated solution is negated before it is evaluated.
                annot_bt_np = -annot_bt_np
            BARP_acc = compute_acc(gt_df, annot_bt_np, device)
            BARP_wacc = compute_weighted_acc(gt_df, annot_bt_np, device)
            # Recomputed so the stored tau matches the (possibly negated) scores.
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
        except Exception as e:
            print(f"BARP failed due to {e}")
            skipped_seeds.append(sd)
            continue
        BARP_accs.append(BARP_acc)
        BARP_waccs.append(BARP_wacc)
        BARP_taus.append(BARP_tau)

    # Aggregate once over the seeds that succeeded. With no successful run the
    # statistics are written as NaN directly instead of via np.mean([]), which
    # yields the same NaN but emits a RuntimeWarning.
    summary = []
    for values in (BARP_accs, BARP_waccs, BARP_taus):
        if values:
            summary.extend([np.mean(values), np.std(values)])
        else:
            summary.extend([float("nan"), float("nan")])
    acc_mean, acc_std, wacc_mean, wacc_std, tau_mean, tau_std = summary

    row = [percent, *summary]
    with open(csv_file, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)
    print(
        f"BARP | "
        f"Percent: {percent} |"
        f"Acc: {acc_mean:.4f} ± {acc_std:.4f} | "
        f"WAcc: {wacc_mean:.4f} ± {wacc_std:.4f} | "
        f"Tau: {tau_mean:.4f} ± {tau_std:.4f}")
    if skipped_seeds:
        # Make it visible when the statistics cover fewer than the 10 planned runs.
        print(f"  note: {len(skipped_seeds)} seed(s) skipped {skipped_seeds}; "
              f"statistics cover {len(BARP_accs)} run(s)")

### Addition of Right Position Biased Spammers

In [None]:
# Spammer family for this section; selects the results/<spammer_type>/ output folder.
spammer_type = "right"

In [None]:
# Summary CSV for this spammer type: the header is written below, then one row is appended per percentage.
# NOTE: csv_file is rebound in every section, so run the sections in order -- a loop cell
# appends to whichever file was set last.
csv_file = f"results/{spammer_type}/barp.csv"

In [None]:
import os
# Make sure the output folder exists; exist_ok=True keeps this cell safe to re-run.
os.makedirs(f"results/{spammer_type}", exist_ok=True)

In [None]:
import csv

# Column layout of the summary file: the spammer percentage, then a (mean, std)
# pair for accuracy, weighted accuracy and tau.
header = [
    "percent",
    "BARP_acc_mean", "BARP_acc_std",
    "BARP_wacc_mean", "BARP_wacc_std",
    "BARP_tau_mean", "BARP_tau_std",
]

# Opening in write mode truncates any earlier results file, so the file
# restarts with just the header row.
with open(csv_file, "w", newline="") as out_file:
    csv.writer(out_file).writerow(header)

In [None]:
import csv
import numpy as np  # explicit: otherwise `np` is only reachable through a wildcard import

# Robustness sweep for right-position-biased spammers.
# For every spammer percentage, BARP is fitted on 10 seeded corruptions of `df`
# (seeds 20..29). One summary row -- mean and std over the successful seeds of
# accuracy, weighted accuracy and tau -- is appended to `csv_file`.
for percent in percents:
    # per-seed metrics for this percentage
    BARP_accs, BARP_waccs, BARP_taus = [], [], []
    skipped_seeds = []  # seeds dropped because the fit raised or returned NaN scores

    for sd in range(20, 30):

        # Add the spammers, then convert the comparisons to the structure BARP consumes.
        # (`random_df` and the `faceage` names are presumably carried over from other
        # cells; here the frame holds right-biased spammers on the passage set.)
        random_df, spammer_ids = add_position_biased_spammers(df, percent, position_bias="right", seed=sd)
        PC_faceage = df_to_pickle(random_df, df_passage)
        K = len(PC_faceage.keys())
        print(K)

        try:
            FaceAge = opt_fair.BARP(data=PC_faceage, penalty=0, classes=classes, device=device)
            annot_bt_temp, annot_bias = opt_fair._alternate_optim_torch(size, K, FaceAge, iters=100)
            annot_bt_np = to_numpy(annot_bt_temp)
            if np.isnan(annot_bt_np).any():
                # Record the unusable fit instead of dropping the seed silently.
                print(f"BARP returned NaN scores for percent={percent}, seed={sd}; run skipped")
                skipped_seeds.append(sd)
                continue
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
            if BARP_tau < 0:
                # The orientation of the scores is chosen using the ground truth: a
                # negatively correlated solution is negated before it is evaluated.
                annot_bt_np = -annot_bt_np
            BARP_acc = compute_acc(gt_df, annot_bt_np, device)
            BARP_wacc = compute_weighted_acc(gt_df, annot_bt_np, device)
            # Recomputed so the stored tau matches the (possibly negated) scores.
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
        except Exception as e:
            print(f"BARP failed due to {e}")
            skipped_seeds.append(sd)
            continue
        BARP_accs.append(BARP_acc)
        BARP_waccs.append(BARP_wacc)
        BARP_taus.append(BARP_tau)

    # Aggregate once over the seeds that succeeded. With no successful run the
    # statistics are written as NaN directly instead of via np.mean([]), which
    # yields the same NaN but emits a RuntimeWarning.
    summary = []
    for values in (BARP_accs, BARP_waccs, BARP_taus):
        if values:
            summary.extend([np.mean(values), np.std(values)])
        else:
            summary.extend([float("nan"), float("nan")])
    acc_mean, acc_std, wacc_mean, wacc_std, tau_mean, tau_std = summary

    row = [percent, *summary]
    with open(csv_file, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)
    print(
        f"BARP | "
        f"Percent: {percent} |"
        f"Acc: {acc_mean:.4f} ± {acc_std:.4f} | "
        f"WAcc: {wacc_mean:.4f} ± {wacc_std:.4f} | "
        f"Tau: {tau_mean:.4f} ± {tau_std:.4f}")
    if skipped_seeds:
        # Make it visible when the statistics cover fewer than the 10 planned runs.
        print(f"  note: {len(skipped_seeds)} seed(s) skipped {skipped_seeds}; "
              f"statistics cover {len(BARP_accs)} run(s)")

### Addition of Equal Proportion of all four types of spammers

In [None]:
# Spammer family for this section; selects the results/<spammer_type>/ output folder.
spammer_type = "equal"

In [None]:
# Summary CSV for this spammer type: the header is written below, then one row is appended per percentage.
# NOTE: csv_file is rebound in every section, so run the sections in order -- a loop cell
# appends to whichever file was set last.
csv_file = f"results/{spammer_type}/barp.csv"

In [None]:
import os
# Make sure the output folder exists; exist_ok=True keeps this cell safe to re-run.
os.makedirs(f"results/{spammer_type}", exist_ok=True)

In [None]:
import csv

# Column layout of the summary file: the spammer percentage, then a (mean, std)
# pair for accuracy, weighted accuracy and tau.
header = [
    "percent",
    "BARP_acc_mean", "BARP_acc_std",
    "BARP_wacc_mean", "BARP_wacc_std",
    "BARP_tau_mean", "BARP_tau_std",
]

# Opening in write mode truncates any earlier results file, so the file
# restarts with just the header row.
with open(csv_file, "w", newline="") as out_file:
    csv.writer(out_file).writerow(header)

In [None]:
import csv
import numpy as np  # explicit: otherwise `np` is only reachable through a wildcard import

# Robustness sweep for an equal-proportion mix of all spammer types.
# For every spammer percentage, BARP is fitted on 10 seeded corruptions of `df`
# (seeds 20..29). One summary row -- mean and std over the successful seeds of
# accuracy, weighted accuracy and tau -- is appended to `csv_file`.
for percent in percents:
    # per-seed metrics for this percentage
    BARP_accs, BARP_waccs, BARP_taus = [], [], []
    skipped_seeds = []  # seeds dropped because the fit raised or returned NaN scores

    for sd in range(20, 30):

        # Add the spammers, then convert the comparisons to the structure BARP consumes.
        # (`random_df` and the `faceage` names are presumably carried over from other
        # cells; here the frame holds the mixed spammer population on the passage set.)
        random_df, spammer_ids = add_equal_proportion_of_all_spammers(df, percent, seed=sd)
        PC_faceage = df_to_pickle(random_df, df_passage)
        K = len(PC_faceage.keys())
        print(K)

        try:
            FaceAge = opt_fair.BARP(data=PC_faceage, penalty=0, classes=classes, device=device)
            annot_bt_temp, annot_bias = opt_fair._alternate_optim_torch(size, K, FaceAge, iters=100)
            annot_bt_np = to_numpy(annot_bt_temp)
            if np.isnan(annot_bt_np).any():
                # Record the unusable fit instead of dropping the seed silently.
                print(f"BARP returned NaN scores for percent={percent}, seed={sd}; run skipped")
                skipped_seeds.append(sd)
                continue
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
            if BARP_tau < 0:
                # The orientation of the scores is chosen using the ground truth: a
                # negatively correlated solution is negated before it is evaluated.
                annot_bt_np = -annot_bt_np
            BARP_acc = compute_acc(gt_df, annot_bt_np, device)
            BARP_wacc = compute_weighted_acc(gt_df, annot_bt_np, device)
            # Recomputed so the stored tau matches the (possibly negated) scores.
            BARP_tau = safe_kendalltau(annot_bt_np, gt_df['score'].to_numpy())
        except Exception as e:
            print(f"BARP failed due to {e}")
            skipped_seeds.append(sd)
            continue
        BARP_accs.append(BARP_acc)
        BARP_waccs.append(BARP_wacc)
        BARP_taus.append(BARP_tau)

    # Aggregate once over the seeds that succeeded. With no successful run the
    # statistics are written as NaN directly instead of via np.mean([]), which
    # yields the same NaN but emits a RuntimeWarning.
    summary = []
    for values in (BARP_accs, BARP_waccs, BARP_taus):
        if values:
            summary.extend([np.mean(values), np.std(values)])
        else:
            summary.extend([float("nan"), float("nan")])
    acc_mean, acc_std, wacc_mean, wacc_std, tau_mean, tau_std = summary

    row = [percent, *summary]
    with open(csv_file, mode='a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)
    print(
        f"BARP | "
        f"Percent: {percent} |"
        f"Acc: {acc_mean:.4f} ± {acc_std:.4f} | "
        f"WAcc: {wacc_mean:.4f} ± {wacc_std:.4f} | "
        f"Tau: {tau_mean:.4f} ± {tau_std:.4f}")
    if skipped_seeds:
        # Make it visible when the statistics cover fewer than the 10 planned runs.
        print(f"  note: {len(skipped_seeds)} seed(s) skipped {skipped_seeds}; "
              f"statistics cover {len(BARP_accs)} run(s)")