In [1]:
!pip install transformers torch pandas scikit-learn tqdm



Collecting transformers
  Obtaining dependency information for transformers from https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl.metadata
  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting torch
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/ae/95/ae26263aceb3d57b821179f827d0e321373ed49423e603dd5906ab14a730/torch-2.8.0-cp39-none-macosx_11_0_arm64.whl.metadata
  Using cached torch-2.8.0-cp39-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting filelock (from transformers)
  Obtaining dependency information for filelock from https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl.metadata
  Using cached filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Obtaining depe

0.8.5


Collecting google-cloud-aiplatform
  Obtaining dependency information for google-cloud-aiplatform from https://files.pythonhosted.org/packages/9c/0a/6ad76f2fcc7ed7049729f9cabf3c46f2143e2a8dd69bfbf1daf853a2559b/google_cloud_aiplatform-1.122.0-py2.py3-none-any.whl.metadata
  Downloading google_cloud_aiplatform-1.122.0-py2.py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting google-cloud-storage<3.0.0,>=1.32.0 (from google-cloud-aiplatform)
  Obtaining dependency information for google-cloud-storage<3.0.0,>=1.32.0 from https://files.pythonhosted.org/packages/d5/94/6db383d8ee1adf45dc6c73477152b82731fa4c4a46d9c1932cc8757e0fd4/google_cloud_storage-2.19.0-py2.py3-none-any.whl.metadata
  Downloading google_cloud_storage-2.19.0-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting google-cloud-bigquery!=3.20.0,<4.0.0,>=1.15.0 (from google-cloud-aiplatform)
  Obtaining dependency i

models/embedding-gecko-001
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-05-20
models/gemini-2.5-flash
models/gemini-2.5-flash-lite-preview-06-17
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/gemini-2.5-flash-preview-tts
models/gemini-2.5-pro-preview-tts
models/learnlm-2.0-flash-experimental
models/gemma-3-1b-it
models/gemma-3-4b-it
models/gemma-3-12b-it
models/gemma-3-27b-it
models/gemma-3n-e4b-it
models/gemma-3n-e2b-it
models/gemini-flash-latest
models/gemini-flash-lite-latest
models/gemini-

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, classification_report

# === Load Model ===
# Tokenizer and sequence-classification head are both fetched by checkpoint name.
MODEL_NAME = "unitary/toxic-bert"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

CSV_PATH = "/Users/nili/PycharmProjects/exist2025/Dataset/Labeled __Clean_Dataset.csv"
# Upper bound on how many rows are classified and evaluated further down.
MAX_REQUESTS = 25000

df = pd.read_csv(CSV_PATH)
# Column 2 holds the text and column 3 the label. Drop rows missing either one
# so that `texts` and `true_labels` stay aligned and label parsing never sees NaN.
df = df.dropna(subset=[df.columns[2], df.columns[3]])

texts = df.iloc[:, 2].tolist()
# Labels are parsed from a bracketed, comma-separated form (e.g. "[1.0, 0.0]"):
# keep the first entry only. `str()` lets the same parsing work when pandas has
# already read the column as numbers instead of strings.
true_labels = [
    int(float(str(x).strip("[]").split(",")[0]))
    for x in df.iloc[:, 3].tolist()
]  # 1 = sexist, 0 = non-sexist


# Mapping from ToxicBERT to EXIST tasks
def map_to_exist(results, flag_threshold=0.5, label_threshold=0.4):
    """Translate per-label toxicity scores into heuristic EXIST task outputs.

    Args:
        results: Mapping from label name to score. When at least one score
            exceeds ``flag_threshold`` the keys "toxic", "severe_toxic",
            "obscene", "threat", "insult" and "identity_hate" are read.
        flag_threshold: A text is flagged (task 1 = 1) when any score in
            ``results`` is strictly greater than this value.
        label_threshold: A single label counts as "fired" for the intention
            and category heuristics when its score is strictly greater than
            this value.

    Returns:
        A tuple ``(task1, intention, categories)``:
        ``task1`` is 1 for flagged texts and 0 otherwise; ``intention`` is
        "direct", "judgemental", "reported", or "none" for unflagged texts;
        ``categories`` is a list of category names (empty for unflagged texts,
        never empty for flagged ones).

    Raises:
        KeyError: If the text is flagged and a label consulted by the
            heuristics is missing from ``results``.
    """
    # ---- Task 1: sexist vs not ----
    if not any(score > flag_threshold for score in results.values()):
        return 0, "none", []  # task1=0, no intention, no categories

    def fired(label):
        # True when the given label's score clears the per-label threshold.
        return results[label] > label_threshold

    # ---- Task 2: intention (heuristics) ----
    # Severe toxicity, threats, identity hate and insults all map to "direct".
    if fired("severe_toxic") or fired("threat") or fired("identity_hate") or fired("insult"):
        intention = "direct"
    elif fired("toxic"):
        intention = "judgemental"   # possibly calling out sexism
    else:
        intention = "reported"

    # ---- Task 3: categories (multi-label) ----
    # Each category is added when any of its trigger labels fired; the tuple
    # order fixes the order of the returned list.
    category_rules = (
        ("ideological_inequality", ("identity_hate",)),
        ("objectification", ("obscene",)),
        ("stereotyping_dominance", ("insult",)),
        ("sexual_violence", ("threat", "severe_toxic")),
    )
    categories = [
        category
        for category, triggers in category_rules
        if any(fired(label) for label in triggers)
    ]

    # Flagged texts always get at least one category.
    if not categories:
        categories.append("misogyny_non_sexual_violence")

    return 1, intention, categories


# Model prediction wrapper
def classify_text(text):
    """Score one text with the loaded classifier and map it to EXIST outputs.

    Uses the module-level ``tokenizer``, ``model`` and ``device``. The logits
    are passed through a sigmoid, paired with the six label names below, and
    handed to ``map_to_exist``.

    Args:
        text: The input passed to the tokenizer.

    Returns:
        Whatever ``map_to_exist`` returns for the resulting score mapping.
    """
    # NOTE(review): assumes the model emits its logits in exactly this label
    # order - confirm against model.config.id2label.
    label_names = ("toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate")

    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits

    # First (and only) row of the batch, moved to the CPU as a NumPy array.
    probabilities = torch.sigmoid(logits).cpu().numpy()[0]
    score_by_label = {name: score for name, score in zip(label_names, probabilities)}

    return map_to_exist(score_by_label)


# === Run predictions ===
# One list per EXIST task, filled in the same order as the classified texts.
task1_preds, task2_preds, task3_preds = [], [], []

# Iterate over the slice directly: tqdm takes its total from len() of the slice,
# so the progress bar is correct even when the dataset has fewer than
# MAX_REQUESTS rows.
for text in tqdm(texts[:MAX_REQUESTS]):
    t1, t2, t3 = classify_text(text)
    task1_preds.append(t1)
    task2_preds.append(t2)
    # Categories are stored as a single comma-separated string per text.
    task3_preds.append(",".join(t3))


# === Evaluate Task 1 only (binary) ===
# Gold labels for the same leading slice of the dataset that was classified.
true_eval = true_labels[:MAX_REQUESTS]

print("\n=== Toxic-BERT Baseline Evaluation (Task 1 only) ===")
print("Accuracy:", round(accuracy_score(true_eval, task1_preds), 3))
print("F1-Score:", round(f1_score(true_eval, task1_preds), 3))
print("\nClassification Report:")
# Pin labels to [0, 1] so the two target names always line up with the classes;
# without it the report raises ValueError when only one class occurs in the
# evaluated slice.
print(
    classification_report(
        true_eval,
        task1_preds,
        labels=[0, 1],
        target_names=["Non-Sexist", "Sexist"],
    )
)


# === Save Results ===
# One row per classified text: the input, its gold label, and the three
# heuristic predictions (task-3 categories as a comma-separated string).
df_out = pd.DataFrame(
    dict(
        text=texts[:MAX_REQUESTS],
        true_label=true_eval,
        pred_task1=task1_preds,
        pred_task2_intention=task2_preds,
        pred_task3_categories=task3_preds,
    )
)

# Write without the index column.
df_out.to_csv(
    "/Users/nili/PycharmProjects/exist2025/Dataset/toxicbert_exist_eval.csv",
    index=False,
)
print("\n Saved → toxicbert_exist_eval.csv")




KeyboardInterrupt: 