In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Mean Inferencing

In [None]:
"""
triple_soa_labels_minimal.py

Input:
    /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv

Assumes columns:
    - proposition
    - bart-large-mnli_entailment
    - bart-large-mnli_contradiction
    - roberta-large-mnli_entailment
    - roberta-large-mnli_contradiction
    - (optionally) bart-large-mnli_agency_score
    - (optionally) roberta-large-mnli_agency_score
    - (optionally) mean_agency_score

Output:
    /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_triple_labels.csv

Adds, per method (bart / roberta / combined):
    - *_soa_label      ∈ {POS_SOA, NEG_SOA, NON_SOA}
    - *_agency_related ∈ {True, False}
"""

import pandas as pd
import numpy as np

# 1. Config

INPUT_FILE  = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv"
OUTPUT_FILE = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_triple_labels.csv"

PROP_COL = "proposition"

# NLI probability columns
BART_ENT = "bart-large-mnli_entailment"
BART_CON = "bart-large-mnli_contradiction"
ROB_ENT  = "roberta-large-mnli_entailment"
ROB_CON  = "roberta-large-mnli_contradiction"

# Optional existing score columns
BART_SCORE_EXISTING = "bart-large-mnli_agency_score"
ROB_SCORE_EXISTING  = "roberta-large-mnli_agency_score"
MEAN_SCORE_EXISTING = "mean_agency_score"

# Thresholds (tune if you like)
BART_POS_TH = 0.30
BART_NEG_TH = -0.30

ROB_POS_TH  = 0.30
ROB_NEG_TH  = -0.30

COMB_POS_TH = 0.30
COMB_NEG_TH = -0.30


# 2. Load data

df = pd.read_csv(INPUT_FILE)
print(f"Loaded {len(df)} rows from: {INPUT_FILE}")

required = [PROP_COL, BART_ENT, BART_CON, ROB_ENT, ROB_CON]
missing = [c for c in required if c not in df.columns]
if missing:
    raise KeyError(f"Missing required columns: {missing}. Available: {list(df.columns)}")

# Ensure numeric NLI probs
for col in [BART_ENT, BART_CON, ROB_ENT, ROB_CON]:
    df[col] = pd.to_numeric(df[col], errors="coerce")


# 3. Compute scores

# BART score
if BART_SCORE_EXISTING in df.columns:
    df["bart_agency_score"] = pd.to_numeric(df[BART_SCORE_EXISTING], errors="coerce")
    print("Using existing BART score column:", BART_SCORE_EXISTING)
else:
    df["bart_agency_score"] = df[BART_ENT] - df[BART_CON]
    print("Computed BART score as entailment - contradiction")

# RoBERTa score
if ROB_SCORE_EXISTING in df.columns:
    df["roberta_agency_score"] = pd.to_numeric(df[ROB_SCORE_EXISTING], errors="coerce")
    print("Using existing RoBERTa score column:", ROB_SCORE_EXISTING)
else:
    df["roberta_agency_score"] = df[ROB_ENT] - df[ROB_CON]
    print("Computed RoBERTa score as entailment - contradiction")

# Combined score
if MEAN_SCORE_EXISTING in df.columns:
    df["combined_agency_score"] = pd.to_numeric(df[MEAN_SCORE_EXISTING], errors="coerce")
    print("Using existing mean_agency_score as combined_agency_score")
else:
    df["combined_agency_score"] = (
        df["bart_agency_score"] + df["roberta_agency_score"]
    ) / 2.0
    print("Computed combined_agency_score as mean of BART and RoBERTa scores")


# 4. Label function: POS_SOA / NEG_SOA / NON_SOA

def three_way_label(score: float, pos_th: float, neg_th: float) -> str:
    """
    Map one agency score onto a three-way SoA class.

    Args:
        score: Agency score to classify; may be missing (NaN / None).
        pos_th: Scores at or above this value are labelled POS_SOA.
        neg_th: Scores at or below this value are labelled NEG_SOA.

    Returns:
        "POS_SOA", "NEG_SOA" or "NON_SOA". Missing scores and scores that
        reach neither threshold are "NON_SOA". The positive threshold is
        checked first.
    """
    label = "NON_SOA"
    if not pd.isna(score):
        if score >= pos_th:
            label = "POS_SOA"
        elif score <= neg_th:
            label = "NEG_SOA"
    return label


def add_method_labels(df, score_col, label_prefix, pos_th, neg_th):
    """
    Attach a 3-way SoA label and an agency flag for one scoring method.

    Writes two columns into `df` in place and prints a short summary:
        {label_prefix}_soa_label      ∈ {POS_SOA, NEG_SOA, NON_SOA}
        {label_prefix}_agency_related ∈ {True, False}

    Args:
        df: DataFrame containing `score_col`; modified in place.
        score_col: Name of the column holding the method's agency score.
        label_prefix: Prefix of the two new column names (also shown,
            upper-cased, in the printed header).
        pos_th: Threshold passed to `three_way_label` for POS_SOA.
        neg_th: Threshold passed to `three_way_label` for NEG_SOA.
    """
    label_col, flag_col = f"{label_prefix}_soa_label", f"{label_prefix}_agency_related"

    def _classify(value):
        # Bind this method's thresholds so the Series can be mapped element-wise.
        return three_way_label(value, pos_th, neg_th)

    df[label_col] = df[score_col].apply(_classify)

    # A row is agency-related whenever it received a directional label.
    directional = ["POS_SOA", "NEG_SOA"]
    df[flag_col] = df[label_col].isin(directional)

    related_share = df[flag_col].mean()
    print(f"\n=== {label_prefix.upper()} 3-way labels ===")
    print(df[label_col].value_counts(dropna=False))
    print(f"{label_prefix} agency-related proportion: {related_share:.3%}")


# 5. Apply for BART / RoBERTa / Combined

add_method_labels(
    df,
    score_col="bart_agency_score",
    label_prefix="bart",
    pos_th=BART_POS_TH,
    neg_th=BART_NEG_TH,
)

add_method_labels(
    df,
    score_col="roberta_agency_score",
    label_prefix="roberta",
    pos_th=ROB_POS_TH,
    neg_th=ROB_NEG_TH,
)

add_method_labels(
    df,
    score_col="combined_agency_score",
    label_prefix="combined",
    pos_th=COMB_POS_TH,
    neg_th=COMB_NEG_TH,
)


# 6. Save augmented file

df.to_csv(OUTPUT_FILE, index=False)
print(f"\nSaved augmented file with POS_SOA/NEG_SOA/NON_SOA labels + flags to:\n  {OUTPUT_FILE}")


Loaded 63084 rows from: /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv
Using existing BART score column: bart-large-mnli_agency_score
Using existing RoBERTa score column: roberta-large-mnli_agency_score
Using existing mean_agency_score as combined_agency_score

=== BART 3-way labels ===
bart_soa_label
NEG_SOA    30385
NON_SOA    19321
POS_SOA    13378
Name: count, dtype: int64
bart agency-related proportion: 69.373%

=== ROBERTA 3-way labels ===
roberta_soa_label
NON_SOA    48288
POS_SOA    12333
NEG_SOA     2463
Name: count, dtype: int64
roberta agency-related proportion: 23.454%

=== COMBINED 3-way labels ===
combined_soa_label
NON_SOA    35386
NEG_SOA    16961
POS_SOA    10737
Name: count, dtype: int64
combined agency-related proportion: 43.907%

Saved augmented file with POS_SOA/NEG_SOA/NON_SOA labels + flags to:
  /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_triple_labels.csv


# Winner Takes All Inferencing

In [None]:
import pandas as pd
import os

# 1. Config

INPUT_FILE = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv"
OUTPUT_FILE = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_triple_labels_winner.csv"

# Column names for per-model scores (if already present)
BART_SCORE_COL = "bart-large-mnli_agency_score"
ROBERTA_SCORE_COL = "roberta-large-mnli_agency_score"

# If we need to reconstruct scores from NLI probabilities
BART_ENT_COL = "bart-large-mnli_entailment"
BART_CON_COL  = "bart-large-mnli_contradiction"
ROBERTA_ENT_COL = "roberta-large-mnli_entailment"
ROBERTA_CON_COL  = "roberta-large-mnli_contradiction"

# Thresholds
POS_THRESHOLD = 0.3    # per-model POS cutoff
NEG_THRESHOLD = -0.3   # per-model NEG cutoff
COMBINED_THRESHOLD = 0.3  # minimal |score| for combined POS/NEG

# 2. Load data

if not os.path.exists(INPUT_FILE):
    raise FileNotFoundError(f"Input file not found: {INPUT_FILE}")

df = pd.read_csv(INPUT_FILE)
print(f"Loaded {len(df):,} rows from: {INPUT_FILE}")

cols = set(df.columns)

# 3. Ensure agency score columns exist

# BART score
if BART_SCORE_COL not in cols:
    if BART_ENT_COL in cols and BART_CON_COL in cols:
        print("BART agency score not found – computing from entailment - contradiction.")
        df[BART_SCORE_COL] = df[BART_ENT_COL] - df[BART_CON_COL]
    else:
        raise KeyError(
            f"Missing {BART_SCORE_COL} and cannot reconstruct because "
            f"{BART_ENT_COL} or {BART_CON_COL} are missing."
        )
else:
    print(f"Using existing BART score column: {BART_SCORE_COL}")

# RoBERTa score
if ROBERTA_SCORE_COL not in cols:
    if ROBERTA_ENT_COL in cols and ROBERTA_CON_COL in cols:
        print("RoBERTa agency score not found – computing from entailment - contradiction.")
        df[ROBERTA_SCORE_COL] = df[ROBERTA_ENT_COL] - df[ROBERTA_CON_COL]
    else:
        raise KeyError(
            f"Missing {ROBERTA_SCORE_COL} and cannot reconstruct because "
            f"{ROBERTA_ENT_COL} or {ROBERTA_CON_COL} are missing."
        )
else:
    print(f"Using existing RoBERTa score column: {ROBERTA_SCORE_COL}")

# Optional: keep a combined mean score around, if not present
if "combined_agency_score" not in df.columns:
    df["combined_agency_score"] = (df[BART_SCORE_COL] + df[ROBERTA_SCORE_COL]) / 2.0


# 4. Per-model 3-way labels: POS_SOA / NEG_SOA / NON_SOA

POS = "POS_SOA"
NEG = "NEG_SOA"
NON = "NON_SOA"

def soa_label_from_score(score: float) -> str:
    """
    Classify a per-model agency score as POS / NEG / NON.

    Both cutoffs are inclusive and come from the module-level
    POS_THRESHOLD and NEG_THRESHOLD; a score that reaches neither
    cutoff is NON.
    """
    if score >= POS_THRESHOLD:
        return POS
    if score <= NEG_THRESHOLD:
        return NEG
    return NON

# BART labels + flags
df["bart_soa_label"] = df[BART_SCORE_COL].apply(soa_label_from_score)
df["bart_agency_related"] = df["bart_soa_label"].apply(lambda x: x in {POS, NEG})

# RoBERTa labels + flags
df["roberta_soa_label"] = df[ROBERTA_SCORE_COL].apply(soa_label_from_score)
df["roberta_agency_related"] = df["roberta_soa_label"].apply(lambda x: x in {POS, NEG})

print("\n=== BART 3-way labels ===")
print(df["bart_soa_label"].value_counts())
print(f"bart agency-related proportion: "
      f"{df['bart_agency_related'].mean() * 100:.3f}%")

print("\n=== RoBERTa 3-way labels ===")
print(df["roberta_soa_label"].value_counts())
print(f"roberta agency-related proportion: "
      f"{df['roberta_agency_related'].mean() * 100:.3f}%")

# 5. Combined winner-takes-strongest logic

def combine_soa_winner_takes_strongest(row, thr=COMBINED_THRESHOLD):
    """
    Combine the BART and RoBERTa agency scores of one row into a single label.

    Combined logic:
    - A missing (NaN) score carries no signal and is treated as 0.0, so for a
      positive `thr` it can neither pass the threshold nor win the comparison.
    - If neither model has |score| >= thr → NON_SOA, not agency-related.
    - Otherwise, agency_related = True, and whichever model has the larger
      |score| determines the POS/NEG label (ties go to BART).

    Args:
        row: Row (e.g. from `df.apply(..., axis=1)`) exposing the
            BART_SCORE_COL and ROBERTA_SCORE_COL entries.
        thr: Minimum |score| at least one model must reach for the row to
            count as agency-related.

    Returns:
        Tuple `(combined_label, combined_related)`.
    """
    sb = float(row[BART_SCORE_COL])
    sr = float(row[ROBERTA_SCORE_COL])

    # NaN fails every comparison. Without this guard a row with a missing
    # score skips the NON_SOA branch below and takes its direction from the
    # other model even if that score is under the threshold; two missing
    # scores would be labelled NEG_SOA.
    if pd.isna(sb):
        sb = 0.0
    if pd.isna(sr):
        sr = 0.0

    absb, absr = abs(sb), abs(sr)

    # 1) If neither model is strong enough → NON_SOA
    if absb < thr and absr < thr:
        return NON, False

    # 2) At least one model is strong: the winner is the model with the
    #    stronger absolute signal, and its sign gives the direction.
    winner_score = sb if absb >= absr else sr
    combined_label = POS if winner_score >= 0 else NEG

    return combined_label, True

combined = df.apply(combine_soa_winner_takes_strongest, axis=1)
df["combined_soa_label"] = combined.apply(lambda x: x[0])
df["combined_agency_related"] = combined.apply(lambda x: x[1])

print("\n=== COMBINED 3-way labels (winner-takes-strongest) ===")
print(df["combined_soa_label"].value_counts())
print(f"combined agency-related proportion: "
      f"{df['combined_agency_related'].mean() * 100:.3f}%")

# 6. Save augmented file

df.to_csv(OUTPUT_FILE, index=False)
print(f"\nSaved augmented file with BART/RoBERTa/combined SoA labels to:\n  {OUTPUT_FILE}")


Loaded 63,084 rows from: /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv
Using existing BART score column: bart-large-mnli_agency_score
Using existing RoBERTa score column: roberta-large-mnli_agency_score

=== BART 3-way labels ===
bart_soa_label
NEG_SOA    30385
NON_SOA    19321
POS_SOA    13378
Name: count, dtype: int64
bart agency-related proportion: 69.373%

=== RoBERTa 3-way labels ===
roberta_soa_label
NON_SOA    48288
POS_SOA    12333
NEG_SOA     2463
Name: count, dtype: int64
roberta agency-related proportion: 23.454%

=== COMBINED 3-way labels (winner-takes-strongest) ===
combined_soa_label
NEG_SOA    30095
POS_SOA    17278
NON_SOA    15711
Name: count, dtype: int64
combined agency-related proportion: 75.095%

Saved augmented file with BART/RoBERTa/combined SoA labels to:
  /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_triple_labels_winner.csv


# Consensus Inferencing

In [None]:
import pandas as pd
import os

# 1. Config

INPUT_FILE = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv"
OUTPUT_FILE = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_triple_labels_consensus.csv"

# Column names for per-model scores (if already present)
BART_SCORE_COL = "bart-large-mnli_agency_score"
ROBERTA_SCORE_COL = "roberta-large-mnli_agency_score"

# If we need to reconstruct scores from NLI probabilities
BART_ENT_COL = "bart-large-mnli_entailment"
BART_CON_COL  = "bart-large-mnli_contradiction"
ROBERTA_ENT_COL = "roberta-large-mnli_entailment"
ROBERTA_CON_COL  = "roberta-large-mnli_contradiction"

# Thresholds
POS_THRESHOLD = 0.3    # per-model POS cutoff
NEG_THRESHOLD = -0.3   # per-model NEG cutoff
COMBINED_THRESHOLD = 0.3  # used only for optional winner-takes-strongest label

POS = "POS_SOA"
NEG = "NEG_SOA"
NON = "NON_SOA"

# 2. Load data

if not os.path.exists(INPUT_FILE):
    raise FileNotFoundError(f"Input file not found: {INPUT_FILE}")

df = pd.read_csv(INPUT_FILE)
print(f"Loaded {len(df):,} rows from: {INPUT_FILE}")

cols = set(df.columns)

# 3. Ensure agency score columns exist

# BART
if BART_SCORE_COL not in cols:
    if BART_ENT_COL in cols and BART_CON_COL in cols:
        print("BART agency score not found – computing from entailment - contradiction.")
        df[BART_SCORE_COL] = df[BART_ENT_COL] - df[BART_CON_COL]
    else:
        raise KeyError(
            f"Missing {BART_SCORE_COL} and cannot reconstruct because "
            f"{BART_ENT_COL} or {BART_CON_COL} are missing."
        )
else:
    print(f"Using existing BART score column: {BART_SCORE_COL}")

# RoBERTa
if ROBERTA_SCORE_COL not in cols:
    if ROBERTA_ENT_COL in cols and ROBERTA_CON_COL in cols:
        print("RoBERTa agency score not found – computing from entailment - contradiction.")
        df[ROBERTA_SCORE_COL] = df[ROBERTA_ENT_COL] - df[ROBERTA_CON_COL]
    else:
        raise KeyError(
            f"Missing {ROBERTA_SCORE_COL} and cannot reconstruct because "
            f"{ROBERTA_ENT_COL} or {ROBERTA_CON_COL} are missing."
        )
else:
    print(f"Using existing RoBERTa score column: {ROBERTA_SCORE_COL}")

# Optional: keep mean score if you like
if "combined_agency_score" not in df.columns:
    df["combined_agency_score"] = (df[BART_SCORE_COL] + df[ROBERTA_SCORE_COL]) / 2.0

# 4. Per-model 3-way labels

def soa_label_from_score(score: float) -> str:
    """
    Turn a per-model agency score into one of the POS / NEG / NON labels.

    The score is compared against the module-level POS_THRESHOLD first and
    NEG_THRESHOLD second (both inclusive); anything else is NON.
    """
    if score >= POS_THRESHOLD:
        return POS
    return NEG if score <= NEG_THRESHOLD else NON

# BART labels + flags
df["bart_soa_label"] = df[BART_SCORE_COL].apply(soa_label_from_score)
df["bart_agency_related"] = df["bart_soa_label"].apply(lambda x: x in {POS, NEG})

# RoBERTa labels + flags
df["roberta_soa_label"] = df[ROBERTA_SCORE_COL].apply(soa_label_from_score)
df["roberta_agency_related"] = df["roberta_soa_label"].apply(lambda x: x in {POS, NEG})

print("\n=== BART 3-way labels ===")
print(df["bart_soa_label"].value_counts())
print(f"bart agency-related proportion: {df['bart_agency_related'].mean() * 100:.3f}%")

print("\n=== RoBERTa 3-way labels ===")
print(df["roberta_soa_label"].value_counts())
print(f"roberta agency-related proportion: {df['roberta_agency_related'].mean() * 100:.3f}%")

# 5. Combined label (CONSENSUS for direction, OR for relatedness)

def combine_soa_consensus(row):
    """
    Combine the per-model labels of one row by consensus.

    Relatedness is the OR of the two per-model flags. A direction (POS or
    NEG) is reported only when both models gave that same directional label;
    otherwise the label is NON.

    Args:
        row: Row exposing "bart_soa_label", "roberta_soa_label",
            "bart_agency_related" and "roberta_agency_related".

    Returns:
        Tuple `(combined_label, combined_related)`.
    """
    label_b, label_r, rel_b, rel_r = (
        row["bart_soa_label"],
        row["roberta_soa_label"],
        row["bart_agency_related"],
        row["roberta_agency_related"],
    )

    # Neither model flags the row: not SoA-related at all.
    if not (rel_b or rel_r):
        return NON, False

    # Related overall; keep a direction only if both models share it.
    agreed = label_b in {POS, NEG} and label_b == label_r
    return (label_b if agreed else NON), True

consensus = df.apply(combine_soa_consensus, axis=1)
df["combined_soa_label"] = consensus.apply(lambda x: x[0])
df["combined_agency_related"] = consensus.apply(lambda x: x[1])

print("\n=== COMBINED 3-way labels (CONSENSUS direction) ===")
print(df["combined_soa_label"].value_counts())
print(f"combined agency-related proportion: {df['combined_agency_related'].mean() * 100:.3f}%")

# 6. (Optional) winner-takes-strongest label, if you still want it

def combine_soa_winner_takes_strongest(row, thr=COMBINED_THRESHOLD):
    """
    Winner-takes-strongest combination of the BART and RoBERTa scores.

    A missing (NaN) score is treated as 0.0 (no signal). If neither |score|
    reaches `thr` the row is (NON, False); otherwise it is agency-related and
    the sign of the score with the larger magnitude picks POS or NEG, with
    ties going to BART.

    Args:
        row: Row exposing the BART_SCORE_COL and ROBERTA_SCORE_COL entries.
        thr: Minimum |score| at least one model must reach.

    Returns:
        Tuple `(label, combined_related)`.
    """
    sb = float(row[BART_SCORE_COL])
    sr = float(row[ROBERTA_SCORE_COL])

    # NaN compares False against everything, so an unguarded missing score
    # would bypass the threshold check and could yield a spurious POS/NEG.
    if pd.isna(sb):
        sb = 0.0
    if pd.isna(sr):
        sr = 0.0

    absb, absr = abs(sb), abs(sr)

    if absb < thr and absr < thr:
        return NON, False

    strongest = sb if absb >= absr else sr
    label = POS if strongest >= 0 else NEG

    return label, True

winner = df.apply(combine_soa_winner_takes_strongest, axis=1)
df["combined_soa_label_winner"] = winner.apply(lambda x: x[0])
df["combined_agency_related_winner"] = winner.apply(lambda x: x[1])

print("\n=== COMBINED 3-way labels (winner-takes-strongest, optional) ===")
print(df["combined_soa_label_winner"].value_counts())
print(f"combined (winner) agency-related proportion: "
      f"{df['combined_agency_related_winner'].mean() * 100:.3f}%")

# 7. Save augmented file

df.to_csv(OUTPUT_FILE, index=False)
print(f"\nSaved augmented file with per-model + combined SoA labels to:\n  {OUTPUT_FILE}")


Loaded 63,084 rows from: /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv
Using existing BART score column: bart-large-mnli_agency_score
Using existing RoBERTa score column: roberta-large-mnli_agency_score

=== BART 3-way labels ===
bart_soa_label
NEG_SOA    30385
NON_SOA    19321
POS_SOA    13378
Name: count, dtype: int64
bart agency-related proportion: 69.373%

=== RoBERTa 3-way labels ===
roberta_soa_label
NON_SOA    48288
POS_SOA    12333
NEG_SOA     2463
Name: count, dtype: int64
roberta agency-related proportion: 23.454%

=== COMBINED 3-way labels (CONSENSUS direction) ===
combined_soa_label
NON_SOA    56322
POS_SOA     4809
NEG_SOA     1953
Name: count, dtype: int64
combined agency-related proportion: 75.095%

=== COMBINED 3-way labels (winner-takes-strongest, optional) ===
combined_soa_label_winner
NEG_SOA    30095
POS_SOA    17278
NON_SOA    15711
Name: count, dtype: int64
combined (winner) agency-related proportion: 75.095%

Saved augmented file with per-model 

# Substituting RoBERTa with BERTweet (distilled from BART inferences)

In [None]:
import transformers
print(transformers.__version__)

4.57.2


In [None]:
import os
# Add this line to force synchronous CUDA launches for better error reporting
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import numpy as np
import pandas as pd
from dataclasses import dataclass

import torch
import torch.nn.functional as F

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    set_seed,
)

# 1. Config

INPUT_CSV = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv"
OUTPUT_CSV = "/content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_bertweet_student.csv"
MODEL_DIR = "/content/drive/MyDrive/NLP /bertweet_soa_nli_student"

HYPOTHESIS_TEXT = (
    "The proposition refers to the ability of humans to make choices, "
    "exert control, or take responsibility for the actions and outcomes of AI."
)

BERTWEET_MODEL_NAME = "vinai/bertweet-base"

# SoA thresholds (same logic as your MNLI pipeline)
POS_THR = 0.3
NEG_THR = -0.3

# Training hyperparameters
RANDOM_SEED = 42
NUM_EPOCHS = 3
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
WEIGHT_DECAY = 0.01
WARMUP_RATIO = 0.1
MAX_LENGTH = 128  # Changed from 256 to 128 to match BERTweet-base max_position_embeddings

set_seed(RANDOM_SEED)

# 2. Load original data

if not os.path.exists(INPUT_CSV):
    raise FileNotFoundError(f"Input file not found: {INPUT_CSV}")

df_full = pd.read_csv(INPUT_CSV)

if "proposition" not in df_full.columns:
    raise KeyError(
        "Expected column 'proposition' in input CSV. "
        f"Available columns: {list(df_full.columns)}"
    )

required_bart_cols = [
    "bart-large-mnli_entailment",
    "bart-large-mnli_neutral",
    "bart-large-mnli_contradiction",
]
missing = [c for c in required_bart_cols if c not in df_full.columns]
if missing:
    raise KeyError(
        "Missing required BART columns in input CSV: "
        + ", ".join(missing)
    )

print(f"Loaded {len(df_full)} rows from: {INPUT_CSV}")

# 3. Build distillation dataset (train on BART outputs)

# For training, we only need unique propositions with valid BART probs
df_train = df_full[["proposition"] + required_bart_cols].copy()

# Drop rows with any NaNs in teacher columns
df_train = df_train.dropna(subset=required_bart_cols)

# Deduplicate for training signal
df_train = df_train.drop_duplicates(subset=["proposition"]).reset_index(drop=True)

# Normalize teacher probabilities row-wise to be safe
probs = df_train[required_bart_cols].to_numpy(dtype=np.float32)
probs_sum = probs.sum(axis=1, keepdims=True)
probs_sum[probs_sum == 0.0] = 1.0  # avoid divide-by-zero
probs = probs / probs_sum

df_train["teacher_ent"] = probs[:, 0]
df_train["teacher_neu"] = probs[:, 1]
df_train["teacher_con"] = probs[:, 2]

print(f"Distillation training set: {len(df_train)} unique propositions")

# Build Hugging Face Dataset
dataset = Dataset.from_pandas(
    df_train[["proposition", "teacher_ent", "teacher_neu", "teacher_con"]]
)

# Train/validation split
dataset = dataset.train_test_split(test_size=0.1, seed=RANDOM_SEED)
hf_train = dataset["train"]
hf_val = dataset["test"]

# 4. Tokenization + dataset mapping

tokenizer = AutoTokenizer.from_pretrained(BERTWEET_MODEL_NAME, use_fast=True)

def tokenize_batch(batch):
    """
    Tokenize (proposition, hypothesis) pairs and attach the teacher soft labels.

    Args:
        batch: Batched mapping with equal-length "proposition",
            "teacher_ent", "teacher_neu" and "teacher_con" lists.

    Returns:
        The tokenizer encoding with an added "labels" array of shape
        (batch_size, 3), ordered entailment / neutral / contradiction.
    """
    propositions = batch["proposition"]

    # premise = proposition, hypothesis = SoA hypothesis
    enc = tokenizer(
        propositions,
        [HYPOTHESIS_TEXT] * len(propositions),
        # Truncate only the premise. The default pair strategy
        # ("longest_first") logs an "overflowing tokens are not returned"
        # warning whenever it has to truncate, which flooded the cell output;
        # `return_overflowing_tokens=False` does not silence it.
        # "only_first" also guarantees the hypothesis is never cut.
        # NOTE(review): assumes the hypothesis plus special tokens fits
        # well inside MAX_LENGTH — confirm if either is changed.
        truncation="only_first",
        max_length=MAX_LENGTH,
        padding="max_length",
    )

    # Attach soft labels from BART teacher, one row per example.
    teacher_columns = ("teacher_ent", "teacher_neu", "teacher_con")
    labels = np.stack(
        [np.array(batch[name], dtype=np.float32) for name in teacher_columns],
        axis=1,
    )  # (batch_size, 3)
    enc["labels"] = labels
    return enc

hf_train = hf_train.map(
    tokenize_batch,
    batched=True,
    remove_columns=hf_train.column_names,
)
hf_val = hf_val.map(
    tokenize_batch,
    batched=True,
    remove_columns=hf_val.column_names,
)

hf_train.set_format(type="torch")
hf_val.set_format(type="torch")

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 5. Distillation Trainer (KL divergence)

# Removed @dataclass as it was causing issues with Trainer's __init__
class DistillationTrainer(Trainer):
    """
    Trainer for soft-label distillation.

    The "labels" entry of each batch holds the teacher's class probabilities;
    the loss is the KL divergence between those and the student's softmax.
    """

    def __init__(self, *args, **kwargs):
        # Plain pass-through to Trainer.__init__; no extra state is added.
        super().__init__(*args, **kwargs)

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=0):
        """
        Compute KL(teacher || student) for one batch.

        Args:
            model: Student model; called as `model(**inputs)` and expected to
                return an object with a `.logits` attribute.
            inputs: Batch mapping. Its "labels" entry (teacher probabilities)
                is popped before the forward pass, so `inputs` is mutated.
            return_outputs: If truthy, return `(loss, outputs)`.
            num_items_in_batch: Accepted but not used by this loss.

        Returns:
            The loss tensor, or `(loss, outputs)` when `return_outputs` is set.
        """
        teacher_probs = inputs.pop("labels")  # shape (batch, 3)
        outputs = model(**inputs)

        # F.kl_div takes the student as log-probabilities, the teacher as probabilities.
        student_log_probs = F.log_softmax(outputs.logits, dim=-1)
        loss = F.kl_div(student_log_probs, teacher_probs, reduction="batchmean")

        return (loss, outputs) if return_outputs else loss

# 6. Model + TrainingArguments + Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    BERTWEET_MODEL_NAME,
    num_labels=3,
)

training_args = TrainingArguments(
    output_dir=os.path.join(MODEL_DIR, "checkpoints"),
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    warmup_ratio=WARMUP_RATIO,
    logging_steps=100,
    save_steps=1000,
    save_total_limit=2,
    fp16=False,              # Changed to False to address CUDA error
    report_to="none",
)

# Build the distillation trainer. The tokenizer is passed as
# `processing_class`: the `tokenizer=` keyword of Trainer is deprecated in
# recent transformers releases (this notebook reports 4.57.2) and emits a
# warning on construction.
# NOTE(review): `processing_class` requires a transformers version that
# already supports it — confirm if the environment is ever pinned lower.
trainer = DistillationTrainer(
    model=model,
    args=training_args,
    train_dataset=hf_train,
    eval_dataset=hf_val,
    data_collator=data_collator,
    processing_class=tokenizer,
)

print("Starting distillation training...")
trainer.train()
print("Training finished.")

# Save final student model + tokenizer
os.makedirs(MODEL_DIR, exist_ok=True)
model.save_pretrained(MODEL_DIR)
tokenizer.save_pretrained(MODEL_DIR)
print(f"Saved distilled BERTweet NLI student to: {MODEL_DIR}")

# 7. Apply trained student to ALL rows in original CSV

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

propositions_all = df_full["proposition"].fillna("").astype(str).tolist()
batch_size_infer = 64

bert_ent_list = []
bert_neu_list = []
bert_con_list = []
bert_score_list = []
bert_label_list = []
bert_related_list = []

def soa_label_from_score(s: float, pos_thr: float = POS_THR, neg_thr: float = NEG_THR):
    """
    Label an agency score as "POS_SOA", "NEG_SOA" or "NON_SOA".

    Args:
        s: Agency score to classify.
        pos_thr: Inclusive lower bound for "POS_SOA" (checked first).
        neg_thr: Inclusive upper bound for "NEG_SOA".

    Returns:
        The label string; scores reaching neither bound are "NON_SOA".
    """
    if s >= pos_thr:
        return "POS_SOA"
    return "NEG_SOA" if s <= neg_thr else "NON_SOA"

# Run the student over every proposition in fixed-size batches and collect
# per-row probabilities, agency score, label and flag in input order.
with torch.no_grad():
    for i in range(0, len(propositions_all), batch_size_infer):
        batch_props = propositions_all[i : i + batch_size_infer]
        enc = tokenizer(
            batch_props,
            [HYPOTHESIS_TEXT] * len(batch_props),
            # Truncate only the premise: the default pair strategy
            # ("longest_first") logs an "overflowing tokens are not returned"
            # warning for each truncated pair, which flooded this cell's
            # output. "only_first" also keeps the hypothesis intact.
            # NOTE(review): assumes the hypothesis plus special tokens fits
            # well inside MAX_LENGTH — confirm if either is changed.
            truncation="only_first",
            max_length=MAX_LENGTH,
            padding=True,
            return_tensors="pt",
        )
        enc = {k: v.to(device) for k, v in enc.items()}

        outputs = model(**enc)
        logits = outputs.logits  # (batch, 3)
        probs = torch.softmax(logits, dim=-1).cpu().numpy()

        # Column order follows the teacher labels: entailment, neutral, contradiction.
        for p_ent, p_neu, p_con in probs:
            score = float(p_ent - p_con)
            label = soa_label_from_score(score)
            related = label in ("POS_SOA", "NEG_SOA")

            bert_ent_list.append(float(p_ent))
            bert_neu_list.append(float(p_neu))
            bert_con_list.append(float(p_con))
            bert_score_list.append(score)
            bert_label_list.append(label)
            bert_related_list.append(related)

# Sanity check
assert len(bert_ent_list) == len(df_full)

df_full["bertweet_entailment"] = bert_ent_list
df_full["bertweet_neutral"] = bert_neu_list
df_full["bertweet_contradiction"] = bert_con_list
df_full["bertweet_agency_score"] = bert_score_list
df_full["bertweet_soa_label"] = bert_label_list
df_full["bertweet_agency_related"] = bert_related_list

# 8. Save updated CSV

df_full.to_csv(OUTPUT_CSV, index=False)
print(f"\nSaved updated file with BERTweet student inferences to:\n  {OUTPUT_CSV}")


Loaded 63084 rows from: /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli.csv
Distillation training set: 62708 unique propositions


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

bpe.codes: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


Map:   0%|          | 0/56437 [00:00<?, ? examples/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens ar

Map:   0%|          | 0/6271 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  super().__init__(*args, **kwargs)


Starting distillation training...


model.safetensors:   0%|          | 0.00/543M [00:00<?, ?B/s]

Step,Training Loss
100,0.3535
200,0.2971
300,0.2864
400,0.2775
500,0.255
600,0.2346
700,0.2305
800,0.2299
900,0.2307
1000,0.2056


Training finished.


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Saved distilled BERTweet NLI student to: /content/drive/MyDrive/NLP /bertweet_soa_nli_student


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens ar


Saved updated file with BERTweet student inferences to:
  /content/drive/MyDrive/NLP /ai_human_agency_inferences_mnli_bertweet_student.csv
