In [None]:
# Colab-only cell: mount Google Drive at /content/drive, the mount point
# passed to drive.mount() below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#!/usr/bin/env python3
"""
integrated_mnli_inference.py

Run MNLI-based NLI with BART and RoBERTa on Reddit propositions and export:

- Per-model NLI probabilities:
    * P(ENTAILMENT), P(NEUTRAL), P(CONTRADICTION)
- Per-model agency scores:
    * s_m(p) = P_m(ENTAILMENT | p, h) - P_m(CONTRADICTION | p, h)
- Combined mean agency score across models:
    * s_mean(p) = (s_BART(p) + s_RoBERTa(p)) / 2
- (Optional) legacy positive-consensus flag using entailment-only scores.

NOTE:
This script *does not* perform the ±0.3 POS/NEG/NON thresholding or winner-
takes-strongest ensemble; that logic is applied downstream in a separate script.
"""

import os
import numpy as np
import pandas as pd
import torch
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# 1. Configuration

# Hugging Face checkpoint ids, keyed by a short display name.
MODELS = dict(
    BART_MNLI="facebook/bart-large-mnli",
    ROBERTA_MNLI="roberta-large-mnli",
)

# Hypothesis paired with every proposition (human agency with respect to AI).
# Keep the wording exactly as is: changing it changes every score.
FIXED_HYPOTHESIS_AGENCY = " ".join([
    "The proposition refers to the ability of humans to make choices,",
    "exert control, or take responsibility for the actions and outcomes of AI.",
])

# Input JSONL, output CSV, and the JSONL field that holds the proposition text.
# NOTE(review): the folder name below contains a trailing space ("NLP /") —
# confirm this matches the actual Drive folder name.
DATA_FILENAME = "/content/drive/MyDrive/NLP /artificial_filtered_output.jsonl"
OUTPUT_FILENAME = "ai_human_agency_inferences_mnli.csv"
PROPOSITION_COLUMN = "proposition"

# Entailment-probability cut-off used only for the legacy consensus flag.
CONSENSUS_ENTAIL_THRESHOLD = 0.4

# Run on the GPU when one is visible to torch, otherwise on the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Using device: {device}")


# 2. Data loading

# Fail fast with a clear message when the input file is absent.
input_exists = os.path.exists(DATA_FILENAME)
if not input_exists:
    raise FileNotFoundError(f"Input file not found: {DATA_FILENAME}")

# JSON Lines input: one JSON object per line becomes one DataFrame row.
df = pd.read_json(DATA_FILENAME, lines=True)

if PROPOSITION_COLUMN in df.columns:
    # Missing propositions become empty strings so every premise is a str.
    cleaned_propositions = df[PROPOSITION_COLUMN].fillna("").astype(str)
    df[PROPOSITION_COLUMN] = cleaned_propositions
else:
    raise KeyError(
        f"Column '{PROPOSITION_COLUMN}' not found. "
        f"Available columns: {list(df.columns)}"
    )

# Plain Python list of premises, in the same order as the rows of df.
sentences = cleaned_propositions.tolist()

print(f"Loaded {len(sentences)} propositions from {DATA_FILENAME}.")

# 3. Proper MNLI NLI runner

def run_mnli_nli(model_id, input_sentences, fixed_hypothesis, batch_size=None):
    """
    Score every proposition against one fixed hypothesis with an MNLI model.

    Each input sentence is used as the premise and ``fixed_hypothesis`` as the
    hypothesis; the model logits are softmax-normalised into per-class
    probabilities.

    Args:
        model_id: Hugging Face checkpoint id passed to ``from_pretrained``.
        input_sentences: Sequence of premise strings.
        fixed_hypothesis: Hypothesis string paired with every premise.
        batch_size: Number of premises per forward pass. Defaults to 32 when
            the module-level ``device`` is CUDA and 8 otherwise.

    Returns:
        A DataFrame with one row per input sentence (input order) and columns

            {model_short}_entailment
            {model_short}_neutral
            {model_short}_contradiction

        where ``model_short`` is the part of ``model_id`` after the last "/".

    Raises:
        ValueError: If ``batch_size`` is not positive, or if the model's
            ``id2label`` mapping lacks one of ENTAILMENT / NEUTRAL /
            CONTRADICTION (compared case-insensitively).
    """
    print(f"\n--- Running MNLI NLI with model: {model_id} ---")

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id).to(device)
    model.eval()

    if batch_size is None:
        batch_size = 32 if device.type == "cuda" else 8
    elif batch_size < 1:
        # A negative step would make range() empty and silently score nothing.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Normalize labels to uppercase for robust mapping
    id2label = {i: lbl.upper() for i, lbl in model.config.id2label.items()}
    print(f"Model label mapping: {id2label}")

    # Resolve the logit index of each NLI class once, and refuse to continue
    # if a class is absent: defaulting it to 0.0 would silently corrupt every
    # score derived from these probabilities.
    label2id = {lbl: int(i) for i, lbl in id2label.items()}
    missing = [
        lbl for lbl in ("ENTAILMENT", "NEUTRAL", "CONTRADICTION")
        if lbl not in label2id
    ]
    if missing:
        raise ValueError(
            f"Model '{model_id}' does not expose the NLI label(s) {missing}; "
            f"label mapping is {id2label}"
        )

    # Materialise as a list so slicing yields what the tokenizer expects even
    # if the caller passed another sequence type.
    input_sentences = list(input_sentences)
    model_short = model_id.split("/")[-1]

    prob_batches = []
    for start in tqdm(range(0, len(input_sentences), batch_size),
                      desc=f"NLI {model_short}"):
        batch = input_sentences[start:start + batch_size]

        # Sentence-pair encoding: (premise, hypothesis) for every premise.
        enc = tokenizer(
            batch,
            [fixed_hypothesis] * len(batch),
            truncation=True,
            padding=True,
            return_tensors="pt"
        ).to(device)

        with torch.no_grad():
            logits = model(**enc).logits
            prob_batches.append(torch.softmax(logits, dim=-1).cpu().numpy())

    if prob_batches:
        probs = np.concatenate(prob_batches, axis=0)
    else:
        # No input sentences: keep the three columns, with zero rows.
        probs = np.empty((0, len(id2label)), dtype=np.float32)

    return pd.DataFrame({
        f"{model_short}_entailment": probs[:, label2id["ENTAILMENT"]],
        f"{model_short}_neutral": probs[:, label2id["NEUTRAL"]],
        f"{model_short}_contradiction": probs[:, label2id["CONTRADICTION"]],
    })

# 4. Run NLI for both models and merge

# Score the propositions with every configured model, then attach all of the
# per-model probability columns to a copy of the input frame, side by side.
model_frames = [
    run_mnli_nli(model_id, sentences, FIXED_HYPOTHESIS_AGENCY)
    for model_id in MODELS.values()
]
df_results = pd.concat([df.copy(), *model_frames], axis=1)

# 5. Compute agency scores and mean score

def safe_col(df_, name: str) -> str:
    """Return ``name`` unchanged after confirming it is a column of ``df_``.

    Raises:
        KeyError: If ``name`` is not among ``df_.columns``.
    """
    if name in df_.columns:
        return name
    raise KeyError(f"Expected column missing: {name}")

# Validate that the probability columns needed below exist, in a fixed order.
bart_ent, bart_con, rob_ent, rob_con = (
    safe_col(df_results, column)
    for column in (
        "bart-large-mnli_entailment",
        "bart-large-mnli_contradiction",
        "roberta-large-mnli_entailment",
        "roberta-large-mnli_contradiction",
    )
)

# Per-model agency score: s_m = P(ENTAILMENT) - P(CONTRADICTION).
bart_agency = df_results[bart_ent].sub(df_results[bart_con])
roberta_agency = df_results[rob_ent].sub(df_results[rob_con])
df_results["bart-large-mnli_agency_score"] = bart_agency
df_results["roberta-large-mnli_agency_score"] = roberta_agency

# s_mean: arithmetic mean of the two per-model agency scores.
df_results["mean_agency_score"] = bart_agency.add(roberta_agency).div(2.0)

# 6. Legacy "consensus entailment" flag (optional diagnostic)

# True only when BOTH models give an entailment probability strictly above
# CONSENSUS_ENTAIL_THRESHOLD. This mirrors the earlier entailment-only script
# and is *not* the main SoA label for the paper; the ±0.3 thresholding and
# winner-takes-strongest logic are applied in a downstream script.
bart_entails = df_results[bart_ent].gt(CONSENSUS_ENTAIL_THRESHOLD)
roberta_entails = df_results[rob_ent].gt(CONSENSUS_ENTAIL_THRESHOLD)
df_results["consensus_entail_pos_flag"] = bart_entails & roberta_entails


# 7. Export with safe column selection
# Export columns grouped by role; the concatenated order below is the column
# order of the CSV.
nli_probability_columns = [
    "bart-large-mnli_entailment",
    "bart-large-mnli_neutral",
    "bart-large-mnli_contradiction",
    "roberta-large-mnli_entailment",
    "roberta-large-mnli_neutral",
    "roberta-large-mnli_contradiction",
]
agency_score_columns = [
    "bart-large-mnli_agency_score",
    "roberta-large-mnli_agency_score",
    "mean_agency_score",
]
# Metadata fields that are exported only if the input JSONL provided them.
optional_metadata_columns = [
    "timestamp",
    "sentiment_label",
    "sentiment_score",
    "sim_ai",
    "lex",
    "dep",
]
desired_output_columns = [
    PROPOSITION_COLUMN,
    *nli_probability_columns,
    *agency_score_columns,
    "consensus_entail_pos_flag",  # legacy diagnostic flag
    *optional_metadata_columns,
]

# Keep only the requested columns that are actually present, preserving the
# requested order, so absent optional metadata never raises.
available_columns = set(df_results.columns)
existing_output_columns = [
    column for column in desired_output_columns if column in available_columns
]
df_output = df_results.loc[:, existing_output_columns].copy()

df_output.to_csv(OUTPUT_FILENAME, index=False)

print("\n--- MNLI Inference Pipeline Complete ---")
print(f"Results saved to: {OUTPUT_FILENAME}")
print("\n--- Preview (first 5 rows) ---")
print(df_output.head())
print("-" * 40)
