In [1]:
import pandas as pd
from datasets import load_dataset

HALLU_CSV   = "../Data/hallucination_annotations_xsum_summaries.csv"
FACTUAL_CSV = "../Data/factuality_annotations_xsum_summaries.csv"

# 1) -----------------------------------------------------------------
# Read the two annotation files
faith_df   = pd.read_csv(HALLU_CSV)
factual_df = pd.read_csv(FACTUAL_CSV)

# 2) -----------------------------------------------------------------
# Ensure a single gold label per summary
if "gold_is_factual" not in faith_df.columns:
    maj = (
        (factual_df["is_factual"] == "yes")
        .groupby([factual_df.bbcid, factual_df.system])
        .mean()                               # fraction "yes"
        .ge(0.5)                              # majority vote
        .reset_index()
        .rename(columns={"is_factual": "gold_is_factual"})
        .replace({True: "yes", False: "no"})
    )
    faith_df = faith_df.merge(maj, on=["bbcid", "system"], how="left")

faith_df["label"] = (faith_df["gold_is_factual"] == "yes").astype(int)

# 3) -----------------------------------------------------------------
# Pull source articles from Hugging Face XSum
print("Loading XSum… this takes ~1 min the first time.")
xsum = load_dataset("EdinburghNLP/xsum")       # train/validation/test splits

# Build a mapping id(int) -> document
docs = {int(ex["id"]): ex["document"]
        for split in xsum.values()
        for ex in split}

# 4) -----------------------------------------------------------------
# Attach full article text
faith_df["document"] = faith_df["bbcid"].map(docs)

missing = faith_df["document"].isna().sum()
if missing:
    raise ValueError(f"{missing} BBC IDs in the CSV were not found in XSum")

# 5) -----------------------------------------------------------------
# Keep only what we feed into the detector pipeline
data = faith_df[["document", "summary", "label"]]

print(f"{len(data):,} examples loaded.")
print(data.head(3))


  from .autonotebook import tqdm as notebook_tqdm


Loading XSum… this takes ~1 min the first time.
11,185 examples loaded.
                                            document  \
0  France's Dubuisson carded a 67 to tie with ove...   
1  France's Dubuisson carded a 67 to tie with ove...   
2  France's Dubuisson carded a 67 to tie with ove...   

                                             summary  label  
0  rory mcilroy will take a one-shot lead into th...      0  
1  rory mcilroy will take a one-shot lead into th...      0  
2  rory mcilroy will take a one-shot lead into th...      0  


In [2]:
from sklearn.model_selection import train_test_split

train, temp  = train_test_split(data, test_size=0.40,
                                stratify=data.label, random_state=42)
calib, test  = train_test_split(temp, test_size=0.50,
                                stratify=temp.label, random_state=42)

for df,name in [(train,"train"), (calib,"calib"), (test,"test")]:
    print(f"{name}: {len(df):,} rows (pos {df.label.mean():.2%})")


train: 6,711 rows (pos 6.68%)
calib: 2,237 rows (pos 6.66%)
test: 2,237 rows (pos 6.71%)


In [None]:
from ollama import chat , Client

client = Client()

def ask_llm(prompts, model="llama3.2:1b", **kw):
    """Return a list of completions equal in length to prompts."""
    out = []
    for p in prompts:
        res = client.chat(model=model, messages=[{"role":"user","content":p}], **kw)
        out.append(res["message"]["content"])
    return out


ValueError: Unknown scheme for proxy URL URL('socks://127.0.0.1:10808/')

In [17]:
import torch, math
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tok_t5 = AutoTokenizer.from_pretrained("google/flan-t5-small")
mod_t5 = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small").eval()

@torch.no_grad()
def inverse_ppl(texts, max_len=512, batch_size=8):
    inv = []
    for i in range(0, len(texts), batch_size):
        enc = tok_t5(texts[i:i+batch_size], return_tensors="pt", truncation=True,
                     padding=True, max_length=max_len)
        out = mod_t5(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"],
                     labels=enc["input_ids"])
        loss = out.loss
        ppl  = torch.exp(loss)           # natural exp ≈ 2.718…
        inv.extend( (1/ppl).cpu().tolist() )
    return inv


In [18]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

tok_nli = AutoTokenizer.from_pretrained("microsoft/deberta-large-mnli")
mod_nli = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-large-mnli").eval()

@torch.no_grad()
def nli_faith(documents, summaries, batch_size=4):
    """Return 1 − P(contradiction) per pair."""
    scores = []
    for i in range(0, len(documents), batch_size):
        enc = tok_nli(
            documents[i:i+batch_size],
            summaries[i:i+batch_size],
            return_tensors="pt",
            truncation=True, padding=True, max_length=512
        )
        logits = mod_nli(**enc).logits
        probs  = F.softmax(logits, dim=-1)   # order: contradiction, neutral, entailment
        scores.extend( (1 - probs[:,0]).cpu().tolist() )
    return scores


Some weights of the model checkpoint at microsoft/deberta-large-mnli were not used when initializing DebertaForSequenceClassification: ['config']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [19]:
def p_true(documents, summaries, judge="qwen:7b", batch_size=2):
    prompts = [
        f"You are a fact-checker. Respond only with 'True' or 'False'.\n"
        f"Document:\n{doc}\n---\nSummary:\n{summ}\n---\nIs the summary factual?"
        for doc,summ in zip(documents,summaries)
    ]
    outs = ask_llm(prompts, model=judge, stream=False)
    return [ 1.0 if o.strip().lower().startswith("true") else 0.0 for o in outs ]


In [20]:
splits = dict(train=train, calib=calib, test=test)
for name,df in splits.items():
    df["score_invppl"] = inverse_ppl(df["summary"].tolist())
    df["score_nli"]    = nli_faith(df["document"].tolist(), df["summary"].tolist())
    df["score_ptrue"]  = p_true(df["document"].tolist(), df["summary"].tolist())
    df.to_parquet(f"xsum_{name}_scores.pq", index=False)


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


TypeError: 'float' object is not iterable

In [21]:
from sklearn.isotonic import IsotonicRegression
import numpy as np

def platt(x, y):
    # x = raw score (0-1), y = 0/1 label
    ir = IsotonicRegression(out_of_bounds="clip")
    ir.fit(x, y)
    return ir

calib_df = pd.read_parquet("xsum_calib_scores.pq")
calibrators = {}
for col in ["score_invppl", "score_nli", "score_ptrue"]:
    calibrators[col] = platt(calib_df[col].values, calib_df.label.values)


FileNotFoundError: [Errno 2] No such file or directory: 'xsum_calib_scores.pq'

In [22]:
import joblib, os, json
os.makedirs("artifacts", exist_ok=True)
joblib.dump(calibrators, "artifacts/calibrators.pkl")


NameError: name 'calibrators' is not defined

In [23]:
from scipy.special import logit
from sklearn.linear_model import LogisticRegression

def make_X(df):
    feats = []
    for col in ["score_invppl","score_nli","score_ptrue"]:
        p = calibrators[col].predict(df[col].values)
        feats.append(p)
    X = logit(np.vstack(feats).T.clip(1e-4, 1-1e-4))
    return X

X_cal   = make_X(calib_df)
y_cal   = calib_df.label.values

meta = LogisticRegression(max_iter=200, class_weight="balanced").fit(X_cal, y_cal)
joblib.dump(meta, "artifacts/meta.pkl")


NameError: name 'calib_df' is not defined

In [24]:
from sklearn.metrics import f1_score, brier_score_loss, roc_auc_score

test_df = pd.read_parquet("xsum_test_scores.pq")
X_test  = make_X(test_df)
p_test  = meta.predict_proba(X_test)[:,1]

print("F1 @ 0.5:",   f1_score(test_df.label, p_test > 0.5))
print("Brier:",       brier_score_loss(test_df.label, p_test))
print("AUROC:",       roc_auc_score(test_df.label, p_test))


FileNotFoundError: [Errno 2] No such file or directory: 'xsum_test_scores.pq'

In [25]:
import scipy.special as sp
calibrators = joblib.load("artifacts/calibrators.pkl")
meta        = joblib.load("artifacts/meta.pkl")

def hallucination_guard(doc, summ, thr=0.5):
    raw = {
        "score_invppl": inverse_ppl([summ])[0],
        "score_nli":    nli_faith([doc], [summ])[0],
        "score_ptrue":  p_true([doc], [summ])[0],
    }
    xs = [ calibrators[k].predict([raw[k]])[0] for k in raw ]
    p  = meta.predict_proba( sp.logit(np.array(xs).reshape(1,-1)) )[0,1]
    return {"p_hallucination": float(p),
            "is_hallucinated": bool(p > thr),
            "breakdown": raw}


FileNotFoundError: [Errno 2] No such file or directory: 'artifacts/calibrators.pkl'