In [1]:
# ============================
# Qualitative Examples Export (with explicit paths)
# ============================
import os, json, random, textwrap, pandas as pd, numpy as np
import torch
from transformers import GPT2TokenizerFast, GPT2LMHeadModel, DistilBertTokenizerFast, DistilBertForSequenceClassification

# ---- Paths (edit if needed) ----
# Input dataset (JSON) from which the test questions are drawn.
DATASET_JSON      = "/content/drive/MyDrive/Colab Notebooks/train_dataset.json"
# Directory holding the DistilBERT hallucination detector (HPM).
# NOTE(review): the threshold-sweep cell loads the HPM from
# ".../NewBestModelNew/hallucination_detector_final" instead — confirm which
# checkpoint is the intended one so both cells score with the same model.
HPM_FINAL_DIR     = "/content/drive/MyDrive/Colab Notebooks/NewBestModel/hallucination_detector_final"
# Directory holding the fine-tuned (POST) GPT-2 model and its tokenizer.
SGCT_FINAL_DIR    = "/content/drive/MyDrive/Colab Notebooks/SGCT_final_model"
# Output directory; the qualitative-examples CSV is written here.
SGCT_CHECKPOINT_DIR = "/content/drive/MyDrive/Colab Notebooks/SGCT_checkpoints"
# Hub name of the untuned (PRE) baseline model.
BASE_MODEL_NAME   = "gpt2"  # or "gpt2-medium" if you trained that variant

# ---- Parameters ----
# Upper bound on how many test questions are sampled for the table.
K_SAMPLES = 5
# Seed passed to `random.seed` before the questions are sampled.
RANDOM_SEED = 42
# When True, the HPM is loaded and score/category columns are added to the outputs.
INCLUDE_HPM = True
# Line width used by `textwrap.fill` in the notebook preview.
WRAP_CHARS = 90
# Use the GPU when one is available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Load dataset and test split ----
from sklearn.model_selection import train_test_split

# Read the file as UTF-8 explicitly rather than relying on the platform's
# default encoding, so non-ASCII questions/answers decode the same everywhere.
with open(DATASET_JSON, "r", encoding="utf-8") as f:
    data = json.load(f)

# Keep only rows that have both a question and an answer.
df_all = pd.DataFrame(data).dropna(subset=["question","answer"]).reset_index(drop=True)

# Two-stage split with a fixed seed: 30% is held out, then halved into
# validation and test, giving a 70/15/15 partition.
train_df, temp_df = train_test_split(df_all, test_size=0.3, random_state=42)
val_df,   test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Only the test questions are used further down in this cell.
test_q = test_df["question"].tolist()

# ---- Load PRE and POST models ----
# PRE: base GPT-2, loaded by name (BASE_MODEL_NAME) and moved to DEVICE.
pre_tok = GPT2TokenizerFast.from_pretrained(BASE_MODEL_NAME)
pre_gen = GPT2LMHeadModel.from_pretrained(BASE_MODEL_NAME).to(DEVICE)
# If the tokenizer has no pad token, reuse EOS for padding and record the
# same id on the model config.
if pre_tok.pad_token is None:
    pre_tok.pad_token = pre_tok.eos_token
    pre_gen.config.pad_token_id = pre_tok.eos_token_id

# POST: fine-tuned SGCT model and tokenizer, both read from SGCT_FINAL_DIR.
# No pad-token fallback is applied here; generate_answer passes
# pad_token_id=tok.eos_token_id explicitly on every call.
post_tok = GPT2TokenizerFast.from_pretrained(SGCT_FINAL_DIR)
post_gen = GPT2LMHeadModel.from_pretrained(SGCT_FINAL_DIR).to(DEVICE)

# Optional HPM: sequence classifier used to score the generated answers.
# hpm_tok / hpm_mod only exist when INCLUDE_HPM is True.
if INCLUDE_HPM:
    hpm_tok = DistilBertTokenizerFast.from_pretrained(HPM_FINAL_DIR)
    hpm_mod = DistilBertForSequenceClassification.from_pretrained(HPM_FINAL_DIR).to(DEVICE)
    hpm_mod.eval()

# ---- Generation helpers ----
def generate_answer(tok, model, q):
    """Sample one answer to ``q`` from ``model`` using a fixed Q/A prompt.

    Args:
        tok: Tokenizer paired with ``model``; it encodes the prompt and
            decodes the generated ids.
        model: Causal language model exposing ``generate``.
        q: Question text inserted into the prompt.

    Returns:
        The generated continuation as a stripped string, without the prompt.
    """
    prompt = f"Question: {q}\nAnswer:"
    enc = tok(prompt, return_tensors="pt").to(DEVICE)
    out = model.generate(
        **enc,
        max_new_tokens=96,
        do_sample=True, top_p=0.92, top_k=50,
        temperature=0.8,
        pad_token_id=tok.eos_token_id,
    )
    # The returned sequence starts with the prompt ids. Slice them off by
    # length instead of splitting the decoded text on "Answer:": a text split
    # picks the wrong segment whenever the question or the sampled
    # continuation itself contains "Answer:".
    prompt_len = enc["input_ids"].shape[1]
    continuation = out[0][prompt_len:]
    return tok.decode(continuation, skip_special_tokens=True).strip()

@torch.no_grad()
def hpm_score_batch(questions, answers, batch_size=32):
    """Score (question, answer) pairs with the HPM classifier.

    Each pair is encoded as a two-segment input (``"Q: <question>"`` and the
    answer) and the softmax probability of class index 1 is returned. The
    rest of this cell treats a higher value as more factual (see
    ``to_category``).

    Args:
        questions: Sequence of question strings.
        answers: Sequence of answer strings, aligned with ``questions``.
        batch_size: Maximum number of pairs per forward pass. Chunking keeps
            memory bounded for large inputs; padding is applied per chunk.

    Returns:
        A list of Python floats, one per pair, in input order. Empty input
        yields an empty list.

    Raises:
        ValueError: If the two sequences differ in length or ``batch_size``
            is smaller than 1.
    """
    questions = list(questions)
    answers = list(answers)
    if len(questions) != len(answers):
        raise ValueError(
            f"questions and answers must have the same length "
            f"(got {len(questions)} and {len(answers)})"
        )
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 (got {batch_size})")

    scores = []
    for start in range(0, len(questions), batch_size):
        stop = start + batch_size
        enc = hpm_tok([f"Q: {q}" for q in questions[start:stop]], answers[start:stop],
                      truncation=True, max_length=256, padding=True, return_tensors="pt").to(DEVICE)
        logits = hpm_mod(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"]).logits
        probs = torch.softmax(logits, dim=-1)[:,1]
        scores.extend(probs.detach().cpu().numpy().tolist())
    return scores

def to_category(score, tau_h=0.3, tau_f=0.7):
    """Bucket an HPM score into one of three labels.

    Scores at or above ``tau_f`` are "Factual", scores at or below ``tau_h``
    are "Hallucinated", and everything else is "Uncertain". The factual
    check is evaluated first.
    """
    if score >= tau_f:
        label = "Factual"
    elif score <= tau_h:
        label = "Hallucinated"
    else:
        label = "Uncertain"
    return label

# ---- Sample and generate ----
# Seed both RNGs: `random` decides which test questions are picked, while
# torch drives the stochastic decoding (do_sample=True) in generate_answer.
# Seeding only `random` would fix the questions but not the answers shown.
random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Draw at most K_SAMPLES distinct questions from the test split.
idxs = random.sample(range(len(test_q)), k=min(K_SAMPLES, len(test_q)))
qs = [test_q[i] for i in idxs]

# Generate with the baseline (PRE) and fine-tuned (POST) model for each
# question, keeping both answer lists aligned with `qs`.
pre_ans, post_ans = [], []
for q in qs:
    pre_ans.append(generate_answer(pre_tok, pre_gen, q))
    post_ans.append(generate_answer(post_tok, post_gen, q))

# Only the POST answers are scored by the HPM.
if INCLUDE_HPM:
    post_scores = hpm_score_batch(qs, post_ans)

# ---- Build DataFrame ----
# The three text columns are always present; the HPM columns are appended
# after them only when scoring is enabled, so column order is unchanged.
table_columns = {
    "Question": qs,
    "Pre-SGCT Answer": pre_ans,
    "Post-SGCT Answer": post_ans,
}
if INCLUDE_HPM:
    table_columns["HPM Score (Post)"] = [round(score, 3) for score in post_scores]
    table_columns["Category (Post)"] = list(map(to_category, post_scores))
df_show = pd.DataFrame(table_columns)

# ---- Save CSV ----
# Create the output directory if needed and write the table without the index.
os.makedirs(SGCT_CHECKPOINT_DIR, exist_ok=True)
csv_path = os.path.join(SGCT_CHECKPOINT_DIR, "qualitative_examples.csv")
df_show.to_csv(csv_path, index=False)
print(f"Saved qualitative examples → {csv_path}")

# ---- Print LaTeX table ----
# Single-character -> LaTeX replacement table. Applying it with str.translate
# is one pass over the input, so text inserted by one replacement is never
# re-processed by another.
_LATEX_ESCAPES = str.maketrans({
    "\\": "\\textbackslash{}",
    "&": "\\&",
    "%": "\\%",
    "$": "\\$",
    "#": "\\#",
    "_": "\\_",
    "{": "\\{",
    "}": "\\}",
    "~": "\\textasciitilde{}",
    "^": "\\textasciicircum{}",
})


def latex_escape(s):
    """Escape LaTeX special characters in ``s`` for use in a table cell.

    The previous chained ``.replace`` version replaced backslashes last,
    which re-escaped every backslash the earlier replacements had just
    inserted (``&`` ended up as ``\\textbackslash{}&``). Each input
    character is now mapped exactly once.

    Args:
        s: Plain text.

    Returns:
        ``s`` with ``\\ & % $ # _ { } ~ ^`` replaced by their LaTeX escapes.
    """
    return s.translate(_LATEX_ESCAPES)

print("\nLaTeX-ready table:\n")

# Only the caption, column spec and header differ between the two layouts;
# choose them once and share the rest of the table scaffolding.
if INCLUDE_HPM:
    caption_line = r"\caption{Qualitative examples before and after SGCT fine-tuning, with HPM scores and categories (post).}"
    tabular_line = r"\begin{tabular}{p{4cm}p{4.5cm}p{4.5cm}c c}"
    header_line = r"\textbf{Question} & \textbf{Pre-SGCT Answer} & \textbf{Post-SGCT Answer} & \textbf{HPM Score} & \textbf{Category} \\"
else:
    caption_line = r"\caption{Qualitative examples of model outputs before and after SGCT fine-tuning.}"
    tabular_line = r"\begin{tabular}{p{4cm}p{5cm}p{5cm}}"
    header_line = r"\textbf{Question} & \textbf{Pre-SGCT Answer} & \textbf{Post-SGCT Answer} \\"

opening_lines = (
    r"\begin{table}[htbp]",
    r"\centering",
    caption_line,
    r"\label{tab:sgct-examples}",
    tabular_line,
    r"\toprule",
    header_line,
    r"\midrule",
)
for tex_line in opening_lines:
    print(tex_line)

# One table row per example, with a \midrule between rows but not after the last.
last_row = len(qs) - 1
for row_idx, (question, before, after) in enumerate(zip(qs, pre_ans, post_ans)):
    q_tex = latex_escape(question)
    pre_tex = latex_escape(before)
    post_tex = latex_escape(after)
    if INCLUDE_HPM:
        score_txt = f"{post_scores[row_idx]:.2f}"
        label = to_category(post_scores[row_idx])
        print(f"{q_tex} & {pre_tex} & {post_tex} & {score_txt} & {label} \\\\")
    else:
        print(f"{q_tex} & {pre_tex} & {post_tex} \\\\")
    if row_idx < last_row:
        print(r"\midrule")

for tex_line in (r"\bottomrule", r"\end{tabular}", r"\end{table}"):
    print(tex_line)

# ---- Notebook preview ----
# Plain-text dump of each example, wrapped to WRAP_CHARS columns.
print("\nPreview:")
divider = "-"*80
for idx, (question, before, after) in enumerate(zip(qs, pre_ans, post_ans)):
    print(divider)
    print("Q:", textwrap.fill(question, WRAP_CHARS))
    print("PRE:", textwrap.fill(before, WRAP_CHARS))
    print("POST:", textwrap.fill(after, WRAP_CHARS))
    if not INCLUDE_HPM:
        continue
    score = post_scores[idx]
    print(f"HPM (post): {score:.3f}  →  {to_category(score)}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Saved qualitative examples → /content/drive/MyDrive/Colab Notebooks/SGCT_checkpoints/qualitative_examples.csv

LaTeX-ready table:

\begin{table}[htbp]
\centering
\caption{Qualitative examples before and after SGCT fine-tuning, with HPM scores and categories (post).}
\label{tab:sgct-examples}
\begin{tabular}{p{4cm}p{4.5cm}p{4.5cm}c c}
\toprule
\textbf{Question} & \textbf{Pre-SGCT Answer} & \textbf{Post-SGCT Answer} & \textbf{HPM Score} & \textbf{Category} \\
\midrule
Shahid Kapoor starred in Ishq Vishk. & He was the second most-watched Indian to star in a British film. He starred in the first two films. He was also the first Indian to star in a major American film.
A young man with a heart of gold, Shahid Kapoor is a star of the Indian cinema. He is considered to be the greatest Indian film star of all time. He's also one of the most respected filmmakers in India.
His films include Jai The Ripper, & Shahid Kapoor starred in Ishq Vishk. & 0.67 & Uncertain \\
\midrule
Can you only say tru

In [1]:
# =======================================
# Threshold calibration sweep (τ_f = 0.4, 0.6, 0.7, 0.8, 0.9)
# =======================================
import os, json, numpy as np, pandas as pd, torch
from transformers import GPT2TokenizerFast, GPT2LMHeadModel, DistilBertTokenizerFast, DistilBertForSequenceClassification

# ---- Paths ----
# Input dataset (JSON) from which the test questions are drawn.
DATASET_JSON   = "/content/drive/MyDrive/Colab Notebooks/train_dataset.json"
# Directory holding the fine-tuned GPT-2 model and its tokenizer.
SGCT_FINAL_DIR = "/content/drive/MyDrive/Colab Notebooks/SGCT_final_model"
# Directory holding the DistilBERT hallucination detector (HPM).
# NOTE(review): the qualitative-examples cell loads the HPM from
# ".../NewBestModel/hallucination_detector_final" (no "New" suffix) — confirm
# which checkpoint is intended so both cells score with the same model.
HPM_FINAL_DIR  = "/content/drive/MyDrive/Colab Notebooks/NewBestModelNew/hallucination_detector_final"
# Output directory; threshold_calibration.json is written here.
OUT_DIR        = "/content/drive/MyDrive/Colab Notebooks/SGCT_checkpoints"

# Use the GPU when one is available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Load test split (same seed 42 as before) ----
from sklearn.model_selection import train_test_split

# Read the file as UTF-8 explicitly rather than relying on the platform's
# default encoding, so non-ASCII questions/answers decode the same everywhere.
with open(DATASET_JSON, "r", encoding="utf-8") as f:
    data = json.load(f)

# Keep only rows that have both a question and an answer.
df_all = pd.DataFrame(data).dropna(subset=["question","answer"]).reset_index(drop=True)

# Two-stage split with a fixed seed: 30% is held out, then halved into
# validation and test, giving a 70/15/15 partition.
train_df, temp_df = train_test_split(df_all, test_size=0.3, random_state=42)
val_df,   test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Only the test questions are used by the sweep.
test_q = test_df["question"].tolist()

# ---- Load POST model (SGCT fine-tuned) ----
# Tokenizer and weights are both read from SGCT_FINAL_DIR; the model is moved to DEVICE.
tok = GPT2TokenizerFast.from_pretrained(SGCT_FINAL_DIR)
gen = GPT2LMHeadModel.from_pretrained(SGCT_FINAL_DIR).to(DEVICE)

# ---- Load HPM ----
# Sequence classifier used to score the generated answers.
hpm_tok = DistilBertTokenizerFast.from_pretrained(HPM_FINAL_DIR)
hpm_mod = DistilBertForSequenceClassification.from_pretrained(HPM_FINAL_DIR).to(DEVICE)
hpm_mod.eval()

# ---- Generate POST answers ----
def generate_answer(q):
    """Sample one answer to ``q`` from the fine-tuned model.

    Uses the module-level ``tok`` and ``gen`` with a fixed Q/A prompt.

    Args:
        q: Question text inserted into the prompt.

    Returns:
        The generated continuation as a stripped string, without the prompt.
    """
    prompt = f"Question: {q}\nAnswer:"
    enc = tok(prompt, return_tensors="pt").to(DEVICE)
    out = gen.generate(
        **enc,
        max_new_tokens=96,
        do_sample=True, top_p=0.92, top_k=50,
        temperature=0.8,
        pad_token_id=tok.eos_token_id,
    )
    # The returned sequence starts with the prompt ids. Slice them off by
    # length instead of splitting the decoded text on "Answer:": a text split
    # picks the wrong segment whenever the question or the sampled
    # continuation itself contains "Answer:".
    prompt_len = enc["input_ids"].shape[1]
    continuation = out[0][prompt_len:]
    return tok.decode(continuation, skip_special_tokens=True).strip()

print("Generating answers on test set... (this may take a while)")
# generate_answer samples (do_sample=True); seed torch first so the sweep is
# repeatable on the same setup. 42 matches the random_state used for the
# dataset split in this cell.
torch.manual_seed(42)
post_answers = [generate_answer(q) for q in test_q]

# ---- HPM scores ----
@torch.no_grad()
def hpm_score_batch(questions, answers, batch_size=32):
    """Score (question, answer) pairs with the HPM classifier.

    Each pair is encoded as a two-segment input (``"Q: <question>"`` and the
    answer) and the softmax probability of class index 1 is returned. The
    sweep in this cell counts scores at or above ``tau_f`` as factual.

    Args:
        questions: Sequence of question strings.
        answers: Sequence of answer strings, aligned with ``questions``.
        batch_size: Maximum number of pairs per forward pass. This cell
            scores the whole test split, so chunking keeps memory bounded;
            padding is applied per chunk.

    Returns:
        A list of Python floats, one per pair, in input order. Empty input
        yields an empty list.

    Raises:
        ValueError: If the two sequences differ in length or ``batch_size``
            is smaller than 1.
    """
    questions = list(questions)
    answers = list(answers)
    if len(questions) != len(answers):
        raise ValueError(
            f"questions and answers must have the same length "
            f"(got {len(questions)} and {len(answers)})"
        )
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 (got {batch_size})")

    scores = []
    for start in range(0, len(questions), batch_size):
        stop = start + batch_size
        enc = hpm_tok([f"Q: {q}" for q in questions[start:stop]], answers[start:stop],
                      truncation=True, max_length=256, padding=True, return_tensors="pt").to(DEVICE)
        logits = hpm_mod(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"]).logits
        probs = torch.softmax(logits, dim=-1)[:,1]
        scores.extend(probs.detach().cpu().numpy().tolist())
    return scores

post_scores = hpm_score_batch(test_q, post_answers)

# ---- Threshold sweep ----
# tau_f is swept; the hallucination threshold stays fixed.
taus = [0.4, 0.6, 0.7, 0.8, 0.9]
hallucinated_tau = 0.3
results = []

# Convert the scores once. The hallucinated rate and the "above the
# hallucination threshold" mask do not depend on tau_f, so compute them
# outside the loop instead of on every iteration.
scores_arr = np.asarray(post_scores)
halluc_rate = (scores_arr <= hallucinated_tau).mean()
above_halluc = scores_arr > hallucinated_tau

for tau in taus:
    # Factual: score >= tau_f. Uncertain: strictly between the two thresholds.
    factual_rate   = (scores_arr >= tau).mean()
    uncertain_rate = (above_halluc & (scores_arr < tau)).mean()
    results.append({
        "tau_f": tau,
        "factual": float(factual_rate),
        "hallucinated": float(halluc_rate),
        "uncertain": float(uncertain_rate)
    })
    print(f"τ_f={tau:.1f} | factual={factual_rate:.3f}, halluc={halluc_rate:.3f}, uncertain={uncertain_rate:.3f}")

# ---- Save JSON ----
os.makedirs(OUT_DIR, exist_ok=True)
json_path = os.path.join(OUT_DIR, "threshold_calibration.json")
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)
print(f"\nSaved threshold calibration results → {json_path}")



Generating answers on test set... (this may take a while)
τ_f=0.4 | factual=0.634, halluc=0.331, uncertain=0.035
τ_f=0.6 | factual=0.571, halluc=0.331, uncertain=0.099
τ_f=0.7 | factual=0.534, halluc=0.331, uncertain=0.136
τ_f=0.8 | factual=0.452, halluc=0.331, uncertain=0.218
τ_f=0.9 | factual=0.228, halluc=0.331, uncertain=0.441

Saved threshold calibration results → /content/drive/MyDrive/Colab Notebooks/SGCT_checkpoints/threshold_calibration.json
