In [1]:
# Install dependencies
!pip install transformers datasets torch torchvision torchaudio accelerate -q
!pip install wordcloud matplotlib seaborn
!pip install transformers torch --quiet





In [1]:
# Import libraries
import pandas as pd
import numpy as np
import torch
from tqdm.auto import tqdm

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, BertModel
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
def ensure_cuda(tensor):
    return tensor.to('cuda') if torch.cuda.is_available() else tensor

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [3]:
# Load the full preprocessed dataset from EDA
full_df = pd.read_csv("/content/clean_liar (1).csv")

# Check unique values in the 'split' column to diagnose the empty valid_df
print("Unique values in 'split' column:", full_df['split'].unique())

# Split into train/validation/test sets
train_df = full_df[full_df['split'] == 'train'].reset_index(drop=True)
valid_df = full_df[full_df['split'] == 'val'].reset_index(drop=True) # Corrected from 'valid' to 'val'
test_df  = full_df[full_df['split'] == 'test'].reset_index(drop=True)

print(f"Train: {len(train_df)} | Valid: {len(valid_df)} | Test: {len(test_df)}")

# Check label distribution
train_df['binary_label'].value_counts(normalize=True)

Unique values in 'split' column: ['train' 'val' 'test']
Train: 10237 | Valid: 1283 | Test: 1266


Unnamed: 0_level_0,proportion
binary_label,Unnamed: 1_level_1
True,0.561883
False,0.438117


In [None]:
from torch.utils.data import Dataset

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

class FakeNewsDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=128):
        self.texts = dataframe['statement'].tolist()
        self.labels = dataframe['binary_label'].tolist()
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_len, return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

train_dataset = FakeNewsDataset(train_df, tokenizer)
valid_dataset = FakeNewsDataset(valid_df, tokenizer)
test_dataset  = FakeNewsDataset(test_df, tokenizer)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    report_to="none"
)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


  trainer = Trainer(


In [None]:
trainer.train()
trainer.evaluate()




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6117,0.642889,0.636009




Epoch,Training Loss,Validation Loss,Accuracy
1,0.6117,0.642889,0.636009
2,0.5221,0.6655,0.647701




{'eval_loss': 0.6655000448226929,
 'eval_accuracy': 0.6477007014809041,
 'eval_runtime': 348.2334,
 'eval_samples_per_second': 3.684,
 'eval_steps_per_second': 0.462,
 'epoch': 2.0}

In [None]:
# Evaluate on test data
predictions = trainer.predict(test_dataset)
y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=1)

print(classification_report(y_true, y_pred, target_names=["Fake", "True"]))




              precision    recall  f1-score   support

        Fake       0.63      0.48      0.55       553
        True       0.66      0.78      0.71       713

    accuracy                           0.65      1266
   macro avg       0.64      0.63      0.63      1266
weighted avg       0.65      0.65      0.64      1266



In [None]:
model.save_pretrained("./bert_fake_news_model")
tokenizer.save_pretrained("./bert_fake_news_model")


('./bert_fake_news_model/tokenizer_config.json',
 './bert_fake_news_model/special_tokens_map.json',
 './bert_fake_news_model/vocab.txt',
 './bert_fake_news_model/added_tokens.json')

In [None]:
#save model in drive
from google.colab import drive
drive.mount('/content/drive')

!cp -r /content/bert_fake_news_model /content/drive/MyDrive/bert_fake_news_model


Mounted at /content/drive


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

bert_model_path = "./bert_fake_news_model"
bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
bert_model = BertForSequenceClassification.from_pretrained(bert_model_path)
bert_model.eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [None]:
def bert_predict_label(text):
    inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
    with torch.no_grad():
        logits = bert_model(**inputs).logits
    predicted_class = torch.argmax(logits, dim=1).item()  # 0 = Fake, 1 = True
    return predicted_class


In [None]:
import pandas as pd

full_df = pd.read_csv("/content/clean_liar (1).csv")

#column for BERT predictions (for evaluation, not for training)
full_df["bert_pred"] = full_df["statement"].apply(bert_predict_label)

# T5 will train on true binary labels
def create_t5_input(row):
    label_text = "true" if row["binary_label"] == 1 else "fake"
    return (
        f"Explain why this news is labeled as {label_text}.\n"
        f"Statement: {row['statement']}\n"
        f"Speaker: {row.get('speaker', 'Unknown')}\n"
        f"Party: {row.get('party', 'Unknown')}\n"
        f"Subject: {row.get('subject', 'General')}"
    )

def create_t5_target(row):
    if row["binary_label"] == 1:
        return "This statement is true because it aligns with verified evidence or widely accepted facts."
    else:
        return "This statement is fake because it contradicts known facts, is unsupported by evidence, or misrepresents data."

full_df["input_text"] = full_df.apply(create_t5_input, axis=1)
full_df["target_text"] = full_df.apply(create_t5_target, axis=1)


In [None]:
from sklearn.model_selection import train_test_split
from datasets import Dataset

train_df, valid_df = train_test_split(full_df[["input_text", "target_text"]], test_size=0.1, random_state=42)

train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
valid_dataset = Dataset.from_pandas(valid_df.reset_index(drop=True))


In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def preprocess(examples):
    model_inputs = tokenizer(examples["input_text"], max_length=256, truncation=True, padding="max_length")
    labels = tokenizer(examples["target_text"], max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_train = train_dataset.map(preprocess, batched=True)
tokenized_valid = valid_dataset.map(preprocess, batched=True)


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/11507 [00:00<?, ? examples/s]

Map:   0%|          | 0/1279 [00:00<?, ? examples/s]

In [None]:
def get_explanation(statement, speaker=None, party=None, subject=None):
    # BERT prediction
    bert_label = bert_predict_label(statement)
    label_text = "true" if bert_label == 1 else "fake"

    # Create T5 prompt using BERT's decision
    prompt = (
        f"Explain why BERT classified this news as {label_text}.\n"
        f"Statement: {statement}\n"
        f"Speaker: {speaker or 'Unknown'}\n"
        f"Party: {party or 'Unknown'}\n"
        f"Subject: {subject or 'General'}"
    )

    # Step 3 → Generate explanation
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    output_ids = model.generate(inputs.input_ids, max_length=128, num_beams=5)
    explanation = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    return bert_label, explanation


In [None]:
model.save_pretrained('./t5_explanation_model')
tokenizer.save_pretrained('./t5_explanation_model')

('./t5_explanation_model/tokenizer_config.json',
 './t5_explanation_model/special_tokens_map.json',
 './t5_explanation_model/spiece.model',
 './t5_explanation_model/added_tokens.json')

In [None]:
!cp -r /content/t5_explanation_model /content/drive/MyDrive/t5_explanation_model

In [5]:
import os

#load saved bert model

bert_saved_path = "/content/drive/MyDrive/bert_fake_news_model"

if os.path.isdir(bert_saved_path):
    bert_tokenizer = BertTokenizer.from_pretrained(bert_saved_path)
    bert_model = BertForSequenceClassification.from_pretrained(bert_saved_path)
    bert_model.eval()
    print("Loaded BERT from", bert_saved_path)
else:
    print("No saved BERT at", bert_saved_path, "- you should fine-tune BERT first (see below).")

Loaded BERT from /content/drive/MyDrive/bert_fake_news_model


In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define bert_predict_label (from cell S6TXblXCb0jy)
def bert_predict_label(text):
    inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256).to(device)
    with torch.no_grad():
        logits = bert_model(**inputs).logits
    predicted_class = torch.argmax(logits, dim=1).item()  # 0 = Fake, 1 = True
    return predicted_class

# Create 'bert_pred' column (from cell NDEjL7vCb5Z4)
full_df["bert_pred"] = full_df["statement"].apply(bert_predict_label)

# Linguistic cues function (simple)
exaggeration_tokens = set(["always","never","everyone","nobody","everybody","completely","totally","all"])
def linguistic_cues(text):
    toks = [w.lower() for w in word_tokenize(str(text))]
    cues = {
        "num_tokens": sum(1 for t in toks if t.isdigit()),
        "has_exaggeration": int(any(token in exaggeration_tokens for token in toks)),
        "num_words": len(toks),
        "avg_word_len": np.mean([len(w) for w in toks]) if toks else 0
    }
    return cues

nltk.download('punkt_tab', quiet=True)
# small sample to speed things applied to full_df if you have compute
cues = [linguistic_cues(s) for s in tqdm(full_df['statement'].astype(str).tolist())]
cues_df = pd.DataFrame(cues)
full_df = pd.concat([full_df.reset_index(drop=True), cues_df], axis=1)

#  t5_input (include bert_pred textual)
def build_t5_input(row):
    pred_text = "true" if int(row['bert_pred']) == 1 else "fake"

    # credibility_score
    credibility_val = row.get('credibility_score')
    credibility_str = "N/A"
    if pd.notna(credibility_val):
        try:
            credibility_str = f"{float(credibility_val):.2f}"
        except (ValueError, TypeError):
            credibility_str = str(credibility_val)

    # linguistic cues
    avg_word_len_val = row.get('avg_word_len', 0)
    if isinstance(avg_word_len_val, pd.Series):
        avg_word_len_formatted = "N/A"
    else:
        try:
            avg_word_len_formatted = f"{float(avg_word_len_val):.2f}"
        except (ValueError, TypeError):
            avg_word_len_formatted = str(avg_word_len_val)

    num_words_val = row.get('num_words', 0)
    if isinstance(num_words_val, pd.Series):
        num_words_formatted = "N/A"
    else:
        try:
            num_words_formatted = f"{int(num_words_val)}"
        except (ValueError, TypeError):
            num_words_formatted = str(num_words_val)

    has_exaggeration_val = row.get('has_exaggeration', 0)
    if isinstance(has_exaggeration_val, pd.Series):
        has_exaggeration_formatted = "N/A"
    else:
        try:
            has_exaggeration_formatted = f"{int(has_exaggeration_val)}"
        except (ValueError, TypeError):
            has_exaggeration_formatted = str(has_exaggeration_val)

    return (
        f"Explain why the model predicted this statement as {pred_text}.\n"
        f"Statement: {row['statement']}\n"
        f"Subject: {row.get('subject','Unknown')}\n"
        f"Speaker: {row.get('speaker','Unknown')}\n"
        f"Party: {row.get('party','Unknown')}\n"
        f"Credibility: {credibility_str}\n"
        f"Linguistic cues: avg_word_len={avg_word_len_formatted}, num_words={num_words_formatted}, has_exag={has_exaggeration_formatted}"
    )

full_df['t5_input'] = full_df.apply(build_t5_input, axis=1)


  0%|          | 0/12786 [00:00<?, ?it/s]

In [20]:
def build_explanation_target(row):
    if int(row['bert_pred']) == 1:
        return (
            "The model predicted this as true. The claim uses factual, neutral language and lacks exaggerated wording. "
            "It matches linguistic and contextual patterns seen in verified statements, and the speaker's historical credibility "
            f"(score={row.get('credibility_score', np.nan):.2f}) supports plausibility in this context."
        )
    else:
        return (
            "The model predicted this as fake. The statement contains linguistic markers commonly found in misleading claims "
            "(e.g., exaggeration or unsupported numeric claims) and lacks referenced evidence. The speaker's history and the "
            f"topic context also reduce credibility (credibility={row.get('credibility_score', np.nan):.2f})."
        )

full_df['t5_target'] = full_df.apply(build_explanation_target, axis=1)
full_df[['t5_input','t5_target']].head(2)


Unnamed: 0,t5_input,t5_target
0,Explain why the model predicted this statement...,The model predicted this as true. The claim us...
1,Explain why the model predicted this statement...,The model predicted this as true. The claim us...


In [22]:

from datasets import Dataset
sample_frac = 0.5
train_df = full_df.sample(frac=sample_frac, random_state=42).reset_index(drop=True)

# split train/val
from sklearn.model_selection import train_test_split
t5_train, t5_val = train_test_split(train_df[['t5_input','t5_target']], test_size=0.1, random_state=42)

train_ds = Dataset.from_pandas(t5_train.reset_index(drop=True))
val_ds   = Dataset.from_pandas(t5_val.reset_index(drop=True))

model_name = "google/flan-t5-base"
t5_tokenizer = AutoTokenizer = None
from transformers import AutoTokenizer
t5_tokenizer = AutoTokenizer.from_pretrained(model_name)
t5_model = T5ForConditionalGeneration.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [23]:
def preprocess_t5(batch):
    inputs = t5_tokenizer(batch['t5_input'], max_length=256, truncation=True, padding='max_length')
    labels = t5_tokenizer(batch['t5_target'], max_length=128, truncation=True, padding='max_length')
    inputs['labels'] = labels['input_ids']
    return inputs

tokenized_train = train_ds.map(preprocess_t5, batched=True)
tokenized_val   = val_ds.map(preprocess_t5, batched=True)


Map:   0%|          | 0/5753 [00:00<?, ? examples/s]

Map:   0%|          | 0/640 [00:00<?, ? examples/s]

In [24]:
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

training_args = Seq2SeqTrainingArguments(
    output_dir="./t5_explainer_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    predict_with_generate=True,
    logging_steps=50,
    eval_steps=200,
    save_total_limit=2,
    num_train_epochs=2,
    learning_rate=3e-5,
    fp16=torch.cuda.is_available()
)

trainer = Seq2SeqTrainer(
    model=t5_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=t5_tokenizer,
)

trainer.train()
trainer.save_model("./t5_explainer_model")
t5_tokenizer.save_pretrained("./t5_explainer_model")
print("Saved T5 explainer at ./t5_explainer_model")
!cp -r /content/t5_explainer_model /content/drive/MyDrive/t5_explainer_model
print("saved T5 to drive")


  trainer = Seq2SeqTrainer(
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mleonkanabu[0m ([33mleonkanabu-n-a[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
50,0.0
100,0.0
150,0.0
200,0.0
250,0.0
300,0.0
350,0.0
400,0.0
450,0.0
500,0.0


Saved T5 explainer at ./t5_explainer_model
saved T5 to drive


In [28]:
import nltk
from nltk.tokenize import word_tokenize

# Ensure nltk data is downloaded (if not already)
nltk.download('punkt', quiet=True)


t5_model.to(device)
bert_model.to(device)

def bert_predict(text):
    inputs = bert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256).to(device)
    with torch.no_grad():
        outputs = bert_model(**inputs)
        logits = outputs.logits
    probabilities = torch.softmax(logits, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1).item()
    confidence = torch.max(probabilities, dim=1).values.item()
    return predicted_class, confidence

# Linguistic cues function
exaggeration_tokens = set(["always","never","everyone","nobody","everybody","completely","totally","all"])
def linguistic_cues(text):
    toks = [w.lower() for w in word_tokenize(str(text))]
    cues = {
        "num_tokens": sum(1 for t in toks if t.isdigit()),
        "has_exaggeration": int(any(token in exaggeration_tokens for token in toks)),
        "num_words": len(toks),
        "avg_word_len": np.mean([len(w) for w in toks]) if toks else 0
    }
    return cues

def explain_by_pipeline(statement, speaker=None, party=None, subject=None, credibility_score=None):
    # BERT predict
    p, conf = bert_predict(statement)
    label_text = 'true' if p == 1 else 'fake'

    # Get linguistic cues for the statement
    cues = linguistic_cues(statement)

    # Handle credibility_score safely
    credibility_str = "N/A"
    if credibility_score is not None and pd.notna(credibility_score):
        try:
            credibility_str = f"{float(credibility_score):.2f}"
        except (ValueError, TypeError):
            credibility_str = str(credibility_score)

    # Format linguistic cues safely
    avg_word_len_formatted = f"{cues.get('avg_word_len', 0):.2f}"
    num_words_formatted = f"{int(cues.get('num_words', 0))}"
    has_exaggeration_formatted = f"{int(cues.get('has_exaggeration', 0))}"

    # Build prompt (matching the training input format)
    prompt = (
        f"Explain why the model predicted this statement as {label_text}.\n"
        f"Statement: {statement}\n"
        f"Subject: {subject or 'Unknown'}\n"
        f"Speaker: {speaker or 'Unknown'}\n"
        f"Party: {party or 'Unknown'}\n"
        f"Credibility: {credibility_str}\n"
        f"Linguistic cues: avg_word_len={avg_word_len_formatted}, num_words={num_words_formatted}, has_exag={has_exaggeration_formatted}"
    )

    inputs = t5_tokenizer(prompt, return_tensors='pt', truncation=True, padding=True, max_length=256).to(device)
    out = t5_model.generate(**inputs, max_length=256, num_beams=8, early_stopping=True)
    explanation = t5_tokenizer.decode(out[0], skip_special_tokens=True)
    return p, conf, explanation


samp_row = full_df.iloc[10]
samp_statement = samp_row['statement']
samp_speaker = samp_row.get('speaker')
samp_party = samp_row.get('party')
samp_subject = samp_row.get('subject')
samp_credibility = samp_row.get('credibility_score')

pred, conf, expl = explain_by_pipeline(
    samp_statement,
    speaker=samp_speaker,
    party=samp_party,
    subject=samp_subject,
    credibility_score=samp_credibility
)
print("Statement:", samp_statement)
print("BERT pred:", pred, "conf:", conf)
print("T5 explanation:", expl)


Statement: Since 2000, nearly 12 million Americans have slipped out of the middle class and into poverty.
BERT pred: 1 conf: 0.930569052696228
T5 explanation: num_words = 18, num_words = 18


In [29]:
def build_explanation_target(row):
    speaker = row.get('speaker', 'Unknown')
    party = row.get('party', 'Unknown')
    subject = row.get('subject', 'General')

    # Safely get and format credibility_score
    credibility_val = row.get('credibility_score')
    credibility_str = "N/A"
    if pd.notna(credibility_val):
        try:
            credibility_str = f"{float(credibility_val):.2f}"
        except (ValueError, TypeError):
            credibility_str = str(credibility_val)

    # Safely get and format linguistic cues
    avg_word_len_val = row.get('avg_word_len', np.nan)
    avg_word_len_formatted = "N/A"
    if pd.notna(avg_word_len_val):
        try:
            avg_word_len_formatted = f"{float(avg_word_len_val):.2f}"
        except (ValueError, TypeError):
            avg_word_len_formatted = str(avg_word_len_val)

    num_words_val = row.get('num_words', np.nan)
    num_words_formatted = "N/A"
    if pd.notna(num_words_val):
        try:
            num_words_formatted = f"{int(num_words_val)}"
        except (ValueError, TypeError):
            num_words_formatted = str(num_words_val)

    has_exaggeration_val = row.get('has_exaggeration', np.nan)
    has_exaggeration_formatted = "N/A"
    if pd.notna(has_exaggeration_val):
        try:
            has_exaggeration_formatted = "present" if int(has_exaggeration_val) == 1 else "absent"
        except (ValueError, TypeError):
            has_exaggeration_formatted = str(has_exaggeration_val)

    if int(row['bert_pred']) == 1: # True prediction
        return (
            f"The model predicted this as true. The statement by {speaker} (Party: {party}, Subject: {subject}) "
            f"uses factual, neutral language and lacks exaggerated wording ({has_exaggeration_formatted}). "
            f"Linguistic cues (avg word len: {avg_word_len_formatted}, num words: {num_words_formatted}) align with verified statements. "
            f"The speaker's historical credibility (score: {credibility_str}) supports plausibility in this context."
        )
    else: # Fake prediction
        return (
            f"The model predicted this as fake. The statement by {speaker} (Party: {party}, Subject: {subject}) "
            f"contains linguistic markers commonly found in misleading claims (e.g., exaggeration: {has_exaggeration_formatted}) "
            f"and lacks referenced evidence. Linguistic cues (avg word len: {avg_word_len_formatted}, num words: {num_words_formatted}) "
            f"and the speaker's history (credibility: {credibility_str}) reduce its credibility."
        )

full_df['t5_target'] = full_df.apply(build_explanation_target, axis=1)
full_df[['t5_input','t5_target']].head(2)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [30]:
def build_explanation_target(row):
    speaker = row.get('speaker', 'Unknown')
    party = row.get('party', 'Unknown')
    subject = row.get('subject', 'General')

    # Safely get and format credibility_score
    credibility_val = row.get('credibility_score', np.nan)
    credibility_str = "N/A"
    if not isinstance(credibility_val, pd.Series) and pd.notna(credibility_val):
        try:
            credibility_str = f"{float(credibility_val):.2f}"
        except (ValueError, TypeError):
            credibility_str = str(credibility_val)

    # Safely get and format linguistic cues
    avg_word_len_val = row.get('avg_word_len', np.nan)
    avg_word_len_formatted = "N/A"
    if not isinstance(avg_word_len_val, pd.Series) and pd.notna(avg_word_len_val):
        try:
            avg_word_len_formatted = f"{float(avg_word_len_val):.2f}"
        except (ValueError, TypeError):
            avg_word_len_formatted = str(avg_word_len_val)

    num_words_val = row.get('num_words', np.nan)
    num_words_formatted = "N/A"
    if not isinstance(num_words_val, pd.Series) and pd.notna(num_words_val):
        try:
            num_words_formatted = f"{int(num_words_val)}"
        except (ValueError, TypeError):
            num_words_formatted = str(num_words_val)

    has_exaggeration_val = row.get('has_exaggeration', np.nan)
    has_exaggeration_formatted = "N/A"
    if not isinstance(has_exaggeration_val, pd.Series) and pd.notna(has_exaggeration_val):
        try:
            has_exaggeration_formatted = "present" if int(has_exaggeration_val) == 1 else "absent"
        except (ValueError, TypeError):
            has_exaggeration_formatted = str(has_exaggeration_val)

    if int(row['bert_pred']) == 1: # True prediction
        return (
            f"The model predicted this as true. The statement by {speaker} (Party: {party}, Subject: {subject}) "
            f"uses factual, neutral language and lacks exaggerated wording ({has_exaggeration_formatted}). "
            f"Linguistic cues (avg word len: {avg_word_len_formatted}, num words: {num_words_formatted}) align with verified statements. "
            f"The speaker's historical credibility (score: {credibility_str}) supports plausibility in this context."
        )
    else: # Fake prediction
        return (
            f"The model predicted this as fake. The statement by {speaker} (Party: {party}, Subject: {subject}) "
            f"contains linguistic markers commonly found in misleading claims (e.g., exaggeration: {has_exaggeration_formatted}) "
            f"and lacks referenced evidence. Linguistic cues (avg word len: {avg_word_len_formatted}, num words: {num_words_formatted}) "
            f"and the speaker's history (credibility: {credibility_str}) reduce its credibility."
        )

full_df['t5_target'] = full_df.apply(build_explanation_target, axis=1)
full_df[['t5_input','t5_target']].head(2)

Unnamed: 0,t5_input,t5_target
0,Explain why the model predicted this statement...,The model predicted this as true. The statemen...
1,Explain why the model predicted this statement...,The model predicted this as true. The statemen...


In [31]:
sample_frac = 0.5   # use the same sample_frac as before
train_df = full_df.sample(frac=sample_frac, random_state=42).reset_index(drop=True)

# split train/val
t5_train, t5_val = train_test_split(train_df[['t5_input','t5_target']], test_size=0.1, random_state=42)

train_ds = Dataset.from_pandas(t5_train.reset_index(drop=True))
val_ds   = Dataset.from_pandas(t5_val.reset_index(drop=True))

def preprocess_t5(batch):
    inputs = t5_tokenizer(batch['t5_input'], max_length=256, truncation=True, padding='max_length')
    labels = t5_tokenizer(batch['t5_target'], max_length=128, truncation=True, padding='max_length')
    inputs['labels'] = labels['input_ids']
    return inputs

tokenized_train = train_ds.map(preprocess_t5, batched=True)
tokenized_val   = val_ds.map(preprocess_t5, batched=True)

trainer = Seq2SeqTrainer(
    model=t5_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=t5_tokenizer,
)

trainer.train()
trainer.save_model("./t5_explainer_model_refined")
t5_tokenizer.save_pretrained("./t5_explainer_model_refined")
print("Saved refined T5 explainer at ./t5_explainer_model_refined")
!cp -r /content/t5_explainer_model_refined /content/drive/MyDrive/t5_explainer_model_refined
print("saved refined T5 to drive")

Map:   0%|          | 0/5753 [00:00<?, ? examples/s]

Map:   0%|          | 0/640 [00:00<?, ? examples/s]

  trainer = Seq2SeqTrainer(


Step,Training Loss
50,0.0
100,0.0
150,0.0
200,0.0
250,0.0
300,0.0
350,0.0
400,0.0
450,0.0
500,0.0


Saved refined T5 explainer at ./t5_explainer_model_refined
saved refined T5 to drive


In [32]:
samp_row = full_df.iloc[10]
samp_statement = samp_row['statement']
samp_speaker = samp_row.get('speaker')
samp_party = samp_row.get('party')
samp_subject = samp_row.get('subject')
samp_credibility = samp_row.get('credibility_score')

pred, conf, expl = explain_by_pipeline(
    samp_statement,
    speaker=samp_speaker,
    party=samp_party,
    subject=samp_subject,
    credibility_score=samp_credibility
)

print("Statement:", samp_statement)
print("BERT pred:", pred, "conf:", conf)
print(
    "T5 explanation (refined):",
    expl.replace(f" ({samp_credibility:.2f})", "") if pd.notna(samp_credibility) else expl
)

Statement: Since 2000, nearly 12 million Americans have slipped out of the middle class and into poverty.
BERT pred: 1 conf: 0.930569052696228
T5 explanation (refined): avg_word_len=4.39


**Reasoning**:
The previous output showed an incomplete T5 explanation, likely due to unintended string manipulation during printing. To accurately assess the quality of the refined explanations, I will remove the post-processing step from the print statement and display the raw output generated by the T5 model from the `explain_by_pipeline` function.



In [33]:
samp_row = full_df.iloc[10]
samp_statement = samp_row['statement']
samp_speaker = samp_row.get('speaker')
samp_party = samp_row.get('party')
samp_subject = samp_row.get('subject')
samp_credibility = samp_row.get('credibility_score')

pred, conf, expl = explain_by_pipeline(
    samp_statement,
    speaker=samp_speaker,
    party=samp_party,
    subject=samp_subject,
    credibility_score=samp_credibility
)

print("Statement:", samp_statement)
print("BERT pred:", pred, "conf:", conf)
print("T5 explanation (raw refined):", expl)


Statement: Since 2000, nearly 12 million Americans have slipped out of the middle class and into poverty.
BERT pred: 1 conf: 0.930569052696228
T5 explanation (raw refined): avg_word_len = 4.39


In [34]:
import os

# Load the refined T5 model and tokenizer
t5_model_path_refined = './t5_explainer_model_refined'

# Ensure the model is loaded from the refined path, otherwise reload if it's the old one.
if 't5_model' not in locals() or t5_model.config._name_or_path != t5_model_path_refined:
    if os.path.isdir(t5_model_path_refined):
        t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_path_refined)
        t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_path_refined)
        print("Loaded refined T5 model and tokenizer.")
    else:
        print("Refined T5 model not found. Please ensure it was saved correctly.")
else:
    print("Refined T5 model already loaded.")

t5_model.to(device)

# Redefine explain_by_pipeline with an increased max_length for generation
def explain_by_pipeline(statement, speaker=None, party=None, subject=None, credibility_score=None):
    # BERT predict
    p, conf = bert_predict(statement)
    label_text = 'true' if p == 1 else 'fake'

    # Get linguistic cues for the statement
    cues = linguistic_cues(statement)

    # Handle credibility_score safely
    credibility_str = "N/A"
    if credibility_score is not None and pd.notna(credibility_score):
        try:
            credibility_str = f"{float(credibility_score):.2f}"
        except (ValueError, TypeError):
            credibility_str = str(credibility_score)

    # Format linguistic cues safely
    avg_word_len_val = cues.get('avg_word_len', 0)
    avg_word_len_formatted = f"{avg_word_len_val:.2f}" if isinstance(avg_word_len_val, (int, float)) else str(avg_word_len_val)

    num_words_val = cues.get('num_words', 0)
    num_words_formatted = f"{int(num_words_val)}" if isinstance(num_words_val, (int, float)) else str(num_words_val)

    has_exaggeration_val = cues.get('has_exaggeration', 0)
    has_exaggeration_formatted = "present" if has_exaggeration_val == 1 else "absent"

    # Build prompt (matching the training input format)
    prompt = (
        f"Explain why the model predicted this statement as {label_text}.\n"
        f"Statement: {statement}\n"
        f"Subject: {subject or 'Unknown'}\n"
        f"Speaker: {speaker or 'Unknown'}\n"
        f"Party: {party or 'Unknown'}\n"
        f"Credibility: {credibility_str}\n"
        f"Linguistic cues: avg_word_len={avg_word_len_formatted}, num_words={num_words_formatted}, has_exag={has_exaggeration_formatted}"
    )

    inputs = t5_tokenizer(prompt, return_tensors='pt', truncation=True, padding=True, max_length=256).to(device)
    # Increased max_length in generate to allow for longer explanations
    out = t5_model.generate(**inputs, max_length=256, num_beams=8, early_stopping=True)
    explanation = t5_tokenizer.decode(out[0], skip_special_tokens=True)
    return p, conf, explanation

# Test again with a sample row
samp_row = full_df.iloc[10]
samp_statement = samp_row['statement']
samp_speaker = samp_row.get('speaker')
samp_party = samp_row.get('party')
samp_subject = samp_row.get('subject')
samp_credibility = samp_row.get('credibility_score')

pred, conf, expl = explain_by_pipeline(
    samp_statement,
    speaker=samp_speaker,
    party=samp_party,
    subject=samp_subject,
    credibility_score=samp_credibility
)

print("Statement:", samp_statement)
print("BERT pred:", pred, "conf:", conf)
print("T5 explanation (raw refined):")
print(expl)


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Loaded refined T5 model and tokenizer.
Statement: Since 2000, nearly 12 million Americans have slipped out of the middle class and into poverty.
BERT pred: 1 conf: 0.930569052696228
T5 explanation (raw refined):
avg_word_len = 4.39, num_words = 18, has_exag = absent


In [35]:
import os

# Load the refined T5 model and tokenizer
t5_model_path_refined = './t5_explainer_model_refined'

# Explicitly load the refined model and tokenizer
t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_path_refined)
t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_path_refined)
print("Loaded refined T5 model and tokenizer.")

t5_model.to(device)

# Redefine explain_by_pipeline with an increased max_length for generation
def explain_by_pipeline(statement, speaker=None, party=None, subject=None, credibility_score=None):
    # BERT predict
    p, conf = bert_predict(statement)
    label_text = 'true' if p == 1 else 'fake'

    # Get linguistic cues for the statement
    cues = linguistic_cues(statement)

    # Handle credibility_score safely
    credibility_str = "N/A"
    if credibility_score is not None and pd.notna(credibility_score):
        try:
            credibility_str = f"{float(credibility_score):.2f}"
        except (ValueError, TypeError):
            credibility_str = str(credibility_score)

    # Format linguistic cues safely
    avg_word_len_val = cues.get('avg_word_len', 0)
    avg_word_len_formatted = f"{avg_word_len_val:.2f}" if isinstance(avg_word_len_val, (int, float)) else str(avg_word_len_val)

    num_words_val = cues.get('num_words', 0)
    num_words_formatted = f"{int(num_words_val)}" if isinstance(num_words_val, (int, float)) else str(num_words_val)

    has_exaggeration_val = cues.get('has_exaggeration', 0)
    has_exaggeration_formatted = "present" if has_exaggeration_val == 1 else "absent"

    # Build prompt (matching the training input format)
    prompt = (
        f"Explain why the model predicted this statement as {label_text}.\n"
        f"Statement: {statement}\n"
        f"Subject: {subject or 'Unknown'}\n"
        f"Speaker: {speaker or 'Unknown'}\n"
        f"Party: {party or 'Unknown'}\n"
        f"Credibility: {credibility_str}\n"
        f"Linguistic cues: avg_word_len={avg_word_len_formatted}, num_words={num_words_formatted}, has_exag={has_exaggeration_formatted}"
    )

    inputs = t5_tokenizer(prompt, return_tensors='pt', truncation=True, padding=True, max_length=256).to(device)
    # Increased max_length in generate to allow for longer explanations
    out = t5_model.generate(**inputs, max_length=256, num_beams=8, early_stopping=True)
    explanation = t5_tokenizer.decode(out[0], skip_special_tokens=True)
    return p, conf, explanation

# Test again with a sample row
samp_row = full_df.iloc[10]
samp_statement = samp_row['statement']
samp_speaker = samp_row.get('speaker')
samp_party = samp_row.get('party')
samp_subject = samp_row.get('subject')
samp_credibility = samp_row.get('credibility_score')

pred, conf, expl = explain_by_pipeline(
    samp_statement,
    speaker=samp_speaker,
    party=samp_party,
    subject=samp_subject,
    credibility_score=samp_credibility
)

print("Statement:", samp_statement)
print("BERT pred:", pred, "conf:", conf)
print("T5 explanation (raw refined):")
print(expl)

Loaded refined T5 model and tokenizer.
Statement: Since 2000, nearly 12 million Americans have slipped out of the middle class and into poverty.
BERT pred: 1 conf: 0.930569052696228
T5 explanation (raw refined):
avg_word_len = 4.39, num_words = 18, has_exag = absent


In [36]:
sample_frac = 0.5   # use the same sample_frac as before
train_df = full_df.sample(frac=sample_frac, random_state=42).reset_index(drop=True)

# split train/val
t5_train, t5_val = train_test_split(train_df[['t5_input','t5_target']], test_size=0.1, random_state=42)

train_ds = Dataset.from_pandas(t5_train.reset_index(drop=True))
val_ds   = Dataset.from_pandas(t5_val.reset_index(drop=True))

# Redefine preprocess_t5 to use a larger max_length for labels
def preprocess_t5(batch):
    inputs = t5_tokenizer(batch['t5_input'], max_length=256, truncation=True, padding='max_length')
    labels = t5_tokenizer(batch['t5_target'], max_length=256, truncation=True, padding='max_length') # Increased max_length for labels
    inputs['labels'] = labels['input_ids']
    return inputs

tokenized_train = train_ds.map(preprocess_t5, batched=True)
tokenized_val   = val_ds.map(preprocess_t5, batched=True)

# Load T5 model (if not already loaded or to ensure it's reset for retraining)
model_name = "google/flan-t5-base"
t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
t5_model.to(device) # Ensure model is on the correct device

training_args = Seq2SeqTrainingArguments(
    output_dir="./t5_explainer_model_re_refined", # New output directory for the re-trained model
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    predict_with_generate=True,
    logging_steps=50,
    eval_steps=200,
    save_total_limit=2,
    num_train_epochs=2,
    learning_rate=3e-5,
    fp16=torch.cuda.is_available()
)

trainer = Seq2SeqTrainer(
    model=t5_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=t5_tokenizer,
)

trainer.train()
trainer.save_model("./t5_explainer_model_re_refined")
t5_tokenizer.save_pretrained("./t5_explainer_model_re_refined")
print("Saved re-refined T5 explainer at ./t5_explainer_model_re_refined")
!cp -r /content/t5_explainer_model_re_refined /content/drive/MyDrive/t5_explainer_model_re_refined
print("saved re-refined T5 to drive")

Map:   0%|          | 0/5753 [00:00<?, ? examples/s]

Map:   0%|          | 0/640 [00:00<?, ? examples/s]

  trainer = Seq2SeqTrainer(


Step,Training Loss
50,0.0
100,0.0
150,0.0
200,0.0
250,0.0
300,0.0
350,0.0
400,0.0
450,0.0
500,0.0


Saved re-refined T5 explainer at ./t5_explainer_model_re_refined
saved re-refined T5 to drive


In [37]:
import os

# Load the refined T5 model and tokenizer
t5_model_path_refined = './t5_explainer_model_re_refined'

# Explicitly load the refined model and tokenizer
t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_path_refined)
t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_path_refined)
print("Loaded refined T5 model and tokenizer.")

t5_model.to(device)

# Redefine explain_by_pipeline with an increased max_length for generation
def explain_by_pipeline(statement, speaker=None, party=None, subject=None, credibility_score=None):
    # BERT predict
    p, conf = bert_predict(statement)
    label_text = 'true' if p == 1 else 'fake'

    # Get linguistic cues for the statement
    cues = linguistic_cues(statement)

    # Handle credibility_score safely
    credibility_str = "N/A"
    if credibility_score is not None and pd.notna(credibility_score):
        try:
            credibility_str = f"{float(credibility_score):.2f}"
        except (ValueError, TypeError):
            credibility_str = str(credibility_score)

    # Format linguistic cues safely
    avg_word_len_val = cues.get('avg_word_len', 0)
    avg_word_len_formatted = f"{avg_word_len_val:.2f}" if isinstance(avg_word_len_val, (int, float)) else str(avg_word_len_val)

    num_words_val = cues.get('num_words', 0)
    num_words_formatted = f"{int(num_words_val)}" if isinstance(num_words_val, (int, float)) else str(num_words_val)

    has_exaggeration_val = cues.get('has_exaggeration', 0)
    has_exaggeration_formatted = "present" if has_exaggeration_val == 1 else "absent"

    # Build prompt (matching the training input format)
    prompt = (
        f"Explain why the model predicted this statement as {label_text}.\n"
        f"Statement: {statement}\n"
        f"Subject: {subject or 'Unknown'}\n"
        f"Speaker: {speaker or 'Unknown'}\n"
        f"Party: {party or 'Unknown'}\n"
        f"Credibility: {credibility_str}\n"
        f"Linguistic cues: avg_word_len={avg_word_len_formatted}, num_words={num_words_formatted}, has_exag={has_exaggeration_formatted}"
    )

    inputs = t5_tokenizer(prompt, return_tensors='pt', truncation=True, padding=True, max_length=256).to(device)
    # Increased max_length in generate to allow for longer explanations
    out = t5_model.generate(**inputs, max_length=256, num_beams=8, early_stopping=True)
    explanation = t5_tokenizer.decode(out[0], skip_special_tokens=True)
    return p, conf, explanation

# Test again with a sample row
samp_row = full_df.iloc[10]
samp_statement = samp_row['statement']
samp_speaker = samp_row.get('speaker')
samp_party = samp_row.get('party')
samp_subject = samp_row.get('subject')
samp_credibility = samp_row.get('credibility_score')

pred, conf, expl = explain_by_pipeline(
    samp_statement,
    speaker=samp_speaker,
    party=samp_party,
    subject=samp_subject,
    credibility_score=samp_credibility
)

print("Statement:", samp_statement)
print("BERT pred:", pred, "conf:", conf)
print("T5 explanation (raw refined):")
print(expl)

Loaded refined T5 model and tokenizer.
Statement: Since 2000, nearly 12 million Americans have slipped out of the middle class and into poverty.
BERT pred: 1 conf: 0.930569052696228
T5 explanation (raw refined):
avg_word_len = 4.39, num_words = 18, has_exag = absent


In [6]:
def extract_top_words(statement, top_n=5):
    # Tokenize input
    inputs = tokenizer(
        statement,
        return_tensors="pt",
        truncation=True,
        padding=True
    )

    # Get outputs with attentions
    with torch.no_grad():
        outputs = bert(**inputs)
        attentions = outputs.attentions   # list of layers

    # We take the last layer attention
    last_layer = attentions[-1]  # shape: (batch, heads, seq, seq)

    # Average across all heads
    avg_attention = last_layer.mean(dim=1)[0]  # shape: (seq, seq)

    # CLS attention to each token (how much CLS attends to token)
    cls_attention = avg_attention[0]  # shape: (seq,)

    # Convert tokens
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    # Reconstruct words: keep track of subwords
    word_scores = {}
    current_word = ""
    current_score_sum = 0
    current_token_count = 0

    for token, score in zip(tokens, cls_attention):
        token = token.replace("##", "")

        if token in ["[CLS]", "[SEP]", "[PAD]"]:
            continue


        if token.startswith("##"):
            current_word += token[2:]
        else:
            # Save previous word if exists
            if current_word:
                word_scores[current_word] = current_score_sum / max(1, current_token_count)

            # Start new word
            current_word = token
            current_score_sum = 0
            current_token_count = 0

        # Accumulate scores
        current_score_sum += float(score)
        current_token_count += 1

    # Final word
    if current_word:
        word_scores[current_word] = current_score_sum / max(1, current_token_count)

    # Sort by importance
    sorted_words = sorted(word_scores.items(), key=lambda x: x[1], reverse=True)

    # Return top N as comma-separated string
    top_words = [w for w, s in sorted_words[:top_n]]

    return ", ".join(top_words)


In [7]:
test_statement = "Climate change is a total hoax made up by corrupt scientists."
extract_top_words(test_statement, top_n=5)



'is, a, climate, corrupt, .'

In [4]:
def generate_target_explanation(text, label):
    if label == 1:
        return (
            "The statement is likely fake because it contains exaggeration, lacks verifiable "
            "evidence, or uses vague claims that cannot be fact-checked."
        )
    else:
        return (
            "The statement appears true because it contains factual details, specific evidence, "
            "or verifiable information."
        )


In [5]:
explain_df = pd.DataFrame()
explain_df["input_text"] = full_df.apply(
    lambda row: f"explain: {row['statement']} ||| label: {'fake' if row['binary_label']==1 else 'real'}",
    axis=1
)
explain_df["target_text"] = full_df.apply(
    lambda row: generate_target_explanation(row["statement"], row["binary_label"]),
    axis=1
)

explain_df.head()


Unnamed: 0,input_text,target_text
0,explain: When did the decline of coal start? I...,The statement is likely fake because it contai...
1,explain: Hillary Clinton agrees with John McCa...,The statement is likely fake because it contai...
2,explain: Health care reform legislation is lik...,The statement appears true because it contains...
3,explain: The economic turnaround started at th...,The statement is likely fake because it contai...
4,explain: The Chicago Bears have had more start...,The statement is likely fake because it contai...


In [6]:
from datasets import Dataset

explain_dataset = Dataset.from_pandas(explain_df)
explain_dataset = explain_dataset.train_test_split(test_size=0.1, seed=42)
explain_dataset


DatasetDict({
    train: Dataset({
        features: ['input_text', 'target_text'],
        num_rows: 11507
    })
    test: Dataset({
        features: ['input_text', 'target_text'],
        num_rows: 1279
    })
})

In [7]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

model_name = "t5-base"   # or "t5-small" if you need speed

explain_tokenizer = T5Tokenizer.from_pretrained(model_name)
explain_model = T5ForConditionalGeneration.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [8]:
def tokenize(batch):
    inputs = explain_tokenizer(
        batch["input_text"],
        padding="max_length",
        truncation=True,
        max_length=256
    )

    targets = explain_tokenizer(
        batch["target_text"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

    inputs["labels"] = targets["input_ids"]
    return inputs


In [9]:
tokenized_dataset = explain_dataset.map(tokenize, batched=True)


Map:   0%|          | 0/11507 [00:00<?, ? examples/s]

Map:   0%|          | 0/1279 [00:00<?, ? examples/s]

In [11]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./t5_explainer_model_re_refined2",
    eval_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    save_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    warmup_steps=100,
    learning_rate=3e-4,
    fp16=True,
)


In [12]:
trainer = Trainer(
    model=explain_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

trainer.train()
trainer.save_model("./t5_explainer_model_re_refined2")
t5_tokenizer.save_pretrained("./t5_explainer_model_re_refined2")
print("Saved re-refined T5 explainer at ./t5_explainer_model_re_refined2")
!cp -r /content/t5_explainer_model_re_refined2 /content/drive/MyDrive/t5_explainer_model_re_refined2
print("saved re-refined T5 to drive")

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mleonkanabu[0m ([33mleonkanabu-n-a[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,0.0,1e-06
2,0.0,1e-06
3,0.0,


NameError: name 't5_tokenizer' is not defined

In [17]:
trainer.save_model("./t5_explainer_model_re_refined2")
explain_tokenizer.save_pretrained("./t5_explainer_model_re_refined2")
print("Saved re-refined T5 explainer at ./t5_explainer_model_re_refined2")

# Create the directory in Google Drive if it doesn't exist
!mkdir -p /content/drive/MyDrive/t5_explainer_model_re_refined2

!cp -r /content/t5_explainer_model_re_refined2 /content/drive/MyDrive/t5_explainer_model_re_refined2
print("saved re-refined T5 to drive")

Saved re-refined T5 explainer at ./t5_explainer_model_re_refined2
saved re-refined T5 to drive


In [36]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the re-refined T5 model and tokenizer directly for this function
t5_model_path_re_refined = './t5_explainer_model_re_refined2'
explain_tokenizer = T5Tokenizer.from_pretrained(t5_model_path_re_refined)
explain_model = T5ForConditionalGeneration.from_pretrained(t5_model_path_re_refined)
explain_model.to(device)

def generate_explanation(statement, pred_label):
    # Convert numeric → text label
    label = "fake" if pred_label == 1 else "real"

    prompt = (
        f"explain the classification.\n"
        f"statement: {statement}\n"
        f"classification: {label}\n\n"
        f"Give a detailed, human-like explanation."
    )

    # Tokenize
    inputs = explain_tokenizer(prompt, return_tensors="pt")

    # Ensure model + inputs on same device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    explain_model.to(device)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate
    with torch.no_grad():
        output_ids = explain_model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            num_beams=5,
            max_length=200,
            min_length=50,              # ensures real output
            temperature=0.8,
            top_p=0.9,
            do_sample=False,            # beam search only
            repetition_penalty=2.0
        )

    explanation = explain_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return explanation



In [37]:
sample = "Millions of people attended my rally yesterday!"
pred_label = 1  # fake

print(generate_explanation(sample, pred_label))


s


In [38]:
!ls t5_explainer_model_re_refined2


added_tokens.json  config.json		   special_tokens_map.json
checkpoint-2877    generation_config.json  spiece.model
checkpoint-5754    model.safetensors	   tokenizer_config.json
checkpoint-8631    runs			   training_args.bin


In [35]:
sample = "Millions of people attended my rally yesterday!"
pred_label = 1  # fake

print(generate_explanation(sample, pred_label))




In [39]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch, os, pandas as pd

# -----------------------
# LOAD DATASET
# -----------------------
df = pd.DataFrame([
    {"text": "Millions of people attended my rally yesterday!",
     "label": 1,
     "explanation": "Uses exaggeration: the claim about millions attending is extremely unlikely."},

    {"text": "The earth orbits around the sun.",
     "label": 0,
     "explanation": "Factually correct scientific statement about planetary motion."}
])

class ExplainDataset(Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        input_text = f"explain label {row['label']}: {row['text']}"
        target = row["explanation"]

        enc = self.tokenizer(
            input_text, truncation=True, padding="max_length", max_length=128, return_tensors="pt"
        )
        target_enc = self.tokenizer(
            target, truncation=True, padding="max_length", max_length=64, return_tensors="pt"
        )

        enc = {k: v.squeeze() for k, v in enc.items()}
        enc["labels"] = target_enc["input_ids"].squeeze()
        return enc

# -----------------------
# MODEL + TOKENIZER
# -----------------------
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

dataset = ExplainDataset(df, tokenizer)


args = TrainingArguments(
    output_dir="t5_explainer",
    overwrite_output_dir=True,
    learning_rate=3e-4,
    per_device_train_batch_size=4,
    num_train_epochs=6,
    save_strategy="epoch",
)

trainer = Trainer(model=model, args=args, train_dataset=dataset)

trainer.train()


trainer.save_model("t5_explainer2")
tokenizer.save_pretrained("t5_explainer2")


print("T5 explainer saved successfully!")
!cp -r /content/t5_explainer2 /content/drive/MyDrive/t5_explainer2
print("saved re-refined T5 to drive")


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Step,Training Loss


T5 explainer saved successfully!
saved re-refined T5 to drive


In [41]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5_explainer2")
model = T5ForConditionalGeneration.from_pretrained("t5_explainer2")

def generate_explanation(text, label):
    input_text = f"explain label {label}: {text}"

    encoding = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=128
    )

    output = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=80,
        num_beams=6,
        length_penalty=0.7
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)


# TEST
sample = "Millions of people attended my rally yesterday!"
pred_label = 1  # fake news
print(generate_explanation(sample, pred_label))


Expliquez label 1: Millions of people attended my rally yesterday!
