In [12]:
!pip install transformers datasets torch scikit-learn pandas faiss-cpu sentence-transformers

import os, zipfile
import pandas as pd
import torch
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from torch.utils.data import Dataset
from datasets import Dataset as HFDataset



In [5]:
# Build FAISS index from retrieval headlines CSV
headlines_df = pd.read_csv("live_data_retrieval_headlines.csv")
live_headlines = headlines_df['Headline'].dropna().tolist()

# Embed & normalize
retrieval_model = SentenceTransformer("all-MiniLM-L6-v2")
embs = retrieval_model.encode(live_headlines, convert_to_numpy=True, show_progress_bar=True)
faiss.normalize_L2(embs)

# Create IndexFlatIP and add vectors
d = embs.shape[1]
faiss_index = faiss.IndexFlatIP(d)
faiss_index.add(embs)

print(f"Built FAISS index with {faiss_index.ntotal} vectors")

Batches:   0%|          | 0/71 [00:00<?, ?it/s]

Built FAISS index with 2247 vectors


In [6]:
faiss.write_index(faiss_index, "faiss_live_headlines.index")
print("FAISS index saved as 'faiss_live_headlines.index'")

FAISS index saved as 'faiss_live_headlines.index'


In [2]:
# Load & preprocess live test dataset
test_df = pd.read_csv("politifact-sample.csv")
test_df = test_df.dropna(subset=['title','label'])
test_df['label'] = test_df['label'].astype(int)
test_df.reset_index(drop=True, inplace=True)
print(f"Loaded test set: {len(test_df)} samples")

Loaded test set: 2777 samples


In [8]:
# Retrieval + Input‐prep functions & Dataset class
def search_similar_articles(query_headline, model, faiss_index, headlines, k=3):
    q = query_headline.strip().lower()
    q_emb = model.encode([q], convert_to_numpy=True)
    faiss.normalize_L2(q_emb)
    distances, indices = faiss_index.search(q_emb, k)
    results = []
    for rank, idx in enumerate(indices[0]):
        if idx < 0: continue
        results.append(f"{headlines[idx]} (dist: {distances[0][rank]:.4f})")
    return results

def prepare_input(article, facts, tokenizer, max_length=512):
    art_tokens = tokenizer.encode(article, add_special_tokens=False)
    input_ids = [tokenizer.cls_token_id] + art_tokens + [tokenizer.sep_token_id]
    tt = [0]*(len(art_tokens)+2)
    for fact in facts:
        f_tokens = tokenizer.encode(fact, add_special_tokens=False)
        input_ids += f_tokens + [tokenizer.sep_token_id]
        tt += [1]*(len(f_tokens)+1)
    am = [1]*len(input_ids)
    if len(input_ids)>max_length:
        input_ids, tt, am = input_ids[:max_length], tt[:max_length], am[:max_length]
    else:
        pad = max_length-len(input_ids)
        input_ids += [tokenizer.pad_token_id]*pad
        tt += [0]*pad
        am += [0]*pad
    return {
        'input_ids': torch.tensor(input_ids),
        'token_type_ids': torch.tensor(tt),
        'attention_mask': torch.tensor(am)
    }

class FakeNewsDataset(Dataset):
    def __init__(self, df, tokenizer, retrieval_model, faiss_index, headlines, max_length=512):
        self.df = df.reset_index(drop=True)
        self.tokenizer, self.rm, self.idx, self.hd = tokenizer, retrieval_model, faiss_index, headlines
        self.max_length = max_length
    def __len__(self): return len(self.df)
    def __getitem__(self, i):
        row = self.df.iloc[i]
        art, lbl = row['title'], row['label']
        facts = search_similar_articles(art, self.rm, self.idx, self.hd, k=3)
        enc = prepare_input(art, facts, self.tokenizer, self.max_length)
        enc['labels'] = torch.tensor(lbl, dtype=torch.long)
        return enc


In [9]:
# Unzip & load fine‐tuned RAG model
with zipfile.ZipFile("fine_tuned_bert_update_4_RAG.zip","r") as z:
    z.extractall("fine_tuned_bert_update_4_RAG")

model_dir = "fine_tuned_bert_update_4_RAG"
model = BertForSequenceClassification.from_pretrained(model_dir, num_labels=2)
tokenizer = BertTokenizer.from_pretrained(model_dir)


In [10]:
# Prepare dataset & define metrics
test_dataset = FakeNewsDataset(test_df, tokenizer, retrieval_model, faiss_index, live_headlines)

def compute_metrics(pred):
    logits, labels = pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc}

args = TrainingArguments(
    output_dir="./eval_live",
    per_device_eval_batch_size=8,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)


In [11]:
# Cell 8: Run evaluation
metrics = trainer.evaluate()
print("Evaluation metrics:", metrics)

from sklearn.metrics import classification_report
preds_out = trainer.predict(test_dataset)
y_true, y_pred = preds_out.label_ids, np.argmax(preds_out.predictions, axis=-1)
print(classification_report(y_true, y_pred, target_names=["Fake","Real"]))

Evaluation metrics: {'eval_loss': 0.3867934048175812, 'eval_model_preparation_time': 0.0057, 'eval_accuracy': 0.9150162045372704, 'eval_runtime': 104.2114, 'eval_samples_per_second': 26.648, 'eval_steps_per_second': 3.339}
              precision    recall  f1-score   support

        Fake       0.94      0.97      0.95      2512
        Real       0.58      0.39      0.47       265

    accuracy                           0.92      2777
   macro avg       0.76      0.68      0.71      2777
weighted avg       0.90      0.92      0.91      2777



In [12]:
counts = test_df['label'].value_counts().sort_index()
print(f"Number of Fake examples (label 0): {counts.get(0, 0)}")
print(f"Number of Real examples (label 1): {counts.get(1, 0)}")

Number of Fake examples (label 0): 2512
Number of Real examples (label 1): 265


In [7]:
# CL Models Test
with zipfile.ZipFile("fine_tuned_bert_initial.zip", "r") as z:
    z.extractall("fine_tuned_bert_initial")

cl_model_dir = "fine_tuned_bert_initial"
cl_model = BertForSequenceClassification.from_pretrained(cl_model_dir, num_labels=2)
cl_tokenizer = BertTokenizer.from_pretrained(cl_model_dir)

In [10]:
## Prepare Test Dataset for CL Model

def tokenize_function(texts):
    return cl_tokenizer(texts, padding="max_length", truncation=True, max_length=128)

# Tokenize all test titles
test_texts = test_df['title'].tolist()
test_labels = test_df['label'].tolist()
test_encodings = tokenize_function(test_texts)

# Build HuggingFace Dataset
cl_test_dataset = HFDataset.from_dict({
    "input_ids": test_encodings["input_ids"],
    "attention_mask": test_encodings["attention_mask"],
    "labels": test_labels,
})

In [13]:
## Evaluate Continual‑Learning Models
cl_eval_args = TrainingArguments(
    output_dir="./cl_eval",
    per_device_eval_batch_size=16,
    report_to="none"
)

def compute_metrics_cl(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, preds, labels=[0,1])
    return {
        "accuracy": acc,
        "precision_fake": prec[0], "recall_fake": rec[0], "f1_fake": f1[0],
        "precision_real": prec[1], "recall_real": rec[1], "f1_real": f1[1]
    }

cl_trainer = Trainer(
    model=cl_model,
    args=cl_eval_args,
    eval_dataset=cl_test_dataset,
    compute_metrics=compute_metrics_cl
)

# Run evaluation
cl_metrics = cl_trainer.evaluate()
print("Continual‑Learning Model Metrics:", cl_metrics)

# Detailed classification report
cl_preds = cl_trainer.predict(cl_test_dataset)
y_true, y_pred = cl_preds.label_ids, np.argmax(cl_preds.predictions, axis=-1)
print("Continual Learning Baseline Period:")
print(classification_report(y_true, y_pred, target_names=["Fake","Real"]))


Continual‑Learning Model Metrics: {'eval_loss': 0.4858787953853607, 'eval_model_preparation_time': 0.0066, 'eval_accuracy': 0.7850198055455527, 'eval_precision_fake': 0.9601153291686689, 'eval_recall_fake': 0.7953821656050956, 'eval_f1_fake': 0.8700195950359242, 'eval_precision_real': 0.2614942528735632, 'eval_recall_real': 0.6867924528301886, 'eval_f1_real': 0.37877211238293446, 'eval_runtime': 21.9764, 'eval_samples_per_second': 126.363, 'eval_steps_per_second': 7.918}
Continual Learning Baseline Period:
              precision    recall  f1-score   support

        Fake       0.96      0.80      0.87      2512
        Real       0.26      0.69      0.38       265

    accuracy                           0.79      2777
   macro avg       0.61      0.74      0.62      2777
weighted avg       0.89      0.79      0.82      2777



In [14]:
# Period 4 CL Model
with zipfile.ZipFile("fine_tuned_bert_update_4_(2022).zip", "r") as z:
    z.extractall("fine_tuned_bert_update_4_(2022)")

period4_cl_model_dir = "fine_tuned_bert_update_4_(2022)"
period4_cl_model = BertForSequenceClassification.from_pretrained(period4_cl_model_dir, num_labels=2)
period4_cl_tokenizer = BertTokenizer.from_pretrained(period4_cl_model_dir)


In [16]:
cl_p4_trainer = Trainer(
    model=period4_cl_model,
    args=cl_eval_args,
    eval_dataset=cl_test_dataset,
    compute_metrics=compute_metrics_cl
)

cl_eval_result = cl_p4_trainer.evaluate()
print(cl_eval_result)

cl_p4_preds = cl_p4_trainer.predict(cl_test_dataset)
y_true, y_pred = cl_p4_preds.label_ids, np.argmax(cl_p4_preds.predictions, axis=-1)
print("Continual Learning Period 4:")
print(classification_report(y_true, y_pred, target_names=["Fake","Real"]))

{'eval_loss': 0.2675170302391052, 'eval_model_preparation_time': 0.0028, 'eval_accuracy': 0.9146561037090385, 'eval_precision_fake': 0.9533678756476683, 'eval_recall_fake': 0.9522292993630573, 'eval_f1_fake': 0.9527982473610834, 'eval_precision_real': 0.5522388059701493, 'eval_recall_real': 0.5584905660377358, 'eval_f1_real': 0.5553470919324578, 'eval_runtime': 20.0882, 'eval_samples_per_second': 138.241, 'eval_steps_per_second': 8.662}
Continual Learning Period 4:
              precision    recall  f1-score   support

        Fake       0.95      0.95      0.95      2512
        Real       0.55      0.56      0.56       265

    accuracy                           0.91      2777
   macro avg       0.75      0.76      0.75      2777
weighted avg       0.92      0.91      0.91      2777

