In [1]:
!pip install transformers datasets torch scikit-learn pandas faiss-cpu sentence-transformers

import os, zipfile
import pandas as pd
import torch
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from torch.utils.data import Dataset
from datasets import Dataset as HFDataset

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_6

In [2]:
# Build FAISS index from retrieval headlines CSV
headlines_df = pd.read_csv("live_data_retrieval_headlines.csv")
live_headlines = headlines_df['Headline'].dropna().tolist()

# Embed & normalize
retrieval_model = SentenceTransformer("all-MiniLM-L6-v2")
embs = retrieval_model.encode(live_headlines, convert_to_numpy=True, show_progress_bar=True)
faiss.normalize_L2(embs)

# Create IndexFlatIP and add vectors
d = embs.shape[1]
faiss_index = faiss.IndexFlatIP(d)
faiss_index.add(embs)

print(f"Built FAISS index with {faiss_index.ntotal} vectors")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/71 [00:00<?, ?it/s]

Built FAISS index with 2247 vectors


In [3]:
faiss.write_index(faiss_index, "faiss_live_headlines.index")
print("FAISS index saved as 'faiss_live_headlines.index'")

FAISS index saved as 'faiss_live_headlines.index'


In [4]:
# Load & preprocess live test dataset
test_df = pd.read_csv("politifact-sample.csv")
test_df = test_df.dropna(subset=['title','label'])
test_df['label'] = test_df['label'].astype(int)
test_df.reset_index(drop=True, inplace=True)
print(f"Loaded test set: {len(test_df)} samples")

Loaded test set: 279 samples


In [5]:
# Retrieval + Input‐prep functions & Dataset class
def search_similar_articles(query_headline, model, faiss_index, headlines, k=3):
    q = query_headline.strip().lower()
    q_emb = model.encode([q], convert_to_numpy=True)
    faiss.normalize_L2(q_emb)
    distances, indices = faiss_index.search(q_emb, k)
    results = []
    for rank, idx in enumerate(indices[0]):
        if idx < 0: continue
        results.append(f"{headlines[idx]} (dist: {distances[0][rank]:.4f})")
    return results

def prepare_input(article, facts, tokenizer, max_length=512):
    art_tokens = tokenizer.encode(article, add_special_tokens=False)
    input_ids = [tokenizer.cls_token_id] + art_tokens + [tokenizer.sep_token_id]
    tt = [0]*(len(art_tokens)+2)
    for fact in facts:
        f_tokens = tokenizer.encode(fact, add_special_tokens=False)
        input_ids += f_tokens + [tokenizer.sep_token_id]
        tt += [1]*(len(f_tokens)+1)
    am = [1]*len(input_ids)
    if len(input_ids)>max_length:
        input_ids, tt, am = input_ids[:max_length], tt[:max_length], am[:max_length]
    else:
        pad = max_length-len(input_ids)
        input_ids += [tokenizer.pad_token_id]*pad
        tt += [0]*pad
        am += [0]*pad
    return {
        'input_ids': torch.tensor(input_ids),
        'token_type_ids': torch.tensor(tt),
        'attention_mask': torch.tensor(am)
    }

class FakeNewsDataset(Dataset):
    def __init__(self, df, tokenizer, retrieval_model, faiss_index, headlines, max_length=512):
        self.df = df.reset_index(drop=True)
        self.tokenizer, self.rm, self.idx, self.hd = tokenizer, retrieval_model, faiss_index, headlines
        self.max_length = max_length
    def __len__(self): return len(self.df)
    def __getitem__(self, i):
        row = self.df.iloc[i]
        art, lbl = row['title'], row['label']
        facts = search_similar_articles(art, self.rm, self.idx, self.hd, k=3)
        enc = prepare_input(art, facts, self.tokenizer, self.max_length)
        enc['labels'] = torch.tensor(lbl, dtype=torch.long)
        return enc


In [6]:
# Unzip & load fine‐tuned RAG model
with zipfile.ZipFile("fine_tuned_bert_update_4_RAG.zip","r") as z:
    z.extractall("fine_tuned_bert_update_4_RAG")

model_dir = "fine_tuned_bert_update_4_RAG"
model = BertForSequenceClassification.from_pretrained(model_dir, num_labels=2)
tokenizer = BertTokenizer.from_pretrained(model_dir)


In [7]:
# Prepare dataset & define metrics
test_dataset = FakeNewsDataset(test_df, tokenizer, retrieval_model, faiss_index, live_headlines)

def compute_metrics(pred):
    logits, labels = pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc}

args = TrainingArguments(
    output_dir="./eval_live",
    per_device_eval_batch_size=8,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)


In [8]:
# Cell 8: Run evaluation
metrics = trainer.evaluate()
print("Evaluation metrics:", metrics)

from sklearn.metrics import classification_report
preds_out = trainer.predict(test_dataset)
y_true, y_pred = preds_out.label_ids, np.argmax(preds_out.predictions, axis=-1)
print(classification_report(y_true, y_pred, target_names=["Fake","Real"]))

Evaluation metrics: {'eval_loss': 0.356037974357605, 'eval_model_preparation_time': 0.0046, 'eval_accuracy': 0.921146953405018, 'eval_runtime': 10.0161, 'eval_samples_per_second': 27.855, 'eval_steps_per_second': 3.494}
              precision    recall  f1-score   support

        Fake       0.95      0.97      0.96       257
        Real       0.50      0.36      0.42        22

    accuracy                           0.92       279
   macro avg       0.72      0.67      0.69       279
weighted avg       0.91      0.92      0.92       279



In [9]:
counts = test_df['label'].value_counts().sort_index()
print(f"Number of Fake examples (label 0): {counts.get(0, 0)}")
print(f"Number of Real examples (label 1): {counts.get(1, 0)}")

Number of Fake examples (label 0): 257
Number of Real examples (label 1): 22


In [10]:
# CL Models Test
with zipfile.ZipFile("fine_tuned_bert_initial.zip", "r") as z:
    z.extractall("fine_tuned_bert_initial")

cl_model_dir = "fine_tuned_bert_initial"
cl_model = BertForSequenceClassification.from_pretrained(cl_model_dir, num_labels=2)
cl_tokenizer = BertTokenizer.from_pretrained(cl_model_dir)

In [11]:
## Prepare Test Dataset for CL Model

def tokenize_function(texts):
    return cl_tokenizer(texts, padding="max_length", truncation=True, max_length=128)

# Tokenize all test titles
test_texts = test_df['title'].tolist()
test_labels = test_df['label'].tolist()
test_encodings = tokenize_function(test_texts)

# Build HuggingFace Dataset
cl_test_dataset = HFDataset.from_dict({
    "input_ids": test_encodings["input_ids"],
    "attention_mask": test_encodings["attention_mask"],
    "labels": test_labels,
})

In [12]:
## Evaluate Continual‑Learning Models
cl_eval_args = TrainingArguments(
    output_dir="./cl_eval",
    per_device_eval_batch_size=16,
    report_to="none"
)

def compute_metrics_cl(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, preds, labels=[0,1])
    return {
        "accuracy": acc,
        "precision_fake": prec[0], "recall_fake": rec[0], "f1_fake": f1[0],
        "precision_real": prec[1], "recall_real": rec[1], "f1_real": f1[1]
    }

cl_trainer = Trainer(
    model=cl_model,
    args=cl_eval_args,
    eval_dataset=cl_test_dataset,
    compute_metrics=compute_metrics_cl
)

# Run evaluation
cl_metrics = cl_trainer.evaluate()
print("Continual‑Learning Model Metrics:", cl_metrics)

# Detailed classification report
cl_preds = cl_trainer.predict(cl_test_dataset)
y_true, y_pred = cl_preds.label_ids, np.argmax(cl_preds.predictions, axis=-1)
print("Continual Learning Baseline Period:")
print(classification_report(y_true, y_pred, target_names=["Fake","Real"]))


Continual‑Learning Model Metrics: {'eval_loss': 0.548371434211731, 'eval_model_preparation_time': 0.0028, 'eval_accuracy': 0.7562724014336918, 'eval_precision_fake': 0.9701492537313433, 'eval_recall_fake': 0.7587548638132295, 'eval_f1_fake': 0.851528384279476, 'eval_precision_real': 0.20512820512820512, 'eval_recall_real': 0.7272727272727273, 'eval_f1_real': 0.32, 'eval_runtime': 2.0289, 'eval_samples_per_second': 137.512, 'eval_steps_per_second': 8.872}
Continual Learning Baseline Period:
              precision    recall  f1-score   support

        Fake       0.97      0.76      0.85       257
        Real       0.21      0.73      0.32        22

    accuracy                           0.76       279
   macro avg       0.59      0.74      0.59       279
weighted avg       0.91      0.76      0.81       279



In [13]:
# Period 4 CL Model
with zipfile.ZipFile("fine_tuned_bert_update_4_(2022).zip", "r") as z:
    z.extractall("fine_tuned_bert_update_4_(2022)")

period4_cl_model_dir = "fine_tuned_bert_update_4_(2022)"
period4_cl_model = BertForSequenceClassification.from_pretrained(period4_cl_model_dir, num_labels=2)
period4_cl_tokenizer = BertTokenizer.from_pretrained(period4_cl_model_dir)


In [14]:
cl_p4_trainer = Trainer(
    model=period4_cl_model,
    args=cl_eval_args,
    eval_dataset=cl_test_dataset,
    compute_metrics=compute_metrics_cl
)

cl_eval_result = cl_p4_trainer.evaluate()
print(cl_eval_result)

cl_p4_preds = cl_p4_trainer.predict(cl_test_dataset)
y_true, y_pred = cl_p4_preds.label_ids, np.argmax(cl_p4_preds.predictions, axis=-1)
print("Continual Learning Period 4:")
print(classification_report(y_true, y_pred, target_names=["Fake","Real"]))

{'eval_loss': 0.26632776856422424, 'eval_model_preparation_time': 0.0029, 'eval_accuracy': 0.9139784946236559, 'eval_precision_fake': 0.9678714859437751, 'eval_recall_fake': 0.9377431906614786, 'eval_f1_fake': 0.9525691699604744, 'eval_precision_real': 0.4666666666666667, 'eval_recall_real': 0.6363636363636364, 'eval_f1_real': 0.5384615384615384, 'eval_runtime': 1.9374, 'eval_samples_per_second': 144.005, 'eval_steps_per_second': 9.291}
Continual Learning Period 4:
              precision    recall  f1-score   support

        Fake       0.97      0.94      0.95       257
        Real       0.47      0.64      0.54        22

    accuracy                           0.91       279
   macro avg       0.72      0.79      0.75       279
weighted avg       0.93      0.91      0.92       279

