In [1]:
#imports
!pip install transformers datasets torch scikit-learn pandas matplotlib
import inspect
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Seed every RNG this notebook relies on. Seeding NumPy alone is not enough:
# the classification head that `from_pretrained` adds on top of BERT is newly
# initialised by PyTorch, so torch's generator has to be seeded too for the
# run to be repeatable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)



In [2]:
import os

# Switch the Weights & Biases integration off through its environment flag
# before any Trainer is constructed further down.
os.environ.update({"WANDB_DISABLED": "true"})


In [3]:
# Load the CSV; its first row supplies the column names.
df = pd.read_csv("Liar2_combined.csv", header=0)

# Parse 'date' strictly as YYYY-MM-DD. Values that do not match become NaT
# (errors='coerce') and those rows are then discarded.
parsed_dates = pd.to_datetime(df['date'], format='%Y-%m-%d', errors='coerce')
df = df.assign(date=parsed_dates).dropna(subset=['date'])

print(df.head())


   label                                              title       date
0      1  90 percent of Americans "support universal bac... 2017-10-02
1      0  Last year was one of the deadliest years ever ... 2017-05-19
2      0  Bernie Sanders's plan is "to raise your taxes ... 2015-10-28
3      1  Voter ID is supported by an overwhelming major... 2021-12-08
4      0  Says Barack Obama "robbed Medicare (of) $716 b... 2012-08-12


In [4]:
# Date ranges for each time block (both endpoints are inclusive)
baseline_start, baseline_end = '2007-01-01', '2015-12-31'
update1_start, update1_end   = '2016-01-01', '2017-12-31'
update2_start, update2_end   = '2018-01-01', '2019-12-31'
update3_start, update3_end   = '2020-01-01', '2021-12-31'
update4_start, update4_end   = '2022-01-01', '2022-12-31'
test_start, test_end         = '2023-01-01', '2023-12-31'


def _rows_in_range(frame, start, end):
    """Return a copy of the rows of `frame` whose 'date' falls in [start, end]."""
    return frame[frame['date'].between(start, end)].copy()


# One independent frame per block: baseline training data, four successive
# update blocks, and the held-out test year.
baseline_df = _rows_in_range(df, baseline_start, baseline_end)
update1_df = _rows_in_range(df, update1_start, update1_end)
update2_df = _rows_in_range(df, update2_start, update2_end)
update3_df = _rows_in_range(df, update3_start, update3_end)
update4_df = _rows_in_range(df, update4_start, update4_end)
test_df = _rows_in_range(df, test_start, test_end)

# Display sample sizes for each block
for caption, block_df in (
    ("Baseline samples:", baseline_df),
    ("Update 1 samples:", update1_df),
    ("Update 2 samples:", update2_df),
    ("Update 3 samples:", update3_df),
    ("Update 4 samples:", update4_df),
    ("Test samples:", test_df),
):
    print(caption, len(block_df))

Baseline samples: 10932
Update 1 samples: 3031
Update 2 samples: 2730
Update 3 samples: 3772
Update 4 samples: 1688
Test samples: 807


In [5]:
# Row count per label value in the baseline block, printed under a heading.
print("Baseline distribution:", baseline_df['label'].value_counts(), sep="\n")

Baseline distribution:
label
1    6147
0    4785
Name: count, dtype: int64


In [6]:
# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


def tokenize_function(texts):
    """Encode `texts` with the notebook-level `tokenizer`.

    Every sequence is truncated to at most 128 tokens and padded up to that
    same length, so all encodings come back with a uniform size.

    Args:
        texts: the text(s) to encode, passed straight to the tokenizer.

    Returns:
        Whatever the tokenizer call returns; the rest of the notebook reads
        its "input_ids" and "attention_mask" entries.
    """
    encode_options = {"padding": "max_length", "truncation": True, "max_length": 128}
    return tokenizer(texts, **encode_options)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [7]:
# Baseline processing: tokenise the baseline titles and bundle them with
# their labels.
baseline_texts = baseline_df['title'].tolist()
baseline_labels = baseline_df['label'].tolist()
baseline_encodings = tokenize_function(baseline_texts)

# Convert to Hugging Face Dataset format
baseline_dataset = Dataset.from_dict(
    dict(
        input_ids=baseline_encodings["input_ids"],
        attention_mask=baseline_encodings["attention_mask"],
        labels=baseline_labels,
    )
)


In [8]:
# Test dataset processing: same recipe as the baseline block, applied to the
# held-out test frame.
test_texts = test_df['title'].tolist()
test_labels = test_df['label'].tolist()
test_encodings = tokenize_function(test_texts)

# Convert to Hugging Face Dataset format
test_dataset = Dataset.from_dict(
    dict(
        input_ids=test_encodings["input_ids"],
        attention_mask=test_encodings["attention_mask"],
        labels=test_labels,
    )
)


In [9]:
# Training the baseline model

# Pretrained BERT encoder with a two-class classification head on top.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases. The install at the top of the notebook is unpinned, so use
# whichever keyword the installed version actually accepts.
_eval_strategy_kw = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./bert_baseline",
    run_name="baseline_training",
    save_strategy="epoch",          # write a checkpoint after every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    **{_eval_strategy_kw: "epoch"},  # evaluate after every epoch
)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [10]:
# Training

def compute_metrics(eval_pred):
    """Compute accuracy for the Trainer's evaluation loop.

    Args:
        eval_pred: a (logits, labels) pair as handed over by the Trainer.

    Returns:
        A dict with a single "accuracy" entry: the fraction of rows whose
        highest-scoring class equals the label.
    """
    raw_scores, gold_labels = eval_pred
    predicted_classes = np.argmax(raw_scores, axis=-1)
    return {"accuracy": accuracy_score(gold_labels, predicted_classes)}

# Wire up the baseline run. The test split is also passed as the evaluation
# set, so the metrics reported during training are computed on the test data.
baseline_trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=baseline_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**baseline_trainer_kwargs)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6847,0.55747,0.812887
2,0.5587,0.461265,0.790582
3,0.4562,0.510776,0.76456


TrainOutput(global_step=4101, training_loss=0.5561170930747316, metrics={'train_runtime': 907.9283, 'train_samples_per_second': 36.122, 'train_steps_per_second': 4.517, 'total_flos': 2157247542896640.0, 'train_loss': 0.5561170930747316, 'epoch': 3.0})

In [11]:
# Evaluate the baseline model on the evaluation set given to `trainer`
# (the test split) and report headline accuracy.
baseline_results = trainer.evaluate()
print(f"Baseline Test Accuracy: {baseline_results['eval_accuracy']:.4f}")

# Per-class breakdown: take the arg-max class of each prediction row and
# compare it with the test labels (label 0 is shown as "Fake", 1 as "Real").
baseline_output = trainer.predict(test_dataset)
baseline_predictions = baseline_output.predictions
baseline_pred_labels = np.argmax(baseline_predictions, axis=1)

baseline_report = classification_report(
    test_labels,
    baseline_pred_labels,
    target_names=["Fake", "Real"],
)
print(baseline_report)


Baseline Test Accuracy: 0.7646
              precision    recall  f1-score   support

        Fake       0.95      0.77      0.85       705
        Real       0.32      0.75      0.44       102

    accuracy                           0.76       807
   macro avg       0.64      0.76      0.65       807
weighted avg       0.87      0.76      0.80       807



In [12]:
from transformers import BertForSequenceClassification

# Write the fine-tuned baseline model and its tokenizer into one directory.
baseline_model_dir = "fine_tuned_bert"
model.save_pretrained(baseline_model_dir)
tokenizer.save_pretrained(baseline_model_dir)


('fine_tuned_bert/tokenizer_config.json',
 'fine_tuned_bert/special_tokens_map.json',
 'fine_tuned_bert/vocab.txt',
 'fine_tuned_bert/added_tokens.json')

In [13]:
import shutil

# Zip the saved baseline directory into fine_tuned_bert_initial.zip; the call
# evaluates to the path of the archive it created.
shutil.make_archive(base_name='fine_tuned_bert_initial', format='zip', root_dir="fine_tuned_bert")

'/content/fine_tuned_bert_initial.zip'

# Continual Model Training and Evaluation

In [14]:
def prepare_update_dataset(df_subset):
    """Build a tokenised Hugging Face Dataset from one time-block frame.

    Args:
        df_subset: DataFrame with a 'title' column (text) and a 'label'
            column.

    Returns:
        A Dataset with "input_ids", "attention_mask" and "labels" columns,
        tokenised via the notebook-level `tokenize_function`.
    """
    label_values = df_subset['label'].tolist()
    encodings = tokenize_function(df_subset['title'].tolist())

    columns = {field: encodings[field] for field in ("input_ids", "attention_mask")}
    columns["labels"] = label_values
    return Dataset.from_dict(columns)

In [15]:
# Training args for continual updates

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases. The install at the top of the notebook is unpinned, so use
# whichever keyword the installed version actually accepts.
_eval_strategy_kw = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

# One shared configuration for every update step.
update_training_args = TrainingArguments(
    output_dir="./bert_continual",
    run_name="continual_update",
    save_strategy="epoch",          # write a checkpoint after every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    **{_eval_strategy_kw: "epoch"},  # evaluate after every epoch
)



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [16]:
# Function to fine-tune the model on a given update dataset

def fine_tune_on_update(model, update_dataset, update_name):
    """Train `model` on one update block, then score it on the test set and save it.

    Depends on notebook-level names: `update_training_args`, `test_dataset`,
    `test_labels`, `compute_metrics`, `tokenizer` and the `shutil` module.

    Args:
        model: the model handed to the Trainer for this update.
        update_dataset: training data for this update.
        update_name: label used in the printed messages. Lower-cased and with
            spaces replaced by underscores, it also names the save directory
            and zip archive.

    Returns:
        The same `model` object that was passed in.
    """
    print(f"\n--- Fine-tuning on {update_name} ---")

    trainer_kwargs = dict(
        model=model,
        args=update_training_args,
        train_dataset=update_dataset,
        eval_dataset=test_dataset,  # Evaluate on the unified test set
        compute_metrics=compute_metrics,
    )
    update_trainer = Trainer(**trainer_kwargs)
    update_trainer.train()

    # Headline accuracy after this update
    results = update_trainer.evaluate()
    print(f"{update_name} - Test Accuracy: {results['eval_accuracy']:.4f}")

    # Per-class report from the arg-max class of each prediction row
    prediction_output = update_trainer.predict(test_dataset)
    pred_labels = np.argmax(prediction_output.predictions, axis=1)
    report = classification_report(test_labels, pred_labels, target_names=["Fake", "Real"])
    print(report)

    # Save the model and tokenizer, then zip that directory next to it
    model_save_path = f"fine_tuned_bert_{update_name.replace(' ', '_').lower()}"
    for artefact in (model, tokenizer):
        artefact.save_pretrained(model_save_path)
    shutil.make_archive(model_save_path, 'zip', model_save_path)

    return model


In [17]:
# Updates

# Tokenise every update block up front.
update1_dataset = prepare_update_dataset(update1_df)
update2_dataset = prepare_update_dataset(update2_df)
update3_dataset = prepare_update_dataset(update3_df)
update4_dataset = prepare_update_dataset(update4_df)

# Sequentially fine-tune, oldest block first; each step starts from the model
# returned by the previous one.
update_schedule = (
    (update1_dataset, "Update 1 (2016-2017)"),
    (update2_dataset, "Update 2 (2018-2019)"),
    (update3_dataset, "Update 3 (2020-2021)"),
    (update4_dataset, "Update 4 (2022)"),
)
for scheduled_dataset, scheduled_name in update_schedule:
    model = fine_tune_on_update(model, scheduled_dataset, scheduled_name)


--- Fine-tuning on Update 1 (2016-2017) ---


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6585,0.383663,0.815366
2,0.5723,0.390861,0.811648


Update 1 (2016-2017) - Test Accuracy: 0.8116
              precision    recall  f1-score   support

        Fake       0.96      0.81      0.88       705
        Real       0.38      0.79      0.52       102

    accuracy                           0.81       807
   macro avg       0.67      0.80      0.70       807
weighted avg       0.89      0.81      0.84       807


--- Fine-tuning on Update 2 (2018-2019) ---


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6182,0.285984,0.889715
2,0.4757,0.295047,0.864932


Update 2 (2018-2019) - Test Accuracy: 0.8649
              precision    recall  f1-score   support

        Fake       0.95      0.89      0.92       705
        Real       0.48      0.67      0.56       102

    accuracy                           0.86       807
   macro avg       0.71      0.78      0.74       807
weighted avg       0.89      0.86      0.87       807


--- Fine-tuning on Update 3 (2020-2021) ---


Epoch,Training Loss,Validation Loss,Accuracy
1,0.451,0.275917,0.876084
2,0.2973,0.280066,0.89715


Update 3 (2020-2021) - Test Accuracy: 0.8971
              precision    recall  f1-score   support

        Fake       0.94      0.94      0.94       705
        Real       0.60      0.58      0.59       102

    accuracy                           0.90       807
   macro avg       0.77      0.76      0.76       807
weighted avg       0.90      0.90      0.90       807


--- Fine-tuning on Update 4 (2022) ---


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4247,0.228538,0.903346
2,0.1609,0.310626,0.908302


Update 4 (2022) - Test Accuracy: 0.9083
              precision    recall  f1-score   support

        Fake       0.93      0.96      0.95       705
        Real       0.68      0.53      0.59       102

    accuracy                           0.91       807
   macro avg       0.80      0.75      0.77       807
weighted avg       0.90      0.91      0.90       807



In [18]:
# Final eval on the test set
#
# Score the continually-updated `model` through a Trainer built around that
# exact object. The `trainer` created for the baseline run only reports the
# right numbers as long as every update keeps mutating the very same model
# instance, which is an accident of aliasing rather than a guarantee.
final_trainer = Trainer(
    model=model,
    args=update_training_args,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

final_results = final_trainer.evaluate()
print(f"\nFinal Updated Model Test Accuracy: {final_results['eval_accuracy']:.4f}")

# Per-class report from the arg-max class of each prediction row
final_preds = final_trainer.predict(test_dataset).predictions
final_pred_labels = np.argmax(final_preds, axis=1)
print(classification_report(test_labels, final_pred_labels, target_names=["Fake", "Real"]))

# Persist the final model together with its tokenizer
model.save_pretrained("fine_tuned_bert_continual")
tokenizer.save_pretrained("fine_tuned_bert_continual")



Final Updated Model Test Accuracy: 0.9083
              precision    recall  f1-score   support

        Fake       0.93      0.96      0.95       705
        Real       0.68      0.53      0.59       102

    accuracy                           0.91       807
   macro avg       0.80      0.75      0.77       807
weighted avg       0.90      0.91      0.90       807



('fine_tuned_bert_continual/tokenizer_config.json',
 'fine_tuned_bert_continual/special_tokens_map.json',
 'fine_tuned_bert_continual/vocab.txt',
 'fine_tuned_bert_continual/added_tokens.json')