# In [None]:
# BERT_FineTune.ipynb
# Fine-Tuning BERT for Truthseeker Dataset

import pandas as pd
import torch
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import Trainer, TrainingArguments

# Load the Truthseeker CSV and hold out 20% of the rows for evaluation.
data = pd.read_csv('../data/Truth_Seeker_Model_Dataset.csv')

# Stratify on the label column so both splits keep the same class balance.
# The fixed seed makes the split, and therefore every metric reported on the
# held-out rows, repeatable from run to run.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    stratify=data['target'],
    random_state=42,
)

# Tokenizer loaded from the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def preprocess(data):
    """Tokenize the ``'text_column'`` values of ``data`` as one batch.

    Args:
        data: Object whose ``'text_column'`` entry supports ``.tolist()``
            (presumably a pandas DataFrame -- the callers pass the
            train/test splits).

    Returns:
        The result of calling the module-level ``tokenizer`` on the list of
        texts with padding enabled and truncation at 512 tokens.
    """
    # NOTE(review): 'text_column' looks like a placeholder name -- confirm the
    # CSV really has a column called this.
    texts = data['text_column'].tolist()
    encoding_options = {'padding': True, 'truncation': True, 'max_length': 512}
    return tokenizer(texts, **encoding_options)

# Tokenize each split once, up front.
train_encodings = preprocess(train_data)
test_encodings = preprocess(test_data)

# Combine the tokenizer output with the labels taken from the 'target' column.
train_dataset = Dataset.from_dict(
    dict(train_encodings, labels=train_data['target'].tolist())
)
test_dataset = Dataset.from_dict(
    dict(test_encodings, labels=test_data['target'].tolist())
)

# Define the model: the 'bert-base-uncased' checkpoint wrapped in a
# sequence-classification model configured for four labels.
# NOTE(review): num_labels=4 has to match the number of distinct values in
# data['target'] -- confirm against the dataset.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=4,
)

# Training arguments
training_args = TrainingArguments(
    # Directory handed to the Trainer for its outputs.
    output_dir='../results/',
    # Evaluate and save on the same per-epoch schedule.
    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy` -- confirm against the installed version.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Optimisation hyperparameters.
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Batch size per device for training and for evaluation.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

def compute_metrics(eval_pred):
    """Return classification accuracy for one evaluation pass.

    Without a metric function the Trainer reports only the evaluation loss,
    which says little about how well a classifier is doing.

    Args:
        eval_pred: The prediction object the Trainer passes in; its
            ``predictions`` attribute holds the per-class logits and
            ``label_ids`` the reference labels.

    Returns:
        ``{'accuracy': float}`` -- the fraction of rows whose highest-scoring
        class equals the reference label.
    """
    logits = eval_pred.predictions
    # Some model configurations return a tuple of outputs; logits come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    predicted_labels = logits.argmax(axis=-1)
    return {'accuracy': float((predicted_labels == eval_pred.label_ids).mean())}


# Trainer: ties the model, the training arguments and both dataset splits
# together, and scores each evaluation pass with compute_metrics.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

# Train the model
trainer.train()

# Evaluate the model
# Keep the returned metrics and print them: a bare expression is only echoed
# when it is the last statement of a notebook cell, so running this file as a
# script would otherwise silently discard the results.
eval_metrics = trainer.evaluate()
print(eval_metrics)