In [None]:
import json
import torch
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, f1_score
import numpy as np

# Load the train and validation splits. Each file is parsed as JSON and then
# iterated as a sequence of records indexed by 'rumor', 'evidence' and 'label'.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale (JSON text is UTF-8).
with open('/content/rumor_evidence_labels_english_train', 'r', encoding='utf-8') as f:
    train_data = json.load(f)

with open('/content/rumor_evidence_labels_english_val', 'r', encoding='utf-8') as f:
    val_data = json.load(f)

# Pull the three fields out of every training record.
rumor_texts = [item['rumor'] for item in train_data]
evidence_texts = [item['evidence'] for item in train_data]
labels = [item['label'] for item in train_data]

# Same extraction for the validation records.
rumor_texts_val = [item['rumor'] for item in val_data]
evidence_texts_val = [item['evidence'] for item in val_data]
labels_val = [item['label'] for item in val_data]

# Join each rumor with its evidence into one input string.
# NOTE(review): the literal "[SEP]" is embedded in the text; presumably the
# tokenizer maps it to its separator token -- confirm for this checkpoint.
combined_texts = [f"Rumor: {rumor} [SEP] Evidence: {evidence}" for rumor, evidence in zip(rumor_texts, evidence_texts)]
combined_texts_val = [f"Rumor: {rumor} [SEP] Evidence: {evidence}" for rumor, evidence in zip(rumor_texts_val, evidence_texts_val)]

# Map the string labels to class ids. A label missing from this table raises
# KeyError, which surfaces unexpected annotations early.
label_to_id = {'SUPPORTS': 0, 'REFUTES': 1, 'NOT ENOUGH INFO': 2}
numerical_labels = [label_to_id[label] for label in labels]
numerical_labels_val = [label_to_id[label] for label in labels_val]


# Load the tokenizer that belongs to the checkpoint fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained('kornosk/bert-election2020-twitter-stance-biden-KE-MLM')


def tokenize_function(texts):
    """Encode ``texts`` with the module-level ``tokenizer``.

    The tokenizer is called with padding and truncation enabled, a
    ``max_length`` of 256 and ``return_tensors="pt"``.

    Args:
        texts: The input strings to encode in a single tokenizer call.

    Returns:
        Whatever the tokenizer call returns for these options.
    """
    encode_options = {
        "padding": True,
        "truncation": True,
        "max_length": 256,
        "return_tensors": "pt",
    }
    return tokenizer(texts, **encode_options)

# Encode both splits with the shared helper: training first, then validation.
# The validation results keep their existing "test_" names.
train_encodings, test_encodings = map(
    tokenize_function, (combined_texts, combined_texts_val)
)

# Wrap the integer class ids of each split in a torch tensor.
train_labels, test_labels = map(
    torch.tensor, (numerical_labels, numerical_labels_val)
)

# Create a custom dataset class
class RumorStanceDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with labels.

    ``encodings`` must expose ``.items()`` yielding ``(name, column)`` pairs
    whose columns are indexable by sample position; ``labels`` must support
    ``len()`` and indexing.
    """

    def __init__(self, encodings, labels):
        """Store the encodings mapping and the label container as given."""
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of samples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one sample: every encoding column at ``idx`` plus 'labels'."""
        sample = {}
        for name, column in self.encodings.items():
            sample[name] = column[idx]
        # The label is added last under the 'labels' key.
        sample['labels'] = self.labels[idx]
        return sample

# Wrap the encoded splits in datasets. "test_dataset" holds the validation
# split and is the one evaluated during and after training.
train_dataset = RumorStanceDataset(train_encodings, train_labels)
test_dataset = RumorStanceDataset(test_encodings, test_labels)

# Load the KE-MLM checkpoint with a 3-way classification head, matching the
# three entries of the label mapping.
model = AutoModelForSequenceClassification.from_pretrained('kornosk/bert-election2020-twitter-stance-biden-KE-MLM', num_labels=3)


# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    logging_dir='./logs',
    # Log once per epoch, in step with evaluation and checkpointing. Without
    # this the recorded run showed "No log" for the training loss on every
    # epoch, so train and validation loss could not be compared.
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Restore the checkpoint with the best validation accuracy once training
    # finishes; evaluation and saving use the same per-epoch strategy.
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)


def compute_metrics(pred):
    """Compute accuracy and micro/macro F1 for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (the reference class ids) and
            ``predictions`` (per-class scores, reduced with argmax over
            axis 1).

    Returns:
        dict with the keys 'accuracy', 'f1_micro' and 'f1_macro'.
    """
    y_true = pred.label_ids
    # The predicted class is the index of the highest score in each row.
    y_pred = np.argmax(pred.predictions, axis=1)

    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    for key, averaging in (('f1_micro', 'micro'), ('f1_macro', 'macro')):
        scores[key] = f1_score(y_true, y_pred, average=averaging)
    return scores


# Wire the model, arguments, datasets, tokenizer and metric callback together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)


trainer.train()

# Save the fine-tuned model and tokenizer
model.save_pretrained('/content/fine-tuned-kemlm_English')
tokenizer.save_pretrained('/content/fine-tuned-kemlm_English')

# Evaluate the fine-tuned model on the evaluation dataset given to the Trainer
results = trainer.evaluate()
print(f"Results: {results}")
print(f"Accuracy: {results['eval_accuracy']}")
print(f"F1 Score (Micro): {results['eval_f1_micro']}")
# Micro F1 matched accuracy in every recorded row, so macro F1 is the figure
# that shows how evenly the classes are handled; report it explicitly.
print(f"F1 Score (Macro): {results['eval_f1_macro']}")




Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,No log,0.802015,0.660377,0.660377,0.409649
2,No log,0.69052,0.735849,0.735849,0.526455
3,No log,0.646315,0.792453,0.792453,0.592433
4,No log,0.658296,0.801887,0.801887,0.604299
5,No log,0.62039,0.801887,0.801887,0.604299
6,No log,0.661677,0.801887,0.801887,0.64958
7,No log,0.741698,0.792453,0.792453,0.660714
8,No log,0.757062,0.792453,0.792453,0.636815
9,No log,0.769532,0.801887,0.801887,0.64958
10,No log,0.774354,0.801887,0.801887,0.64958


Results: {'eval_loss': 0.6582958102226257, 'eval_accuracy': 0.8018867924528302, 'eval_f1_micro': 0.8018867924528302, 'eval_f1_macro': 0.6042985263324246, 'eval_runtime': 1.1003, 'eval_samples_per_second': 96.34, 'eval_steps_per_second': 1.818, 'epoch': 10.0}
Accuracy: 0.8018867924528302
F1 Score (Micro): 0.8018867924528302
