In [None]:
import json
import torch
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, f1_score
import numpy as np

# Load the dataset.
# The encoding is passed explicitly: without it open() falls back to the
# locale's default encoding, which can mis-decode non-ASCII text in the JSON.
with open('/content/rumor_evidence_labels_ea_train', 'r', encoding='utf-8') as f:
    train_data = json.load(f)

with open('/content/rumor_evidence_labels_ea_val', 'r', encoding='utf-8') as f:
    val_data = json.load(f)

# Prepare texts and labels (each record is read via its 'rumor', 'evidence'
# and 'label' keys).
rumor_texts = [item['rumor'] for item in train_data]
evidence_texts = [item['evidence'] for item in train_data]
labels = [item['label'] for item in train_data]

rumor_texts_val = [item['rumor'] for item in val_data]
evidence_texts_val = [item['evidence'] for item in val_data]
labels_val = [item['label'] for item in val_data]

# Combine rumor and evidence into a single input string per example.
# NOTE(review): the literal "</s>" is presumably meant to act as the XLM-R
# separator token between the two segments — confirm the tokenizer maps it to
# the special token rather than splitting it into ordinary sub-words.
combined_texts = [f"Rumor: {rumor} </s> Evidence: {evidence}" for rumor, evidence in zip(rumor_texts, evidence_texts)]
combined_texts_val = [f"Rumor: {rumor} </s> Evidence: {evidence}" for rumor, evidence in zip(rumor_texts_val, evidence_texts_val)]

# Convert labels to numerical format
label_to_id = {'SUPPORTS': 0, 'REFUTES': 1, 'NOT ENOUGH INFO': 2}

# Fail early with a readable message instead of a bare KeyError raised from
# inside the list comprehensions below.
unknown_labels = (set(labels) | set(labels_val)) - set(label_to_id)
if unknown_labels:
    raise ValueError(
        f"Unexpected label(s) in dataset: {sorted(unknown_labels, key=repr)}; "
        f"expected one of {list(label_to_id)}"
    )

numerical_labels = [label_to_id[label] for label in labels]
numerical_labels_val = [label_to_id[label] for label in labels_val]

# Load XLM-RoBERTa tokenizer
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

# Tokenization function
def tokenize_function(texts):
    """Encode ``texts`` with the module-level ``tokenizer``.

    The call enables padding and truncation, caps sequences at 256 tokens and
    asks for PyTorch tensors; the tokenizer's output is returned unchanged.
    """
    encoded = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )
    return encoded

# Encode the training split first, then the validation split
train_encodings, test_encodings = (
    tokenize_function(split_texts)
    for split_texts in (combined_texts, combined_texts_val)
)

# Wrap the integer class ids of both splits in torch tensors
train_labels, test_labels = (
    torch.tensor(split_ids)
    for split_ids in (numerical_labels, numerical_labels_val)
)

# Create a custom dataset class
class RumorStanceDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with labels.

    ``encodings`` is a mapping from field name to an indexable collection;
    ``labels`` is an indexable collection whose length defines the dataset
    size.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of examples is taken from the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # Pick position ``idx`` out of every encoding field, then attach the
        # matching label under the 'labels' key.
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = column[idx]
        sample['labels'] = self.labels[idx]
        return sample

# Create datasets for training and testing
train_dataset = RumorStanceDataset(train_encodings, train_labels)
test_dataset = RumorStanceDataset(test_encodings, test_labels)

# Seed PyTorch before the model is built: the classification head is newly
# initialised rather than loaded from the checkpoint, so without a seed its
# starting weights (and therefore the whole run) differ between executions.
SEED = 42
torch.manual_seed(SEED)

# Load the XLM-RoBERTa model for sequence classification.
# The number of output classes is derived from the label mapping so the two
# cannot drift apart.
model = AutoModelForSequenceClassification.from_pretrained(
    'xlm-roberta-base',
    num_labels=len(label_to_id),
)


# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=10,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    logging_dir='./logs',
    # Log once per epoch so the training loss is reported next to the
    # per-epoch validation metrics.
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # A checkpoint is written every epoch; cap how many are kept on disk.
    # With load_best_model_at_end the best checkpoint is still retained.
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    seed=SEED,
    report_to="none"
)


def compute_metrics(pred):
    """Score a prediction object for the Trainer.

    Reads the gold labels from ``pred.label_ids`` and takes the arg-max over
    axis 1 of ``pred.predictions`` as the predicted class.

    Returns a dict with the keys 'accuracy', 'f1_micro' and 'f1_macro'.
    """
    gold = pred.label_ids
    predicted = np.argmax(pred.predictions, axis=1)

    metrics = {}
    metrics['accuracy'] = accuracy_score(gold, predicted)
    metrics['f1_micro'] = f1_score(gold, predicted, average='micro')
    metrics['f1_macro'] = f1_score(gold, predicted, average='macro')
    return metrics


# Wire the model, arguments, both datasets, the tokenizer and the metric
# callback into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Run fine-tuning
trainer.train()

# Persist the model first, then the tokenizer, into the same directory
for artifact in (model, tokenizer):
    artifact.save_pretrained('/content/fine-tuned-xlm-roberta_EA_64')

# Evaluate with the trainer and report the headline numbers
results = trainer.evaluate()
print(f"Results: {results}")
print(f"Accuracy: {results['eval_accuracy']}")
print(f"F1 Score (Micro): {results['eval_f1_micro']}")

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,No log,0.837736,0.591837,0.591837,0.331016
2,No log,0.74021,0.729592,0.729592,0.504884
3,No log,0.579451,0.760204,0.760204,0.599421
4,No log,0.413739,0.836735,0.836735,0.741412
5,No log,0.791401,0.77551,0.77551,0.739236
6,No log,0.563228,0.806122,0.806122,0.693022
7,No log,0.841659,0.780612,0.780612,0.67549
8,No log,1.579825,0.683673,0.683673,0.692363
9,No log,0.604059,0.862245,0.862245,0.824593
10,No log,0.614878,0.852041,0.852041,0.8198


Results: {'eval_loss': 0.6040592193603516, 'eval_accuracy': 0.8622448979591837, 'eval_f1_micro': 0.8622448979591837, 'eval_f1_macro': 0.8245934511735729, 'eval_runtime': 2.144, 'eval_samples_per_second': 91.42, 'eval_steps_per_second': 1.866, 'epoch': 10.0}
Accuracy: 0.8622448979591837
F1 Score (Micro): 0.8622448979591837


In [None]:
# Recursively bundle the saved model directory into a zip archive written to
# the current working directory. Because an absolute path is passed, entries
# are stored under content/fine-tuned-xlm-roberta_EA_64/ inside the archive
# (see the "adding:" lines in the output below).
!zip -r fine-tuned-xlm-roberta_EA_64.zip /content/fine-tuned-xlm-roberta_EA_64

  adding: content/fine-tuned-xlm-roberta_EA_64/ (stored 0%)
  adding: content/fine-tuned-xlm-roberta_EA_64/model.safetensors (deflated 31%)
  adding: content/fine-tuned-xlm-roberta_EA_64/tokenizer_config.json (deflated 77%)
  adding: content/fine-tuned-xlm-roberta_EA_64/tokenizer.json (deflated 76%)
  adding: content/fine-tuned-xlm-roberta_EA_64/sentencepiece.bpe.model (deflated 49%)
  adding: content/fine-tuned-xlm-roberta_EA_64/special_tokens_map.json (deflated 52%)
  adding: content/fine-tuned-xlm-roberta_EA_64/config.json (deflated 52%)
