# ============================================================
## Finetune_transformer.ipynb
## We fine-tune a pre-trained Transformer model (e.g., `roberta-base` or `bert-base-uncased`) for stance classification on the Gun Control dataset. The model learns to predict whether a tweet expresses *support* for or *opposition* to gun control.
# ============================================================


In [7]:
#Libraries 
import os
import numpy as np
import pandas as pd
from datasets import load_dataset, Dataset,load_from_disk
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import torch

# Fix the NumPy and PyTorch global RNG seeds so repeated runs are comparable.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Project root. All relative paths used below (e.g. "./models/...") resolve
# against it because the working directory is switched here.
# Set the ARGMINING_ROOT environment variable to run on another machine;
# the original local path is kept as the fallback.
path = os.environ.get("ARGMINING_ROOT", "C:/Users/diego/Desktop/multimodal-argmining")
os.chdir(path)

# Hugging Face checkpoint to fine-tune.
MODEL_NAME = "roberta-base"   # "bert-base-uncased"

In [8]:
# Read the train/dev splits that were tokenized and saved to disk beforehand.
# The folder is keyed by the model name ("/" replaced with "_");
# the "maxlen105" suffix is presumably the max sequence length used when tokenizing.
tokenized_dir = f"{path}/tokenized/{MODEL_NAME.replace('/', '_')}_maxlen105"

train_dataset, dev_dataset = (
    Dataset.load_from_disk(os.path.join(tokenized_dir, split_name))
    for split_name in ("train", "dev")
)

# Report split sizes as a quick sanity check.
print(
    f"Train dataset loaded with samples: {len(train_dataset)}",
    f"Dev dataset loaded with samples:: {len(dev_dataset)}",
    sep="\n",
)

Train dataset loaded with samples: 923
Dev dataset loaded with samples:: 100


In [9]:
# Pre-trained checkpoint: a sequence-classification model with a 2-class head,
# plus the tokenizer that belongs to the same checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Metrics reported at every evaluation:
# accuracy plus weighted-average F1, precision and recall.
def compute_metrics(pred):
    """Compute classification metrics for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (gold labels) and ``predictions``
            (per-class scores, or a tuple whose first element holds them).

    Returns:
        dict with the keys ``"accuracy"``, ``"f1"``, ``"precision"`` and
        ``"recall"``; the last three use ``average="weighted"``.
    """
    scores = pred.predictions
    # When the model returns extra outputs, predictions arrive as a tuple
    # with the class scores first; argmax over the tuple itself would fail.
    if isinstance(scores, tuple):
        scores = scores[0]

    labels = pred.label_ids
    preds = np.argmax(scores, axis=1)

    # zero_division=0 yields the same value as the default behaviour but
    # without a warning when a class is never predicted.
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted", zero_division=0)
    precision = precision_score(labels, preds, average="weighted", zero_division=0)
    recall = recall_score(labels, preds, average="weighted", zero_division=0)
    return {
        "accuracy": acc,
        "f1": f1,
        "precision": precision,
        "recall": recall,
    }


In [11]:
# Training configuration for the Trainer.
training_args = TrainingArguments(
    # Checkpoint location and retention
    output_dir="./models/roberta_finetuned",
    save_total_limit=2,
    # Evaluate, checkpoint and log once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    # Optimisation hyper-parameters
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Model selection: reload the checkpoint with the best dev "f1" when training ends
    metric_for_best_model="f1",
    load_best_model_at_end=True,
    # No external experiment-tracking integrations
    report_to="none",
)

In [13]:
# Initialize the Trainer: model, data splits, metrics and early stopping.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    # `tokenizer=` is deprecated in recent transformers releases and emits a
    # warning at this call; `processing_class=` is its replacement.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    # Stop when the monitored metric has not improved for 2 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

  trainer = Trainer(


In [14]:
# Fine-tune the model. Evaluation, checkpointing and logging are configured to
# run once per epoch, and the early-stopping callback may end training before
# num_train_epochs is reached. The returned value is shown as the cell output.
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5313,0.32656,0.85,0.848417,0.886885,0.85
2,0.2557,0.26412,0.91,0.909919,0.924727,0.91
3,0.1966,0.249167,0.91,0.909918,0.91002,0.91
4,0.1012,0.394904,0.91,0.909919,0.924727,0.91




TrainOutput(global_step=232, training_loss=0.27121250588318396, metrics={'train_runtime': 8987.7803, 'train_samples_per_second': 0.513, 'train_steps_per_second': 0.032, 'total_flos': 199214124454800.0, 'train_loss': 0.27121250588318396, 'epoch': 4.0})

In [15]:
# Evaluate on the dev set (the eval_dataset passed to the Trainer) and print the metrics dict.
# NOTE(review): load_best_model_at_end=True is set, so this presumably scores the
# restored best checkpoint rather than the last epoch — confirm against the training log.
eval_results = trainer.evaluate()
print("\nEvaluation results:", eval_results)




Evaluation results: {'eval_loss': 0.26412034034729004, 'eval_accuracy': 0.91, 'eval_f1': 0.90991899189919, 'eval_precision': 0.9247272727272727, 'eval_recall': 0.91, 'eval_runtime': 39.954, 'eval_samples_per_second': 2.503, 'eval_steps_per_second': 0.175, 'epoch': 4.0}


In [16]:
# Small example: classify one hand-written sentence with the fine-tuned model.
example = "I believe stricter gun laws would make our communities safer."

# Inference mode: disable dropout so the prediction is deterministic.
model.eval()

# Put the encoded inputs on the same device as the model (avoids a CPU/GPU mismatch).
inputs = tokenizer(example, return_tensors="pt").to(model.device)

# No gradients are needed for a forward-only pass.
with torch.no_grad():
    outputs = model(**inputs)

# Arg-max over the class dimension rather than over the flattened logits.
pred = torch.argmax(outputs.logits, dim=-1).item()

# NOTE(review): this id -> name mapping is hard-coded and not checked against
# the dataset's label encoding. The recorded run printed "oppose" for this
# pro-regulation sentence, so confirm that 0 really means "support".
label = "support" if pred == 0 else "oppose"
print(f"Example: {example}\nPrediction: {label}")

Example: I believe stricter gun laws would make our communities safer.
Prediction: oppose


In [17]:
# We save fine-tuned model and tokenizer
model.save_pretrained("./models/roberta_finetuned")
tokenizer.save_pretrained("./models/roberta_finetuned")

('./models/roberta_finetuned\\tokenizer_config.json',
 './models/roberta_finetuned\\special_tokens_map.json',
 './models/roberta_finetuned\\vocab.json',
 './models/roberta_finetuned\\merges.txt',
 './models/roberta_finetuned\\added_tokens.json',
 './models/roberta_finetuned\\tokenizer.json')