In [1]:
from datasets import load_dataset

dataset = load_dataset("liar", trust_remote_code=True)
print(dataset)



DatasetDict({
    train: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 10269
    })
    test: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1283
    })
    validation: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1284
    })
})


In [2]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("roberta-base")

def tokenize_function(example):
    return tokenizer(example["subject"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/1284 [00:00<?, ? examples/s]

In [3]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"]
)

trainer.train()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.292,0.281397
2,0.2799,0.279146


TrainOutput(global_step=3849, training_loss=0.2813442346181892, metrics={'train_runtime': 11352.3435, 'train_samples_per_second': 2.714, 'train_steps_per_second': 0.339, 'total_flos': 8100136950312960.0, 'train_loss': 0.2813442346181892, 'epoch': 2.997955010224949})

# Load the model in Python

In [4]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_path = "/Users/baaki/Documents/Project/FYP/.venv/my_finetuned_roberta_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [5]:
model.save_pretrained("my_finetuned_roberta_model")
tokenizer.save_pretrained("my_finetuned_roberta_model")

('my_finetuned_roberta_model/tokenizer_config.json',
 'my_finetuned_roberta_model/special_tokens_map.json',
 'my_finetuned_roberta_model/vocab.json',
 'my_finetuned_roberta_model/merges.txt',
 'my_finetuned_roberta_model/added_tokens.json',
 'my_finetuned_roberta_model/tokenizer.json')

# Load the model and test dataset

In [6]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer
from datasets import load_dataset

model_name = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

dataset = load_dataset("liar")
test_dataset = dataset["test"]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Get predictions on test data from the fine-tuned model

In [7]:
def tokenize_function(examples):
    return tokenizer(examples["subject"], padding="max_length", truncation=True)

tokenized_test = test_dataset.map(tokenize_function, batched=True)

trainer = Trainer(model=model)
predictions = trainer.predict(tokenized_test)

# Evaluate the model performance

In [8]:
from evaluate import load
import numpy as np

metric_acc = load("accuracy")
metric_f1 = load("f1")

preds = np.argmax(predictions.predictions, axis=-1)

accuracy = metric_acc.compute(predictions=preds, references=test_dataset["label"])
f1_score = metric_f1.compute(predictions=preds, references=test_dataset["label"], average="weighted")

print(f"Accuracy: {accuracy['accuracy']:.4f}")
print(f"F1 Score: {f1_score['f1']:.4f}")

Accuracy: 0.2081
F1 Score: 0.0717


# Check the unique labels 

In [10]:
import numpy as np

labels = test_dataset["label"]
unique_labels = np.unique(labels)
print("Unique labels:", unique_labels)
print("Number of classes:", len(unique_labels))

Unique labels: [0 1 2 3 4 5]
Number of classes: 6


# Analyze the results

In [12]:
from sklearn.metrics import confusion_matrix, classification_report

labels = test_dataset["label"]
cm = confusion_matrix(labels, preds)
report = classification_report(labels, preds, target_names=["Class 0", "Class 1", "Class 2", "Class 3", "Class 4", "Class 5"])

print("Confusion Matrix:\n", cm)
print("Classification Report:\n", report)

Confusion Matrix:
 [[  0 250   0   0   0   0]
 [  0 267   0   0   0   0]
 [  0 249   0   0   0   0]
 [  0 211   0   0   0   0]
 [  0 214   0   0   0   0]
 [  0  92   0   0   0   0]]
Classification Report:
               precision    recall  f1-score   support

     Class 0       0.00      0.00      0.00       250
     Class 1       0.21      1.00      0.34       267
     Class 2       0.00      0.00      0.00       249
     Class 3       0.00      0.00      0.00       211
     Class 4       0.00      0.00      0.00       214
     Class 5       0.00      0.00      0.00        92

    accuracy                           0.21      1283
   macro avg       0.03      0.17      0.06      1283
weighted avg       0.04      0.21      0.07      1283



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Test the model with real and unseen data

In [13]:
import torch
device = "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)

text = "This is a test sentence for hate speech detection."
inputs = tokenizer(text, return_tensors="pt").to(device)
output = model(**inputs)
print(output)

SequenceClassifierOutput(loss=None, logits=tensor([[0.0151, 0.3296]], device='mps:0', grad_fn=<LinearBackward0>), hidden_states=None, attentions=None)


# Covert the model into an API

In [None]:
from fastapi import FastAPI
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

app = FastAPI()

MODEL_PATH = "/Users/baaki/Documents/Project/FYP/.venv/my_finetuned_roberta_model"
model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

device = "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)

@app.post("/predict/")
async def predict(text: str):
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=-1).item()
    return {"prediction": prediction}
