In [11]:
import torch
from datasets import Dataset
from transformers import (
    RobertaTokenizer, 
    RobertaForSequenceClassification,
    Trainer, 
    TrainingArguments
)
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

In [13]:
# Load and prepare datasets
train_df = pd.read_csv('datasets/imdb_dataset_sampled_300.csv')
test_df = pd.read_csv('datasets/imdb_dataset_test.csv')

In [25]:
# Convert sentiments to numbers (assuming binary sentiment)
label_map = {'positive': 1, 'negative': 0}
train_df['sentiment'] = train_df['sentiment'].map(label_map)
test_df['sentiment'] = test_df['sentiment'].map(label_map)

In [27]:
# Initialize tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

def tokenize_data(examples):
    return tokenizer(examples['text'], padding=True, truncation=True, max_length=512)

# Prepare datasets
train_dataset = Dataset.from_dict({
    'text': train_df['review'].tolist(),
    'label': train_df['sentiment'].tolist()
})
test_dataset = Dataset.from_dict({
    'text': test_df['review'].tolist(),
    'label': test_df['sentiment'].tolist()
})

train_dataset = train_dataset.map(tokenize_data, batched=True)
test_dataset = test_dataset.map(tokenize_data, batched=True)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [28]:
# Set format for PyTorch
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
test_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {'accuracy': accuracy_score(labels, predictions)}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()
test_results = trainer.evaluate()
print(f"Test results: {test_results}")



Epoch,Training Loss,Validation Loss,Accuracy
1,0.6943,0.691572,0.636
2,0.6929,0.685921,0.5273
3,0.4864,0.703203,0.6725


Test results: {'eval_loss': 0.6859205365180969, 'eval_accuracy': 0.5273, 'eval_runtime': 47.8941, 'eval_samples_per_second': 208.794, 'eval_steps_per_second': 26.099, 'epoch': 3.0}
