In [1]:
from transformers import RobertaTokenizer
from datasets import Dataset
import evaluate
import pandas as pd
import numpy as np
from transformers import RobertaForSequenceClassification, Trainer, TrainingArguments
from transformers import EarlyStoppingCallback
from sklearn.model_selection import train_test_split
import sys
sys.path.append("../src")
import config

In [2]:
def run_roberta_pipeline(df_train, df_test, max_len):

    # --- Split test into val + test ---
    df_val, df_test = train_test_split(
        df_test, 
        test_size=0.5, 
        random_state=42, 
        stratify=df_test['label']
    )

    # Convert to HF Dataset
    train_dataset = Dataset.from_pandas(df_train)
    val_dataset   = Dataset.from_pandas(df_val)
    test_dataset  = Dataset.from_pandas(df_test)

    # Load Roberta tokenizer
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

    def tokenize_function(example):
        return tokenizer(
            example['text'],
            truncation=True,
            padding="max_length",
            max_length=max_len
        )

    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset   = val_dataset.map(tokenize_function, batched=True)
    test_dataset  = test_dataset.map(tokenize_function, batched=True)

    # Rename label column
    train_dataset = train_dataset.rename_column("label", "labels")
    val_dataset   = val_dataset.rename_column("label", "labels")
    test_dataset  = test_dataset.rename_column("label", "labels")

    # Set HF format
    train_dataset.set_format(type="torch")
    val_dataset.set_format(type="torch")
    test_dataset.set_format(type="torch")

    # Load model
    model = RobertaForSequenceClassification.from_pretrained(
        "roberta-base",
        num_labels=2
    )

    accuracy = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return accuracy.compute(predictions=predictions, references=labels)

    training_args = TrainingArguments(
        output_dir="./results",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=5e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=4,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        save_total_limit=2,
        logging_dir="./logs",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    trainer.train()

    print("Validation:", trainer.evaluate())
    print("Test:", trainer.evaluate(eval_dataset=test_dataset))

IMDB - Roberta Model

In [3]:
df_train = pd.read_csv(config.IMDB_TRAIN_PATH)
df_test = pd.read_csv(config.IMDB_TEST_PATH)
max_len = 256

In [4]:
run_roberta_pipeline(
    df_train=df_train, 
    df_test=df_test,
    max_len=max_len
)

Map:   0%|          | 0/40000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2619,0.20944,0.9206
2,0.1892,0.216736,0.9286
3,0.1294,0.301949,0.9284
4,0.0732,0.322848,0.9304




Validation: {'eval_loss': 0.3228476941585541, 'eval_accuracy': 0.9304, 'eval_runtime': 27.9762, 'eval_samples_per_second': 178.723, 'eval_steps_per_second': 11.188, 'epoch': 4.0}




Test: {'eval_loss': 0.3106346130371094, 'eval_accuracy': 0.9336, 'eval_runtime': 32.3414, 'eval_samples_per_second': 154.601, 'eval_steps_per_second': 9.678, 'epoch': 4.0}


Rotten Tomatoes - Roberta Model

In [5]:
df_train = pd.read_csv(config.RT_TRAIN_PATH)
df_test = pd.read_csv(config.RT_TEST_PATH)
max_len = 128

In [6]:
run_roberta_pipeline(
    df_train=df_train, 
    df_test=df_test,
    max_len=max_len
)

Map:   0%|          | 0/8529 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1067 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4588,0.318249,0.879925
2,0.3,0.384856,0.873358
3,0.2063,0.514336,0.877111




Validation: {'eval_loss': 0.31824928522109985, 'eval_accuracy': 0.8799249530956847, 'eval_runtime': 5.8695, 'eval_samples_per_second': 181.616, 'eval_steps_per_second': 11.415, 'epoch': 3.0}




Test: {'eval_loss': 0.3251127600669861, 'eval_accuracy': 0.8687910028116214, 'eval_runtime': 6.3847, 'eval_samples_per_second': 167.119, 'eval_steps_per_second': 10.494, 'epoch': 3.0}
