In [1]:
!pip install transformers==4.52.4 datasets==3.6.0 evaluate==0.4.3 huggingface-hub==0.32.6 --quiet
!pip install optuna --quiet
!pip install --upgrade scikit-learn --quiet



In [2]:
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification, TrainingArguments, Trainer
from datasets import load_from_disk
import evaluate
import os
import numpy as np


In [3]:
import zipfile

# Unpack the pre-tokenised dataset archive into a folder in the working directory.
with zipfile.ZipFile("tokenised_asap_split.zip", mode="r") as archive:
    archive.extractall(path="tokenised_asap_split")

# Sanity check: list the top-level entries that were extracted.
print(os.listdir("tokenised_asap_split"))


['__MACOSX', 'tokenised_asap_split']


In [4]:
# Location of the saved dataset inside the extracted folder; adjust if different.
dataset_path = "tokenised_asap_split/tokenised_asap_split"

# Load the saved splits and expose the target column as "labels",
# for Trainer compatibility.
dataset_dict = load_from_disk(dataset_path).rename_column("score_scaled", "labels")

print(dataset_dict)


DatasetDict({
    train: Dataset({
        features: ['essay', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 9651
    })
    validation: Dataset({
        features: ['essay', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 1206
    })
    test: Dataset({
        features: ['essay', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 1207
    })
})


In [5]:
# Single-output sequence head (num_labels=1) on top of the pretrained encoder.
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=1,
)
# Matching fast tokenizer for the same checkpoint.
tokenizer = RobertaTokenizerFast.from_pretrained(
    "roberta-base",
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Baseline training configuration. No evaluation strategy and no
# load_best_model_at_end are configured here.
training_args = TrainingArguments(
    # Where artefacts go; reporting integrations are disabled.
    output_dir="./results",
    logging_dir="./logs",
    report_to=[],
    # Optimisation settings.
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Checkpoint every `save_steps` optimisation steps.
    save_strategy="steps",
    save_steps=500,
    # Steps between evaluations (won't be used if no eval strategy is set).
    eval_steps=500,
)


In [7]:
# Metric objects are loaded once at cell level and reused on every call.
mse = evaluate.load("mse")
r2 = evaluate.load("r_squared")

def compute_metrics(eval_pred):
    """Compute MSE and R^2 for a (predictions, labels) pair.

    Args:
        eval_pred: A two-item sequence ``(preds, labels)``. ``preds`` must
            support ``.squeeze()``; it is squeezed before being compared
            with ``labels``.

    Returns:
        dict with keys ``"mse"`` and ``"r2"``.
    """
    preds, labels = eval_pred
    preds = preds.squeeze()

    mse_result = mse.compute(predictions=preds, references=labels)
    r2_result = r2.compute(predictions=preds, references=labels)

    # The `r_squared` metric is documented as returning a bare float rather
    # than a dict, in which case indexing it with ["r_squared"] raises
    # TypeError. Accept either shape so the callback works in both cases.
    if isinstance(r2_result, dict):
        r2_value = r2_result["r_squared"]
    else:
        r2_value = r2_result

    return {
        "mse": mse_result["mse"],
        "r2": r2_value
    }


In [8]:
# Bundle the model, training configuration, data splits and metric callback.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=dataset_dict["train"],
    # use validation split if available
    eval_dataset=dataset_dict["validation"],
)

# Training is left disabled in this cell.
#trainer.train()


In [None]:
import optuna
from transformers import RobertaForSequenceClassification, TrainingArguments, Trainer
import evaluate
import numpy as np

# Load the `evaluate` metric objects once at cell level.
# NOTE(review): the compute_metrics defined just below in this cell computes
# MSE and R^2 directly with NumPy and does not reference `mse` or `r2`.
mse = evaluate.load("mse")
r2 = evaluate.load("r_squared")

def compute_metrics(eval_pred):
    """Return mean squared error and R^2 computed with NumPy.

    Args:
        eval_pred: A two-item sequence ``(preds, labels)``; ``preds`` is
            squeezed before being compared element-wise with ``labels``.

    Returns:
        dict with keys ``"mse"`` and ``"r2"``.
    """
    predictions, targets = eval_pred
    predictions = predictions.squeeze()

    squared_errors = (predictions - targets) ** 2
    residual_sum = np.sum(squared_errors)
    # Total sum of squares of the targets around their own mean.
    total_sum = np.sum((targets - np.mean(targets)) ** 2)

    metrics = {}
    metrics["mse"] = np.mean(squared_errors)
    metrics["r2"] = 1 - residual_sum / total_sum
    return metrics


def objective(trial):
    """Optuna objective: train a fresh model with sampled hyperparameters.

    Samples learning rate, training batch size, weight decay and epoch count
    from ``trial``, trains a newly loaded ``roberta-base`` model with them on
    ``dataset_dict["train"]``, and evaluates on ``dataset_dict["validation"]``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The ``"eval_mse"`` value reported by ``trainer.evaluate()``.
    """
    # Sample hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 5e-5, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.3)
    num_train_epochs = trial.suggest_int("num_train_epochs", 2, 5)

    # The sampled values must be passed through here; with hard-coded
    # literals every trial would train the same configuration.
    training_args = TrainingArguments(
        output_dir="./results",
        report_to=[],
        eval_steps=500,           # Number of steps between evaluations (won't be used if no eval strategy)
        save_strategy="steps",    # Save checkpoint every `save_steps`
        save_steps=500,
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        # Evaluation batch size is kept fixed; only the training batch size is tuned.
        per_device_eval_batch_size=8,
        num_train_epochs=num_train_epochs,
        weight_decay=weight_decay,
        logging_dir="./logs"
        # No evaluation_strategy or load_best_model_at_end
    )

    # Load the model anew for every trial so trials do not share weights.
    model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=1)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset_dict["train"],
        eval_dataset=dataset_dict["validation"],
        compute_metrics=compute_metrics,
    )

    trainer.train()
    eval_result = trainer.evaluate()
    return eval_result["eval_mse"]


# Set up an Optuna study that minimises the value returned by `objective`.
study = optuna.create_study(
    direction="minimize",
)
# Run 25 trials of the objective.
study.optimize(
    objective,
    n_trials=25,
)

print("Best hyperparameters: ", study.best_params)


[I 2025-06-11 16:09:40,412] A new study created in memory with name: no-name-d0b1becc-651c-44b1-bdd8-d193f8c2c1c6
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
