In [1]:
import numpy as np
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    EvalPrediction,
    DataCollatorWithPadding,
)
from datasets import load_dataset
from adapters import AdapterTrainer
from transformers import RobertaTokenizer

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

In [3]:
dataset = load_dataset("rotten_tomatoes")
dataset.num_rows


def preprocess_function(examples):
    # Tokenize the texts
    tokenized = tokenizer(
        examples["text"], truncation=True, padding="max_length", max_length=128
    )
    # Add labels (rename from 'label' if needed)
    tokenized["labels"] = examples["label"]
    return tokenized


tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Convert to torch tensors and set format
tokenized_dataset.set_format(
    type="torch", columns=["input_ids", "attention_mask", "labels"]
)

In [4]:
from transformers import RobertaConfig
from adapters import AutoAdapterModel

config = RobertaConfig.from_pretrained(
    "roberta-base",
    num_labels=2,
)
model = AutoAdapterModel.from_pretrained(
    "roberta-base",
    config=config,
)

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['heads.default.3.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Add a new adapter
#model.add_adapter("rotten_tomatoes", config="seq_bn")
# Alternatively, e.g.:
model.add_adapter("rotten_tomatoes", config="lora")

# Add a matching classification head
model.add_classification_head(
    "rotten_tomatoes", num_labels=2, id2label={0: "👎", 1: "👍"}
)

# Activate the adapter
model.train_adapter("rotten_tomatoes")

There are adapters available but none are activated for the forward pass.


In [6]:
# 📌 Cell 6: Define training arguments
training_args = TrainingArguments(
    output_dir="./training_output",
    learning_rate=1e-4,
    num_train_epochs=6,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    logging_steps=200,
    overwrite_output_dir=True,
    remove_unused_columns=False,
)


In [7]:
def compute_accuracy(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=1)
    return {"acc": (preds == p.label_ids).mean()}

In [8]:
# 📌 Cell 8: Create data collator (to handle padding)
data_collator = DataCollatorWithPadding(tokenizer)

In [9]:
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_accuracy,
)

In [10]:
trainer.train()

Step,Training Loss
200,0.5567
400,0.3328
600,0.3161
800,0.2944
1000,0.2845
1200,0.2765
1400,0.2848
1600,0.2706


TrainOutput(global_step=1602, training_loss=0.3270753257432383, metrics={'train_runtime': 1001.0829, 'train_samples_per_second': 51.125, 'train_steps_per_second': 1.6, 'total_flos': 3426561839508480.0, 'train_loss': 0.3270753257432383, 'epoch': 6.0})

In [11]:
trainer.evaluate()

{'eval_loss': 0.2810742259025574,
 'eval_acc': 0.8808630393996247,
 'eval_runtime': 11.4932,
 'eval_samples_per_second': 92.751,
 'eval_steps_per_second': 2.958,
 'epoch': 6.0}

In [12]:
from transformers import TextClassificationPipeline

classifier = TextClassificationPipeline(
    model=model, tokenizer=tokenizer, device=training_args.device.index
)

classifier("This is awesome!")

Device set to use cuda:0
The model 'RobertaAdapterModel' is not supported for . Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DiffLlamaForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'FunnelForSequenceClassification', 'Ge

[{'label': '👍', 'score': 0.9882262349128723}]

In [13]:
model.save_adapter("./final_adapter", "rotten_tomatoes")

!ls -lh final_adapter

'ls' is not recognized as an internal or external command,
operable program or batch file.
