In [1]:
from transformers import AutoTokenizer, DistilBertConfig, TrainingArguments, set_seed
from adapters import AutoAdapterModel, AdapterTrainer
from datasets import load_dataset
from utils import compute_metrics

### Dataset

In [2]:
# Same corpus as in the "classification" experiments, so results stay comparable.
dataset = load_dataset(path='emotion')
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

### Model with adapter (bottleneck)

In [3]:
# Checkpoint identifier shared by the tokenizer, the config and the model below.
model_chckpt = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_chckpt)



In [4]:
def tokenize_batch(batch, tokenizer=tokenizer):
    """Tokenize the ``'text'`` entries of ``batch``.

    Args:
        batch: Mapping that provides the raw texts under the ``'text'`` key.
        tokenizer: Callable tokenizer; defaults to the notebook-level
            ``tokenizer`` as it exists when this function is defined.

    Returns:
        Whatever ``tokenizer`` returns when called on the texts with
        truncation and padding enabled.
    """
    texts = batch['text']
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded

# Tokenize every split. NOTE(review): per the `datasets` docs, batch_size=None
# should hand each split to tokenize_batch as one batch, so padding=True pads
# to the longest text of that split -- confirm for the installed version.
dataset = dataset.map(
    function=tokenize_batch,
    batched=True,
    batch_size=None,
)
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'attention_mask'],
        num_rows: 2000
    })
})

In [5]:
# Switch the output format of all splits in place; no new object is created.
dataset.set_format(type='pytorch')

In [6]:
# Configuration of the pretrained checkpoint, set up for six output labels.
config = DistilBertConfig.from_pretrained(
    pretrained_model_name_or_path=model_chckpt,
    num_labels=6,
)
# Load the same checkpoint into an adapter-capable model using that config.
model = AutoAdapterModel.from_pretrained(model_chckpt, config=config)

In [7]:
# The adapter and the classification head are newly created here, before the
# trainer exists, so seed the RNGs explicitly to make their initial weights
# (and therefore the reported metrics) repeatable across fresh-kernel runs.
# 42 is the default `seed` of TrainingArguments, so both stages share a seed.
set_seed(42)

# Register an adapter named 'emotion' using the 'seq_bn' configuration.
model.add_adapter('emotion', config='seq_bn')
# Attach a six-way classification head under the same name as the adapter.
model.add_classification_head('emotion', num_labels=6)
# Select the 'emotion' adapter for training.
# NOTE(review): per the adapters docs this should freeze the base model
# weights and activate the adapter -- confirm for the installed version.
model.train_adapter('emotion')

In [8]:
# Pick the splits used for fitting and for per-epoch evaluation.
train_ds, val_ds = dataset['train'], dataset['validation']

In [9]:
# Hyperparameters of the adapter fine-tuning run.
learning_rate = 1e-4
num_train_epochs = 3
batch_size = 4
# Number of whole batches in the training split; used as the logging interval
# so the training loss is reported once per pass over the data.
logging_steps = len(train_ds) // batch_size

training_args = TrainingArguments(
    output_dir=f'{model_chckpt}_emotions_adapter',
    # Optimisation schedule.
    learning_rate=learning_rate,
    num_train_epochs=num_train_epochs,
    # Same batch size for training and evaluation.
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Reporting: evaluate after every epoch, log every `logging_steps` steps.
    evaluation_strategy='epoch',
    logging_steps=logging_steps,
    disable_tqdm=False,
)

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Last expression of the cell, so the returned TrainOutput is displayed.
trainer.train()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,F1 Macro,F1 Micro,Acc
1,0.5718,0.331729,0.88321,0.912,0.912
2,0.2965,0.264962,0.900038,0.9255,0.9255
3,0.2248,0.236595,0.896599,0.9275,0.9275


TrainOutput(global_step=12000, training_loss=0.3643390401204427, metrics={'train_runtime': 337.1525, 'train_samples_per_second': 142.369, 'train_steps_per_second': 35.592, 'total_flos': 1107322057728000.0, 'train_loss': 0.3643390401204427, 'epoch': 3.0})

In [10]:
# Evaluate the trained model on the trainer's eval_dataset, which was set to
# val_ds (the validation split) when the trainer was built; the returned
# metrics dict is displayed as the cell output.
trainer.evaluate()

{'eval_loss': 0.23659512400627136,
 'eval_f1_macro': 0.8965990768596228,
 'eval_f1_micro': 0.9275,
 'eval_acc': 0.9275,
 'eval_runtime': 6.527,
 'eval_samples_per_second': 306.42,
 'eval_steps_per_second': 76.605,
 'epoch': 3.0}