In [7]:
import torch

# Pick the compute device: use the MPS backend when PyTorch reports it as
# available, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Bare last expression so the notebook displays the chosen device.
device

device(type='mps')

In [8]:
from transformers import AutoTokenizer

# Name of the pretrained checkpoint; reused below when loading the model and
# when naming the fine-tuning output directory.
model_ckpt = "distilbert-base-uncased"

# Load the tokenizer that belongs to the checkpoint, explicitly requesting the
# fast implementation.
tokenizer = AutoTokenizer.from_pretrained(
    model_ckpt,
    use_fast=True,
)

def tokenize(batch):
    """Tokenize the ``"text"`` field of ``batch`` with the notebook-level ``tokenizer``.

    Args:
        batch: A mapping with a ``"text"`` entry; it is passed straight to the
            tokenizer (with ``batched=True`` mapping this is presumably a list
            of strings -- verify against the caller).

    Returns:
        Whatever the tokenizer call returns for those texts, with padding and
        truncation both enabled.
    """
    texts = batch["text"]
    encoded = tokenizer(texts, padding=True, truncation=True)
    return encoded

In [9]:
from datasets import load_dataset

# Load the "emotion" dataset by name.
emotions = load_dataset("emotion")

# Apply `tokenize` to the dataset in batched mode so each call receives a
# batch of examples rather than a single example.
emotions_encoded = emotions.map(function=tokenize, batched=True)

No config specified, defaulting to: emotion/split
Found cached dataset emotion (/Users/stefan/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)


  0%|          | 0/3 [00:00<?, ?it/s]

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [10]:
from transformers import AutoModelForSequenceClassification

# Size of the classification head.
# NOTE(review): 6 is presumably the number of classes in the emotion dataset
# loaded earlier -- confirm against the dataset's label feature.
num_labels = 6

# Build a sequence-classification model from the pretrained checkpoint, then
# move it onto the device selected at the top of the notebook.
model = AutoModelForSequenceClassification.from_pretrained(
    model_ckpt,
    num_labels=num_labels,
)
model = model.to(device)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.bias', 'pre_classifier.we

In [11]:
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (the reference labels) and
            ``predictions`` (scores whose argmax along axis 1 is taken as the
            predicted class).

    Returns:
        dict with two entries: ``"accuracy"`` and ``"f1"`` (weighted average).
    """
    y_true = pred.label_ids
    # The highest-scoring column along axis 1 is the predicted class index.
    y_pred = pred.predictions.argmax(1)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average="weighted"),
    }

In [15]:
from transformers import Trainer, TrainingArguments

# Per-device batch size shared by training and evaluation.
batch_size = 64

# Log the training loss once per epoch. `logging_steps` counts optimizer
# steps, not samples, so the number of training examples must be divided by
# the batch size. The previous value (the raw example count) was larger than
# the total number of training steps, so the periodic training-loss log never
# fired.
logging_steps = len(emotions_encoded["train"]) // batch_size
# Backward-compatible alias for the original (misspelled) variable name, in
# case other cells still reference it.
loggin_steps = logging_steps

# Output directory name derived from the checkpoint being fine-tuned.
model_name = f"{model_ckpt}-finetuned-emotion"

train_args = TrainingArguments(
    output_dir=model_name,
    num_train_epochs=2,
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    # Evaluate at the end of every epoch.
    evaluation_strategy="epoch",
    # Keep the progress bars visible.
    disable_tqdm=False,
    logging_steps=logging_steps,
    push_to_hub=False,
    log_level="error",
    # NOTE(review): newer transformers releases may select MPS automatically
    # and deprecate this flag -- confirm against the installed version.
    use_mps_device=True,
)

# Wire the model, training arguments, tokenized splits, tokenizer and metric
# callback into a Trainer.
trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=emotions_encoded["train"],
    eval_dataset=emotions_encoded["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Sanity check: show which device the Trainer resolved before training starts.
print("Trainer device:", trainer.args.device)

# Run fine-tuning; as the last expression, the returned value is displayed by
# the notebook.
trainer.train()

Trainer device: mps




  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.3014465868473053, 'eval_accuracy': 0.9125, 'eval_f1': 0.9100273321574164, 'eval_runtime': 3.5415, 'eval_samples_per_second': 564.737, 'eval_steps_per_second': 9.036, 'epoch': 1.0}


  0%|          | 0/32 [00:00<?, ?it/s]

{'eval_loss': 0.210562601685524, 'eval_accuracy': 0.93, 'eval_f1': 0.9299848058607493, 'eval_runtime': 2.6752, 'eval_samples_per_second': 747.614, 'eval_steps_per_second': 11.962, 'epoch': 2.0}
{'train_runtime': 216.2272, 'train_samples_per_second': 147.992, 'train_steps_per_second': 2.312, 'train_loss': 0.5355270385742188, 'epoch': 2.0}


TrainOutput(global_step=500, training_loss=0.5355270385742188, metrics={'train_runtime': 216.2272, 'train_samples_per_second': 147.992, 'train_steps_per_second': 2.312, 'train_loss': 0.5355270385742188, 'epoch': 2.0})