In [1]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the SWAG dataset. No configuration name is passed (second argument is None);
# the log output below reports that the "regular" configuration is used by default.
raw_datasets = load_dataset("swag", None)

No config specified, defaulting to: swag/regular
Reusing dataset swag (/home/jacky/.cache/huggingface/datasets/swag/regular/0.0.0/9640de08cdba6a1469ed3834fcab4b8ad8e38caf5d1ba5e7436d8b1fd067ad4c)
100%|██████████| 3/3 [00:00<00:00, 60.59it/s]


In [3]:
# Peek at the first example of the test split to see the available columns.
# Note in the output that this test example carries `label: -1`.
raw_datasets["test"][0]

{'video-id': 'anetv_pIUpJihiju0',
 'fold-ind': '11871',
 'startphrase': 'A person shows the bottom of a large dust mop. Then, the person',
 'sent1': 'A person shows the bottom of a large dust mop.',
 'sent2': 'Then, the person',
 'gold-source': 'gold',
 'ending0': 'places the scrub on the floor then cleans the soap evenly.',
 'ending1': 'cleans a gym with the large dust mop.',
 'ending2': 'stops and spray a car with some spray paint.',
 'ending3': 'places the bucket and a bucket.',
 'label': -1}

In [85]:
from transformers import (
    AutoTokenizer
)

# Load the fast tokenizer matching the `roberta-base` checkpoint used for the model below.
# The keyword for pinning a checkpoint revision is `revision` (the same keyword the
# config/model loading cell uses); `model_revision` is not a `from_pretrained` argument
# and would merely be forwarded to the tokenizer constructor as an unknown extra kwarg.
tokenizer = AutoTokenizer.from_pretrained(
    "roberta-base",
    use_fast=True,
    revision="main",
)

In [5]:
from itertools import chain

# Names of the dataset columns consumed by `preprocess_function`:
# the context sentence, the start of the follow-up sentence, and the candidate endings.
context_name = "sent1"
question_header_name = "sent2"
ending_names = ["ending0", "ending1", "ending2", "ending3"]

# Truncation limit passed to the tokenizer: the tokenizer's own maximum, capped at 1024.
max_seq_length = min(tokenizer.model_max_length, 1024)

def preprocess_function(examples):
    """Tokenize a batch of examples as (context, header + ending) sentence pairs.

    For every example the context sentence (column ``context_name``) is paired
    with each candidate ending (columns listed in ``ending_names``), where each
    ending is prefixed by the example's question header (column
    ``question_header_name``). All pairs are tokenized in one flat call and the
    result is regrouped so that every example owns one entry per ending.

    Args:
        examples: A batch in column form, i.e. a mapping from column name to a
            list of per-example values (as produced by ``Dataset.map`` with
            ``batched=True``).

    Returns:
        A dict mapping each tokenizer output field to a list with one item per
        example; each item is a list holding ``len(ending_names)`` token
        sequences, in the order of ``ending_names``.
    """
    # Derive the group size from the configured endings instead of repeating a
    # literal, so the flatten / un-flatten steps can never disagree with
    # `ending_names`.
    num_choices = len(ending_names)
    question_headers = examples[question_header_name]

    # Flat list: each context repeated once per candidate ending.
    first_sentences = [
        context for context in examples[context_name] for _ in range(num_choices)
    ]
    # Flat list in the same order: "<header> <ending>" for every (example, ending).
    second_sentences = [
        f"{header} {examples[end][i]}"
        for i, header in enumerate(question_headers)
        for end in ending_names
    ]

    # Tokenize all pairs at once; padding is deferred to the data collator.
    tokenized_examples = tokenizer(
        first_sentences,
        second_sentences,
        truncation=True,
        max_length=max_seq_length,
        padding=False,
    )

    # Un-flatten: regroup consecutive runs of `num_choices` encodings per example.
    return {
        k: [v[i : i + num_choices] for i in range(0, len(v), num_choices)]
        for k, v in tokenized_examples.items()
    }

In [93]:
# Tokenize the validation and train splits (in that order) with identical settings:
# batched mapping, no multiprocessing, and reuse of any cached result.
valid_dataset, train_dataset = (
    raw_datasets[split_name].map(
        preprocess_function,
        batched=True,
        num_proc=None,
        load_from_cache_file=True,
    )
    for split_name in ("validation", "train")
)

Loading cached processed dataset at /home/jacky/.cache/huggingface/datasets/swag/regular/0.0.0/9640de08cdba6a1469ed3834fcab4b8ad8e38caf5d1ba5e7436d8b1fd067ad4c/cache-7c46fde38bb83186.arrow
100%|██████████| 74/74 [00:13<00:00,  5.45ba/s]


In [62]:
# Show the tokenized validation split; the output lists the original columns plus the
# `input_ids` and `attention_mask` columns added by the mapping step.
valid_dataset

Dataset({
    features: ['video-id', 'fold-ind', 'startphrase', 'sent1', 'sent2', 'gold-source', 'ending0', 'ending1', 'ending2', 'ending3', 'label', 'input_ids', 'attention_mask'],
    num_rows: 20006
})

In [8]:
# Peek at the first raw validation example. Here `label` is 2 — presumably the index of
# the correct ending among ending0..ending3 (TODO confirm against the dataset card).
raw_datasets["validation"][0]

{'video-id': 'lsmdc1052_Harry_Potter_and_the_order_of_phoenix-94857',
 'fold-ind': '18313',
 'startphrase': 'Students lower their eyes nervously. She',
 'sent1': 'Students lower their eyes nervously.',
 'sent2': 'She',
 'gold-source': 'gold',
 'ending0': 'pats her shoulder, then saunters toward someone.',
 'ending1': 'turns with two students.',
 'ending2': 'walks slowly towards someone.',
 'ending3': 'wheels around as her dog thunders out.',
 'label': 2}

In [11]:
from dataclasses import dataclass
from typing import Optional, Union

import torch
from transformers.tokenization_utils import PaddingStrategy
from transformers.tokenization_utils_base import PreTrainedTokenizerBase

@dataclass
class DataCollatorForMultipleChoice:
    """Collate multiple-choice features into padded ``(batch, num_choices, -1)`` tensors.

    Each feature is a dict that holds a label (under ``"label"`` or
    ``"labels"``) and, for every tokenizer field such as ``"input_ids"``, one
    sequence per answer choice. The per-choice sequences of all features are
    flattened, padded together through ``tokenizer.pad`` and then reshaped
    back so that each feature owns ``num_choices`` rows.

    Entries that are not organised per choice (strings, scalars, or sequences
    whose length differs from the number of choices) are ignored, so features
    that still carry raw dataset columns can be collated directly.

    The input features are not modified.
    """

    # Tokenizer whose ``pad`` method performs the padding.
    tokenizer: PreTrainedTokenizerBase
    # The three settings below are forwarded unchanged to ``tokenizer.pad``.
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    @staticmethod
    def _is_per_choice(value, num_choices):
        """Return True if ``value`` is a non-string sequence with one item per choice."""
        if isinstance(value, (str, bytes)) or not hasattr(value, "__len__"):
            return False
        return len(value) == num_choices

    def __call__(self, features):
        """Build one batch from a list of feature dicts.

        Args:
            features: Non-empty list of dicts as described in the class docstring.

        Returns:
            A dict with one tensor per padded field, each viewed as
            ``(len(features), num_choices, -1)``, plus ``"labels"`` as an
            int64 tensor with one entry per feature.
        """
        label_name = "label" if "label" in features[0] else "labels"
        # Read the labels instead of popping them so the caller's dicts stay
        # intact and the same features can be collated more than once.
        labels = [feature[label_name] for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])

        # Flatten to one dict per (feature, choice), keeping only per-choice fields.
        flattened_features = [
            {
                k: v[i]
                for k, v in feature.items()
                if k != label_name and self._is_per_choice(v, num_choices)
            }
            for feature in features
            for i in range(num_choices)
        ]

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # Un-flatten: regroup the padded rows by feature.
        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        # Add back labels
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)
        return batch

In [13]:
data_collator = DataCollatorForMultipleChoice(
    tokenizer=tokenizer,
    pad_to_multiple_of=None
)

In [81]:
import torch

# Step-by-step replay of the collation logic on three validation examples,
# restricted to the two tokenizer columns so the raw text columns are left out.
features = [valid_dataset[row] for row in range(3)]
label_name = "labels" if "label" not in features[0].keys() else "label"
labels = [example.pop(label_name) for example in features]
batch_size, num_choices = len(features), len(features[0]["input_ids"])
cols = ("input_ids", "attention_mask")

# One dict per (example, choice) pair, in example-major order.
flattened_features = [
    {col: example[col][choice] for col in cols}
    for example in features
    for choice in range(num_choices)
]

# Pad every flattened encoding to a common length and return PyTorch tensors.
batch = tokenizer.pad(
    flattened_features,
    padding=True,
    max_length=None,
    pad_to_multiple_of=None,
    return_tensors="pt",
)

# Regroup rows as (batch_size, num_choices, -1), then attach the labels.
batch = {name: tensor.view(batch_size, num_choices, -1) for name, tensor in batch.items()}
batch["labels"] = torch.tensor(labels, dtype=torch.int64)

In [90]:
from transformers import (
    AutoConfig,
    AutoModelForMultipleChoice
)

# Fetch the configuration of the `roberta-base` checkpoint (revision "main").
config = AutoConfig.from_pretrained(
    "roberta-base",
    revision="main",
    cache_dir=None,
    use_auth_token=None,
)

# Instantiate the multiple-choice model from the same checkpoint and configuration.
# As the log below reports, the classifier weights are newly initialized.
model = AutoModelForMultipleChoice.from_pretrained(
    "roberta-base",
    config=config,
    revision="main",
    from_tf=False,
    cache_dir=None,
    use_auth_token=None,
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForMultipleChoice: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForMultipleChoice from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMultipleChoice from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForMultipleChoice were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predi

In [110]:
import numpy as np
def compute_metrics(eval_predictions):
    """Compute accuracy from a ``(predictions, label_ids)`` pair.

    The predicted choice of each example is the argmax of ``predictions``
    along axis 1; accuracy is the mean (computed in float32) of the
    element-wise matches with ``label_ids``.

    Returns:
        ``{"accuracy": <python float>}``
    """
    scores, references = eval_predictions
    chosen = np.argmax(scores, axis=1)
    is_correct = (chosen == references).astype(np.float32)
    return {"accuracy": is_correct.mean().item()}

In [111]:
from transformers import (
    TrainingArguments,
    Trainer
)

# Build a Trainer over a six-example slice (rows 0, 10, ..., 50) of each split,
# writing to /tmp/swag_base with `no_cuda=True`.
trainer = Trainer(
    model=model,
    args=TrainingArguments(no_cuda=True, output_dir="/tmp/swag_base"),
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset.select([10 * k for k in range(6)]),
    eval_dataset=valid_dataset.select([10 * k for k in range(6)]),
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [112]:
# Evaluate on the six-example validation subset. No `trainer.train()` call appears in the
# visible cells before this one, so these metrics describe the model as loaded above.
trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `RobertaForMultipleChoice.forward` and have been ignored: startphrase, fold-ind, ending3, video-id, ending1, sent1, ending2, sent2, ending0, gold-source. If startphrase, fold-ind, ending3, video-id, ending1, sent1, ending2, sent2, ending0, gold-source are not expected by `RobertaForMultipleChoice.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 6
  Batch size = 8


{'eval_loss': 1.38624906539917,
 'eval_accuracy': 0.3333333432674408,
 'eval_runtime': 0.3616,
 'eval_samples_per_second': 16.593,
 'eval_steps_per_second': 2.766}