In [None]:
!pip install transformers
!pip install datasets
!pip install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# --- Standard library ---
import json
from dataclasses import dataclass
from typing import Optional, Union

# --- Third-party ---
import numpy as np
import pandas as pd
import torch
import evaluate
from datasets import Dataset
from datasets import load_dataset
from datasets.dataset_dict import DatasetDict
from transformers import AutoTokenizer
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy

# --- Colab-only helper ---
from google.colab import data_table

# --- Shared objects used by later cells ---
# Tokenizer for the "bert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# Accuracy metric from the `evaluate` library.
accuracy = evaluate.load("accuracy")
# Turn on Colab's DataFrame formatter.
data_table.enable_dataframe_formatter()

In [None]:
# Question templates to keep; instances whose question is not listed here are dropped.
question_set = [
    "What is or could be the cause of target?",
    "What is or could be the prerequisite of target?",
    "What is the possible emotional reaction of the listener in response to target?",
    "What is or could be the motivation of target?",
    "What subsequent event happens or could happen following the target?"
]

# Separator placed between question, target and dialogue.
# NOTE: this is a literal backslash followed by "n" (surrounded by spaces), not a newline.
sep = " \\n "

# Read the raw JSON-lines file. The context manager closes the handle, and blank
# lines (e.g. a trailing newline at the end of the file) are skipped instead of
# crashing json.loads.
with open("test.json", encoding="utf-8") as source:
    data = [json.loads(raw_line) for raw_line in source if raw_line.strip()]

# Write one flattened multiple-choice record per kept instance (JSON lines).
with open("better_test.json", "w", encoding="utf-8") as f:
    for instance in data:
        # Keep only instances with exactly one correct answer and a known question.
        if len(instance["Correct Answers"]) == 1 and instance["Question"] in question_set:
            choices = instance["Choices"]
            context = sep.join([
                instance["Question"],
                "target: " + instance["Target"],
                "context: " + " <utt> ".join(instance["Dialogue"]),
            ])
            line = {
                "ID": instance["ID"],
                "context": context,
                # Five choice columns are always written; an instance with fewer
                # than five choices raises IndexError here.
                "choice0": choices[0],
                "choice1": choices[1],
                "choice2": choices[2],
                "choice3": choices[3],
                "choice4": choices[4],
                # The single correct answer (presumably an index into the choices;
                # verify against the raw data).
                "label": instance["Correct Answers"][0],
            }
            f.write(json.dumps(line) + "\n")

In [None]:
# Column names holding the candidate answers, and the column holding the shared context.
ending_names = [f"choice{i}" for i in range(5)]
context_name = "context"

def preprocess_function(examples):
    """Tokenize a batch of multiple-choice examples.

    Every example is expanded into ``len(ending_names)`` (choice, context)
    pairs, all pairs are tokenized in a single call, and the tokenizer output
    is regrouped so that each example again owns one list per tokenizer field,
    containing one entry per choice.

    Args:
        examples: Column-oriented batch (mapping from column name to a list of
            values), as passed by ``Dataset.map(..., batched=True)``. Must
            contain ``context_name`` and every column in ``ending_names``.

    Returns:
        dict mapping each tokenizer output key to a list with one item per
        example; each item is a list with one entry per choice.
    """
    num_choices = len(ending_names)
    contexts = examples[context_name]
    # Look each choice column up once instead of once per (example, choice) pair.
    choice_columns = [examples[name] for name in ending_names]

    # Build the flat pair lists directly (example-major, choice-minor order);
    # this replaces the quadratic ``sum(list_of_lists, [])`` flattening.
    first_sentences = [context for context in contexts for _ in range(num_choices)]
    second_sentences = [
        f"{column[i]}" for i in range(len(contexts)) for column in choice_columns
    ]

    # Tokenize
    # Use the choices as the first item in tokenizer to avoid choice trimming if tokenized length exceeds 512
    tokenized_examples = tokenizer(
        second_sentences,
        first_sentences,
        truncation=True
    )

    # Un-flatten: regroup consecutive runs of ``num_choices`` entries per example.
    return {
        k: [v[i : i + num_choices] for i in range(0, len(v), num_choices)]
        for k, v in tokenized_examples.items()
    }

In [None]:
# Load the flattened JSON-lines file; the result is a DatasetDict with a single "train" split.
dataset = load_dataset("json", data_files="better_test.json")

Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-f1b42461b02575e2/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/default-f1b42461b02575e2/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Display the loaded DatasetDict (split names, feature names, row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'context', 'choice0', 'choice1', 'choice2', 'choice3', 'choice4', 'label'],
        num_rows: 9064
    })
})

In [None]:
# Fixed seed so the sample and the train/validation/test splits are identical
# on every Restart & Run All.
SPLIT_SEED = 42

# Work on a 5% sample of the loaded "train" split (presumably to keep the run
# short; raise test_size to use more data). The sample gets its own name so
# `dataset` is not rebound and this cell can be re-run safely.
dataset_sample = dataset["train"].train_test_split(test_size=0.05, seed=SPLIT_SEED)["test"]
# 90% train, 10% test + validation
train_testvalid = dataset_sample.train_test_split(test_size=0.1, seed=SPLIT_SEED)
# Split the 10% test + valid in half test, half valid
test_valid = train_testvalid['test'].train_test_split(test_size=0.5, seed=SPLIT_SEED)

train_test_valid_dataset = DatasetDict({
    'train': train_testvalid['train'],
    'test': test_valid['test'],
    'validation': test_valid['train']})

In [None]:
# Apply preprocess_function to every split in batches; this adds the tokenizer
# output columns (one entry per answer choice) alongside the original columns.
tokenized_dataset = train_test_valid_dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/23 [00:00<?, ? examples/s]

Map:   0%|          | 0/23 [00:00<?, ? examples/s]

In [None]:
# Display the tokenized splits (feature names and row counts per split).
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'context', 'choice0', 'choice1', 'choice2', 'choice3', 'choice4', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 408
    })
    test: Dataset({
        features: ['ID', 'context', 'choice0', 'choice1', 'choice2', 'choice3', 'choice4', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 23
    })
    validation: Dataset({
        features: ['ID', 'context', 'choice0', 'choice1', 'choice2', 'choice3', 'choice4', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 23
    })
})

In [None]:
@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that dynamically pads multiple-choice inputs.

    Each incoming feature is a dict whose tokenizer fields (``input_ids`` and
    friends) hold one sequence per answer choice, plus a ``label`` (or
    ``labels``) entry. The choices of all features are flattened, padded
    together with ``tokenizer.pad``, and reshaped to
    ``(batch_size, num_choices, seq_len)``.

    Attributes are documented inline below.
    """

    # Tokenizer whose ``pad`` method performs the padding.
    tokenizer: PreTrainedTokenizerBase
    # Padding strategy forwarded to ``tokenizer.pad``.
    padding: Union[bool, str, PaddingStrategy] = True
    # Maximum length forwarded to ``tokenizer.pad``; ``None`` keeps the
    # historical value of 256 that this collator always passed.
    max_length: Optional[int] = None
    # Forwarded unchanged to ``tokenizer.pad``.
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        """Collate a list of per-example feature dicts into one padded batch.

        Args:
            features: Non-empty list of dicts. Every value except the label
                must be indexable by choice position. The dicts are not
                modified.

        Returns:
            dict of tensors shaped ``(batch_size, num_choices, -1)`` for every
            tokenizer field, plus ``"labels"`` as an int64 tensor.
        """
        label_name = "label" if "label" in features[0].keys() else "labels"
        # Read the labels without popping them, so the caller's dicts stay
        # intact and the same features can be collated more than once.
        labels = [feature[label_name] for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])

        # One flat entry per (example, choice) pair, example-major order.
        flattened_features = [
            {k: v[i] for k, v in feature.items() if k != label_name}
            for feature in features
            for i in range(num_choices)
        ]

        # Honor the configured max_length; fall back to the previous
        # hard-coded value when it is not set.
        max_length = 256 if self.max_length is None else self.max_length

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # Restore the (example, choice) structure that was flattened for padding.
        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)
        return batch

In [None]:
def compute_metrics(eval_pred):
    """Score predictions with the accuracy metric.

    Args:
        eval_pred: Pair ``(predictions, labels)``; predictions hold one score
            per choice along axis 1.

    Returns:
        Whatever ``accuracy.compute`` returns for the arg-max choice of each
        row compared against the labels.
    """
    scores, references = eval_pred
    # The predicted choice is the highest-scoring column of each row.
    chosen = np.argmax(scores, axis=1)
    return accuracy.compute(references=references, predictions=chosen)

In [None]:
# Log in to the Hugging Face Hub through the interactive notebook widget.
# The training cell sets push_to_hub=True, so this must run before it.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer

# BERT checkpoint loaded with a multiple-choice head.
model = AutoModelForMultipleChoice.from_pretrained("bert-base-uncased")

# Run configuration: evaluate and checkpoint once per epoch, reload the best
# checkpoint when training ends, and push the result to the Hub.
training_options = dict(
    output_dir="model_model_super_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=3e-6,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=1,
    weight_decay=0.005,
    push_to_hub=True,
)
training_args = TrainingArguments(**training_options)

# Pads the per-choice sequences of each batch dynamically.
multiple_choice_collator = DataCollatorForMultipleChoice(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=multiple_choice_collator,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
)

# Kept as the last expression so the cell displays the returned TrainOutput.
trainer.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMultipleChoice: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForMultipleChoice from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMultipleChoice from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMultipleChoice were not initialized from the model checkpoint at bert-base-uncased and are newly

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.56535,0.26087


TrainOutput(global_step=408, training_loss=1.5884499643363206, metrics={'train_runtime': 4713.9089, 'train_samples_per_second': 0.087, 'train_steps_per_second': 0.087, 'total_flos': 232573695848280.0, 'train_loss': 1.5884499643363206, 'epoch': 1.0})

Timepass