In [29]:
import os
from random import shuffle

import evaluate
import numpy as np
import torch
from datasets import Dataset
from termcolor import colored
from transformers import AutoModelForSequenceClassification, AutoTokenizer, \
    TrainingArguments, Trainer

In [2]:
CLASSES = {
    'yes': 0,
    'irrelevant': 1,
    'no': 2,
}
STORY_FILE = 'dataset/story.txt'
DATASET_PATH = 'dataset/'
MODEL_NAME = "cross-encoder/nli-deberta-v3-base"
BATCH_SIZE = 8
EPOCHS = 4
LEARNING_RATE = 2e-5
MAX_LENGTH = 512
DEVICE = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
DEVICE

device(type='cuda')

Here we define the tokenizer and the model using the handy `transformer` library from *HuggingFace*.

In [None]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, do_lower_case=True)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)
model = model.to(DEVICE)
pass

Next, we load the data set and split it into training and test sets.

In [6]:
story = open(STORY_FILE).read().replace("\n\n", "\n").replace("\n", " ").strip()

dataset: list[dict] = []
for file in CLASSES.keys():
    with open(os.path.join(DATASET_PATH, f'{file}.txt')) as f:
        lines = f.readlines()[:]
        print(f'Read {len(lines)} "{file}" questions')
        dataset.extend(map(lambda e: {'question': e.replace(
            '\n', '').strip(), 'answer': CLASSES[file]}, lines))

shuffle(dataset)


def preprocess(sample):
    inputs = tokenizer(
        story,
        sample["question"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    inputs["label"] = sample["answer"]
    return inputs


hf_dataset = Dataset.from_list(dataset)
tokenized_dataset = hf_dataset.map(preprocess, remove_columns=["question"])

split = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset = tokenized_dataset  #split["train"]
eval_dataset = split["test"]

Read 651 "yes" questions
Read 658 "irrelevant" questions
Read 653 "no" questions


Map:   0%|          | 0/1962 [00:00<?, ? examples/s]

For Ġ, look at https://discuss.huggingface.co/t/bpe-tokenizers-and-spaces-before-words/475

In [None]:
training_args = TrainingArguments(
    output_dir="./results",
    report_to='none',
    eval_strategy="epoch",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    eval_accumulation_steps=10,
    disable_tqdm=False
)

metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    return metric.compute(predictions=predictions, references=labels)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

Executing the training (~ 1h $\times$ epoch using `cross-encoder/nli-deberta-v3-base`).

In [None]:
trainer.train()

Forcibly saving the meodel in memory.

In [None]:
checkpoint_path = "checkpoint/deberta_seagull_ep_4_lr_2e-5_train_0.09_test_0.23_half"
model.save_pretrained(checkpoint_path)
tokenizer.save_pretrained(checkpoint_path)

Loading model and tokenizer from a checkpoint. 

In [49]:
checkpoint_path = "checkpoint/deberta_seagull_ep_4_lr_2e-5_train_0.09_test_0.23_half"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)

Evaluation over the test set.

In [9]:
training_args = TrainingArguments(
    output_dir="results",
    per_device_eval_batch_size=16,
    logging_dir="logs",
    do_train=False,
    do_eval=True,
)
tester = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)
results = tester.evaluate(eval_dataset=eval_dataset)
results

  tester = Trainer(
[codecarbon ERROR @ 16:58:37] Error: Another instance of codecarbon is already running. Turn off the other instance to be able to run this one. Exiting.


{'eval_loss': 0.152888223528862,
 'eval_model_preparation_time': 0.0017,
 'eval_accuracy': 0.9695431472081218,
 'eval_runtime': 6.3388,
 'eval_samples_per_second': 31.078,
 'eval_steps_per_second': 2.051}

Inference of a single question.

In [50]:
questions = [("Albert shoot himself for a reason", 0),
             ('Lucy is an ugly woman', 1),
             ('Albert has a wife', 0),
             ('Dave has a watch on his wrist', 1),
             ('Time has come for Miyamoto Musashi to die honorably', 1),
             ('Someone brought Albert and Dave on the pier', 0),
             ('Albert and Dave came to the pier on their own', 2),
             ('Politics is important to unravel the mistery of this story', 2),
             ('Politics is important for this story', 2),
             ('Something really sad happened to Albert and Dave before coming to the pier', 0),
             ('A friend of Albert and Dave brought them to the pier', 2),
             ('A pirate brought Albert and Dave to the pier', 2),
             ('A soldier brought Albert and Dave to the pier', 2),
             ('A communist brought Albert and Dave to the pier', 2),
             ('A sailor helped Albert and Dave to the pier', 0),
             ('Albert ordered spaghetti together with the seagull meat', 2),
             ('Dave was hungry', 1),
             ('Dave is married', 1),
             ('Albert and Dave were alone before coming to the pier', 2),
             ('Albert and Dave were on an island before the pier', 0),
             ('Dave suspected that Albert would kill himself', 0),
             ('There were seagulls flying over at the pier', 1),
             ('The seagull meat that Albert ordered tasted good', 1),
             ('Albert put salt on the meat before eating it', 1),
             ('cancer is the cause of Albert\'s death', 2),
             ('Dave was happy that Albert decided to kill himself', 2),
             ('Dave secretly hated Albert', 2),
             ('Dave secretly loved Lucy', 2),
             ('Albert was happy about Lucy\'s death', 2),
             ('The seagull meat was on the menu', 0),
             ('The cook is really talented', 1),
             ]
correct = 0
for question in questions:
    inputs = tokenizer(story, question[0], truncation=True, padding=True, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=-1)
    if prediction.item() == question[1]:
        correct += 1
    print(colored(
        f'[{question[0]}] ---> {np.round(torch.softmax(outputs.logits, 1).squeeze().cpu().numpy(), 3)} ({[key for key, value in CLASSES.items() if value == prediction.item()][0]})',
        'green' if prediction.item() == question[1] else 'red'))
print(f'Accuracy: {correct}/{len(questions)} ->', round(correct / len(questions), 4))

[32m[Albert shoot himself for a reason] ---> [0.595 0.068 0.337] (yes)[0m
[32m[Lucy is an ugly woman] ---> [0.003 0.989 0.008] (irrelevant)[0m
[32m[Albert has a wife] ---> [0.448 0.311 0.241] (yes)[0m
[32m[Dave has a watch on his wrist] ---> [0.    0.999 0.001] (irrelevant)[0m
[31m[Time has come for Miyamoto Musashi to die honorably] ---> [0.56  0.103 0.336] (yes)[0m
[32m[Someone brought Albert and Dave on the pier] ---> [0.941 0.013 0.046] (yes)[0m
[32m[Albert and Dave came to the pier on their own] ---> [0.002 0.005 0.992] (no)[0m
[31m[Politics is important to unravel the mistery of this story] ---> [0.99  0.001 0.008] (yes)[0m
[32m[Politics is important for this story] ---> [0.006 0.022 0.972] (no)[0m
[32m[Something really sad happened to Albert and Dave before coming to the pier] ---> [1. 0. 0.] (yes)[0m
[32m[A friend of Albert and Dave brought them to the pier] ---> [0.    0.004 0.996] (no)[0m
[32m[A pirate brought Albert and Dave to the pier] ---> [0.    0.

Publish the model on HuggingFace.

In [52]:
model.half()
model.push_to_hub("TheSeagullStory-nli-deberta-v3-base", use_auth_token='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')

model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/MrPio/TheSeagullStory-nli-deberta-v3-base/commit/4223619e63238e3b1f73cec22849a61d84318037', commit_message='Upload DebertaV2ForSequenceClassification', commit_description='', oid='4223619e63238e3b1f73cec22849a61d84318037', pr_url=None, pr_revision=None, pr_num=None)