In [1]:
import os

# Redirect the Hugging Face cache directory. This cell runs before the cell that
# imports transformers/datasets/evaluate.
# NOTE(review): presumably those libraries read HF_HOME at import time, so keep this cell first - confirm.
os.environ["HF_HOME"] = "/data1/malto/cache"

In [2]:
import evaluate
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding
import numpy as np
from datasets import load_dataset
from pathlib import Path

# Root directory that holds the dataset JSON files and the training output directory.
BASE_DIR = Path("/data1/malto/shroom/")
# Per-device batch size, used for both training and evaluation.
BATCH_SIZE = 4
# Number of passes over the training split.
NUM_EPOCHS = 10
# When FREEZE is set, the embeddings and the first FROZEN_LAYERS encoder layers
# are excluded from gradient updates.
FREEZE = True
FROZEN_LAYERS = 15

# Pretrained checkpoint shared by the tokenizer and the classification model.
checkpoint = "microsoft/deberta-v2-xxlarge-mnli"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Pads every batch with the tokenizer's padding settings at collation time.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [3]:
def preprocess_function(examples): # not batched
    """Tokenize one example as a (hypothesis, reference) pair and attach its binary label.

    The hypothesis (``hyp``) is paired with ``src`` when the example's ``ref``
    field equals ``'src'``; for any other ``ref`` value it is paired with ``tgt``.
    The pair is truncated to at most 80 tokens.

    Args:
        examples: A single (non-batched) record with the keys ``hyp``, ``tgt``,
            ``src``, ``ref`` and ``p(Hallucination)``.

    Returns:
        The tokenizer output with an added ``"label"`` entry: 1 when
        ``p(Hallucination)`` is strictly greater than 0.5, otherwise 0.
    """
    hypothesis = examples['hyp']
    if examples['ref'] == 'src':
        reference_text = examples['src']
    else:
        reference_text = examples['tgt']

    encoded = tokenizer(hypothesis, reference_text, truncation=True, max_length=80)
    encoded["label"] = int(examples['p(Hallucination)'] > 0.5)
    return encoded

def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds one
            row of per-class scores per example (the arg-max over axis 1 is
            taken as the predicted class); ``labels`` holds the reference
            class ids.

    Returns:
        Whatever the ``accuracy`` metric's ``compute`` call returns for the
        predicted classes and the reference labels.
    """
    # Load the metric lazily and cache it on the function object so that it is
    # not reloaded on every evaluation pass.
    metric = getattr(compute_metrics, "_accuracy_metric", None)
    if metric is None:
        metric = evaluate.load("accuracy")
        compute_metrics._accuracy_metric = metric

    predictions, labels = eval_pred
    predicted_classes = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predicted_classes, references=labels)

# Class-id <-> class-name mappings handed to the model configuration.
id2label = {0: "Not Hallucination", 1: "Hallucination"}
label2id = {name: class_id for class_id, name in id2label.items()}

# The checkpoint's classifier head has 3 outputs while this task has 2, so
# ignore_mismatched_sizes=True lets the head be newly initialized instead of
# failing on the shape mismatch (see the warning printed below this cell).
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

  return self.fget.__get__(instance, owner)()
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v2-xxlarge-mnli and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 1536]) in the checkpoint and torch.Size([2, 1536]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Optionally freeze the lower part of the network: the embeddings and the first
# FROZEN_LAYERS encoder layers stop receiving gradient updates; every other
# parameter keeps its current requires_grad setting.
# NOTE(review): the "microsoft" prefix check presumably stands in for "the model
# exposes a .deberta backbone" - confirm before switching checkpoints.
if FREEZE and checkpoint.startswith("microsoft"):
    print("freezing...")
    frozen_modules = (
        model.deberta.embeddings,
        model.deberta.encoder.layer[:FROZEN_LAYERS],
    )
    for frozen_module in frozen_modules:
        # requires_grad_(False) flips the flag on every parameter of the module.
        frozen_module.requires_grad_(False)

freezing...


In [5]:
def _load_tokenized_split(file_name):
    """Load one JSON file from BASE_DIR and run preprocess_function on every example.

    Args:
        file_name: Name of the JSON file, relative to ``BASE_DIR``.

    Returns:
        The object returned by ``load_dataset(...).map(preprocess_function)``;
        the rest of the notebook reads its ``'train'`` split.
    """
    raw_splits = load_dataset("json", data_files=[str(BASE_DIR / file_name)])
    return raw_splits.map(preprocess_function)


ds_val = _load_tokenized_split("val.model-agnostic.json")
ds_val_aware = _load_tokenized_split("val.model-aware.json")

In [6]:
# Original dataset fields that are dropped after tokenization; the tokenizer
# outputs and the "label" field added by preprocess_function are kept.
raw_columns = ['labels', 'model', 'ref', 'hyp', 'task', 'tgt', 'p(Hallucination)', 'src', 'C-W']

ds_val = ds_val.remove_columns(list(raw_columns))
ds_val_aware = ds_val_aware.remove_columns(list(raw_columns))

In [7]:
# Training configuration: evaluate and log once per epoch, write no checkpoints
# and report to no external tracker.
training_args = TrainingArguments(
    # Passed as a plain string, matching how the dataset paths are handed to load_dataset.
    output_dir=str(BASE_DIR / "checkpoint" / "sequential"),
    learning_rate=1e-6,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_EPOCHS,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    # Logging is epoch-based, so no logging_steps value is set.
    logging_strategy="epoch",
    report_to="none",
    save_strategy="no",
)

# Train on the model-agnostic split and evaluate on the model-aware split.
trainer = Trainer(
    model=model,
    args=training_args,
    # Seed the shuffle from the training arguments so repeated runs see the same order.
    train_dataset=ds_val['train'].shuffle(seed=training_args.seed),
    eval_dataset=ds_val_aware['train'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5723,0.569168,0.702595
2,0.4665,0.612332,0.714571
3,0.4258,0.689217,0.748503
4,0.3653,0.837214,0.752495
5,0.3399,0.952529,0.744511
6,0.3131,1.054403,0.744511
7,0.2995,1.101317,0.746507
8,0.2689,1.141086,0.746507
9,0.2283,1.167718,0.738523
10,0.2332,1.172992,0.738523


TrainOutput(global_step=1250, training_loss=0.3512808197021484, metrics={'train_runtime': 472.384, 'train_samples_per_second': 10.563, 'train_steps_per_second': 2.646, 'total_flos': 1299957845962884.0, 'train_loss': 0.3512808197021484, 'epoch': 10.0})