# Baseline (Text Classification)

In [1]:
from transformers import TrainingArguments
import evaluate
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import AutoTokenizer
import numpy as np

# Training hyperparameters; both are passed to TrainingArguments in the training cell.
BATCH_SIZE = 100
NUM_EPOCHS = 25

# Hugging Face checkpoint name. The tokenizer is loaded from it here and the
# sequence-classification model is loaded from the same name further down.
checkpoint = "distilbert-base-uncased"
# Alternative checkpoint (disabled):
#checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [2]:
from datasets import load_dataset

# The model-agnostic validation file is loaded as the default "train" split.
ds = load_dataset("json", data_files=["/data1/malto/shroom/val.model-agnostic.json"])
# The trial file is loaded separately; its "train" split is reused as the test split below.
ds2 = load_dataset("json", data_files=["/data1/malto/shroom/trial-v1.json"])
# Alternative (disabled): random 80/20 split of the validation file.
#ds = ds['train'].train_test_split(train_size=0.8)
ds['test'] = ds2['train']
# Bare expression so the cell displays the resulting DatasetDict.
ds

DatasetDict({
    train: Dataset({
        features: ['ref', 'labels', 'task', 'hyp', 'tgt', 'src', 'model', 'label', 'p(Hallucination)'],
        num_rows: 499
    })
    test: Dataset({
        features: ['hyp', 'ref', 'task', 'p(Hallucination)', 'labels', 'tgt', 'label', 'model', 'src'],
        num_rows: 80
    })
})

In [3]:
def preprocess_function(examples):
    """Tokenize a batch of hypothesis/target pairs and attach binary labels.

    Args:
        examples: Batch mapping column names to lists. The "hyp", "tgt" and
            "label" columns are read.

    Returns:
        The tokenizer output for the formatted "Hyp: < ... > Tgt: < ... >"
        strings, with a "label" entry set to 1 where the source label is
        "Hallucination" and 0 otherwise.
    """
    inputs = [f"Hyp: < {hyp} > Tgt: < {tgt} >" for hyp, tgt in zip(examples["hyp"], examples['tgt'])]
    # truncation=True caps each sequence at the tokenizer's maximum model input
    # length, so an over-long pair cannot overflow the model's position
    # embeddings at training or evaluation time.
    model_inputs = tokenizer(inputs, truncation=True)
    model_inputs["label"] = [1 if t == "Hallucination" else 0 for t in examples['label']]
    return model_inputs

In [4]:
# Tokenize every split in batches, then drop the raw input columns in the same
# expression. "label" is not in the drop list, so it stays alongside the
# tokenizer outputs.
ds = ds.map(preprocess_function, batched=True).remove_columns(
    ['hyp', 'ref', 'task', 'p(Hallucination)', 'labels', 'tgt', 'model', 'src']
)

In [5]:
from transformers import DataCollatorWithPadding
# Collator handed to the Trainer for batching; preprocess_function requests no
# padding from the tokenizer, so padding is left to this object.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [6]:
import evaluate
import numpy as np

# Cache for the accuracy metric so it is loaded once, not on every evaluation pass.
_accuracy_metric = None


def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: Pair ``(predictions, labels)``; ``predictions`` holds
            per-class scores and is reduced with argmax over axis 1.

    Returns:
        The result of the "accuracy" metric's ``compute`` call for the
        argmax predictions against ``labels``.
    """
    global _accuracy_metric
    if _accuracy_metric is None:
        # Loaded lazily on first use and reused afterwards; the Trainer calls
        # this function once per evaluation (every epoch in this notebook).
        _accuracy_metric = evaluate.load("accuracy")
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return _accuracy_metric.compute(predictions=predictions, references=labels)

In [7]:
# Class-index -> class-name table for the binary classifier head; the reverse
# table is derived from it so the two cannot drift apart.
id2label = dict(enumerate(("Not Hallucination", "Hallucination")))
label2id = {name: idx for idx, name in id2label.items()}

In [8]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Load the checkpoint with a 2-label sequence-classification head and the
# label name mappings defined above. The warning printed by this cell reports
# that the classifier / pre_classifier weights are newly initialized.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint, num_labels=2, id2label=id2label, label2id=label2id
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [1]:
# Per-device training batch size. This value used to be supplied through the
# deprecated `per_gpu_train_batch_size` argument, which takes precedence over
# `per_device_train_batch_size`; it is kept at 4 so the effective training
# batch size is unchanged, but is now set through the supported argument.
TRAIN_BATCH_SIZE = 4

training_args = TrainingArguments(
    output_dir="local_model",
    learning_rate=1e-5,
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_EPOCHS,
    weight_decay=0.01,
    # Evaluate and checkpoint on the same schedule so the best checkpoint can
    # be restored once training finishes.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=1,
    load_best_model_at_end=True,
)

# ds["test"] is used for the per-epoch evaluation, and therefore also for
# selecting the checkpoint that load_best_model_at_end restores.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

NameError: name 'TrainingArguments' is not defined

### Overfitting?

In [10]:
# Evaluates on ds['test'], the same split given to the Trainer as eval_dataset,
# so this score does not come from data unseen during checkpoint selection.
trainer.evaluate(ds['test'])

{'eval_loss': 0.5581136345863342,
 'eval_accuracy': 0.75,
 'eval_runtime': 1.3105,
 'eval_samples_per_second': 61.046,
 'eval_steps_per_second': 0.763,
 'epoch': 25.0}

In [11]:
# Load the model-aware validation file, apply the same tokenization and column
# pruning as the training data, and evaluate on the whole file (no split; it
# is exposed as the default "train" split).
altro = (
    load_dataset("json", data_files=["/data1/malto/shroom/val.model-aware.json"])
    .map(preprocess_function, batched=True)
    .remove_columns(['hyp', 'ref', 'task', 'p(Hallucination)', 'labels', 'tgt', 'model', 'src'])
)
trainer.evaluate(altro['train'])

{'eval_loss': 0.7176768779754639,
 'eval_accuracy': 0.5329341317365269,
 'eval_runtime': 1.6202,
 'eval_samples_per_second': 309.212,
 'eval_steps_per_second': 3.703,
 'epoch': 25.0}