In [None]:
import os

from dotenv import load_dotenv
# Populate os.environ from a local .env file before the remaining imports run.
load_dotenv()
import torch
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from evaluate import load
from transformers import (
    AutoTokenizer,
    DebertaV2ForSequenceClassification,
    DebertaV2Tokenizer,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset
from huggingface_hub import notebook_login
import sentencepiece

In [None]:
# Report whether CUDA is usable and, if it is, the name of device 0.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)
if cuda_ready:
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU found")

In [None]:
# Resolve the project root from the MAIN_DIR environment variable and list
# its contents as a quick sanity check.
main_dir = os.getenv("MAIN_DIR")
if not main_dir:
    # Without this guard os.listdir(None) silently lists the current working
    # directory, and the problem only surfaces later when paths are built
    # from main_dir.
    raise EnvironmentError(
        "MAIN_DIR is not set; define it in the environment or in the .env file."
    )
print(os.listdir(main_dir))

In [None]:
# Derive the dataset and checkpoint locations from the project root.
data_dir = main_dir + "/liar2"
model_dir = main_dir + "/model_deberta_v3_xsmall"

# Load the dataset stored under data_dir.
dataset = load_dataset(data_dir)

# The tokenizer and the six-label classification model both come from model_dir.
tokenizer = DebertaV2Tokenizer.from_pretrained(model_dir)
model = DebertaV2ForSequenceClassification.from_pretrained(model_dir, num_labels=6)

In [None]:
def preprocess(examples):
    """Tokenize the "statement" field of a batch of examples.

    Every sequence is truncated and padded to exactly 128 tokens.

    Args:
        examples: Batch mapping that provides a "statement" entry.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that batch.
    """
    encoding_options = dict(padding='max_length', truncation=True, max_length=128)
    return tokenizer(examples["statement"], **encoding_options)

# Tokenize the dataset in batches, then expose the target column under the
# name "labels".
tokenized_ds = dataset.map(preprocess, batched=True).rename_column("label", "labels")

# Hand back torch tensors, restricted to the columns listed here.
model_input_columns = ["input_ids", "attention_mask", "labels"]
tokenized_ds.set_format("torch", columns=model_input_columns)

In [None]:
# Checkpoints and logs are written below the project root.
saved_dir = main_dir + "/saved"

training_args = TrainingArguments(
    output_dir=saved_dir,
    # Evaluate and checkpoint once per epoch; keep at most two checkpoints and
    # reload the best one when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    # Optimisation schedule.
    num_train_epochs=3,
    learning_rate=4e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    # Batching and data loading (gradient_accumulation_steps=4 is left disabled).
    per_device_train_batch_size=128,
    per_device_eval_batch_size=32,
    dataloader_num_workers=8,
    # Mixed precision.
    fp16=True,
    # Logging: one record per optimisation step, sent to TensorBoard.
    logging_steps=1,
    report_to="tensorboard",
)

# Gradient checkpointing trades extra compute for lower activation memory.
model.gradient_checkpointing_enable()

In [None]:
# Sanity-check the tokenized training inputs: show them, then report the
# shortest and longest sequence length (expected to match, since preprocess
# pads to a fixed max_length).
train_input_ids = tokenized_ds['train']['input_ids']
lens = [len(token_ids) for token_ids in train_input_ids]
print(train_input_ids)
print(min(lens), max(lens))

In [None]:
# Balanced per-class weights derived from the training labels, stored as a
# float tensor.
# NOTE(review): no other cell visible in this notebook reads class_weights, so
# the Trainer appears to use the default unweighted loss - confirm intent.
labels = dataset['train']['label']
label_ids = np.unique(labels)
balanced_weights = compute_class_weight(class_weight="balanced", classes=label_ids, y=labels)
class_weights = torch.tensor(balanced_weights, dtype=torch.float)
print(class_weights)

In [None]:
# Load the four evaluation metrics used by compute_metrics, in a fixed order.
accuracy, f1, precision, recall = (
    load(metric_name) for metric_name in ("accuracy", "f1", "precision", "recall")
)

def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into a flat dictionary of scores.

    Args:
        eval_pred: Pair that unpacks into (predictions, labels); the predicted
            class is taken as the argmax of the predictions along axis 1.

    Returns:
        dict with "accuracy", "f1_macro" and "f1_micro", followed by
        "f1_class_<i>", "precision_class_<i>" and "recall_class_<i>" for each
        class index i.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=1)
    scoring_inputs = {"predictions": predicted, "references": references}

    # Aggregate scores first so they lead the resulting dictionary.
    metrics = {
        "accuracy": accuracy.compute(**scoring_inputs)["accuracy"],
        "f1_macro": f1.compute(average="macro", **scoring_inputs)["f1"],
        "f1_micro": f1.compute(average="micro", **scoring_inputs)["f1"],
    }

    # average=None yields one value per class instead of a single aggregate.
    per_class_f1 = f1.compute(average=None, **scoring_inputs)["f1"]
    per_class_precision = precision.compute(average=None, **scoring_inputs)["precision"]
    per_class_recall = recall.compute(average=None, **scoring_inputs)["recall"]

    per_class_rows = zip(per_class_f1, per_class_precision, per_class_recall)
    for class_idx, (class_f1, class_precision, class_recall) in enumerate(per_class_rows):
        metrics.update({
            f"f1_class_{class_idx}": class_f1,
            f"precision_class_{class_idx}": class_precision,
            f"recall_class_{class_idx}": class_recall,
        })
    return metrics

# Wire the model, training configuration, data splits and metric function
# into a single Trainer.
# NOTE(review): newer transformers releases rename the `tokenizer` argument to
# `processing_class` - confirm against the installed version.
train_split = tokenized_ds["train"]
validation_split = tokenized_ds["validation"]

trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_split,
    eval_dataset=validation_split,
)

In [None]:
# Measure validation accuracy before and after fine-tuning; the model is only
# saved when the fine-tuned accuracy is strictly higher.
cuda_device = torch.device("cuda")
model.to(cuda_device)

validation_data = tokenized_ds["validation"]
res_before_train = trainer.evaluate(eval_dataset=validation_data)["eval_accuracy"]
print(res_before_train)

trainer.train()

res_after_train = trainer.evaluate(eval_dataset=validation_data)["eval_accuracy"]
improved = res_before_train < res_after_train
if not improved:
    print("Didn't improve.")
else:
    trainer.save_model(r"")  # REMOVED PATH
    print(f"Improved by {res_after_train - res_before_train}")

In [None]:
# Release cached CUDA memory, make sure the model is on the GPU, and run the
# trainer's training loop again.
torch.cuda.empty_cache()
cuda_device = torch.device("cuda")
model.to(cuda_device)
trainer.train()

In [None]:
# Reload the saved checkpoint from disk and score it on the test split.
test_dir = r"" # REMOVED PATH
test_tokenizer = AutoTokenizer.from_pretrained(test_dir)
test_model = DebertaV2ForSequenceClassification.from_pretrained(test_dir)
# compute_metrics is passed so the report contains accuracy / F1 / precision /
# recall rather than the loss alone.
test_trainer = Trainer(
    model=test_model,
    tokenizer=test_tokenizer,
    compute_metrics=compute_metrics,
)
# Evaluate with test_trainer, not the training-time `trainer`: otherwise the
# checkpoint reloaded from test_dir is never actually measured.
metrics = test_trainer.evaluate(eval_dataset=tokenized_ds["test"])
print(metrics)

In [None]:
# id2label = {0: "Lie",
#             1: "Deception by omission",
#             2: "Denial",
#             3: "An accusation of cowardice",
#             4: "Rationalization",
#             5: "Minimization",
#             6: "Selective inattention",
#             7: "Selective attention",
#             8: "Distraction",
#             9: "The excuse",
#             10: "Hidden intimidation",
#             11: "False guilt",
#             12: "Attraction",
#             13: "Victim's condemnation",
#             14: "Playing the victim role",
#             15: "Playing the role of a servant",
#             16: "Seduction",
#             17: "Projecting guilt",
#             18: "Feigning innocence",
#             19: "Simulation of confusion",
#             20: "Aggressive anger",
#             21: "Declassification"}
# label2id = {"Lie": 0,
#             "Deception by omission": 1,
#             "Denial": 2,
#             "An accusation of cowardice": 3,
#             "Rationalization": 4,
#             "Minimization": 5,
#             "Selective inattention": 6,
#             "Selective attention": 7,
#             "Distraction": 8,
#             "The excuse": 9,
#             "Hidden intimidation": 10,
#             "False guilt": 11,
#             "Attraction": 12,
#             "Victim's condemnation": 13,
#             "Playing the victim role": 14,
#             "Playing the role of a servant": 15,
#             "Seduction": 16,
#             "Projecting guilt": 17,
#             "Feigning innocence": 18,
#             "Simulation of confusion": 19,
#             "Aggressive anger": 20,
#             "Declassification": 21}