In [None]:
import os 

# Move the working directory two levels up before the project import below.
# NOTE(review): presumably needed so that `task1` resolves as a package — confirm.
# The path is relative, so re-running this cell moves up two more levels each time;
# restart the kernel before executing it again.
os.chdir("../..")

from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, AutoModelForSequenceClassification
from peft import get_peft_model, LoraConfig, TaskType
import evaluate
import numpy as np
from task1.config import ProjectPaths
import pandas as pd
import torch


# === Select compute device ===
# Prefer CUDA when present, then Apple MPS, and fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

def load_datasets(path):
    """Load a tab-separated split and return it as a Hugging Face ``Dataset``.

    Rows whose ``label`` is neither ``SUBJ`` nor ``OBJ`` are dropped, the labels
    are encoded as integers (``OBJ`` -> 0, ``SUBJ`` -> 1) and only the
    ``sentence`` and ``label`` columns are kept.

    Args:
        path: Location of the TSV file; passed straight to ``pandas.read_csv``.

    Returns:
        A ``Dataset`` built from the filtered ``sentence`` / ``label`` frame.
    """
    df = pd.read_csv(path, sep='\t')
    df = df[df['label'].isin(['SUBJ', 'OBJ'])].copy()
    df['label'] = df['label'].map({'OBJ': 0, 'SUBJ': 1})
    df = df[['sentence', 'label']]
    # Row filtering can leave a non-contiguous pandas index; from_pandas may
    # otherwise export it as an extra ``__index_level_0__`` column.
    return Dataset.from_pandas(df, preserve_index=False)

# `paths` is not defined anywhere earlier in the notebook, so create it here to
# keep the cell runnable on a fresh kernel.
# NOTE(review): assumes ProjectPaths takes no required constructor arguments —
# confirm against task1.config.
paths = ProjectPaths()

# Load the four English splits: train, dev, dev-test and the labeled competition test set.
train_dataset = load_datasets(paths.english_data_dir / "train_en.tsv")
val_dataset   = load_datasets(paths.english_data_dir / "dev_en.tsv")
test_dataset  = load_datasets(paths.english_data_dir / "dev_test_en.tsv")
competition_test_dataset = load_datasets(paths.english_data_dir / "test_en_labeled.tsv")

W0615 23:35:53.990000 41040 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
# Checkpoint shared by the classification model and its tokenizer.
model_name = "microsoft/deberta-v3-base"

# Two output labels, matching the OBJ / SUBJ encoding produced by load_datasets.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# LoRA adapter settings for the sequence-classification task.
# NOTE(review): "dense" presumably matches every module whose name ends in
# `dense`, including the freshly initialised `pooler.dense` — confirm this is intended.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_proj", "key_proj", "value_proj", "dense"],
)

# Wrap the base model with the adapters, then move it to the selected device.
model = get_peft_model(model, lora_config)
model = model.to(device)

In [4]:
# Load the four `evaluate` metrics that compute_metrics relies on, in the same
# order as before: f1, accuracy, precision, recall.
f1, accuracy, precision, recall = (
    evaluate.load(metric_name)
    for metric_name in ("f1", "accuracy", "precision", "recall")
)

def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 / precision / recall.

    Args:
        eval_pred: A ``(logits, labels)`` pair. If ``logits`` is itself a tuple,
            its first element is used as the class scores.

    Returns:
        dict with the keys ``accuracy``, ``f1_macro``, ``precision`` and ``recall``.
    """
    logits, labels = eval_pred
    # Some models hand back a tuple of outputs; the class scores come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1_macro": f1.compute(predictions=preds, references=labels, average="macro")["f1"],
        # zero_division=0 keeps the score at 0 for a class with no predicted or
        # true samples, without emitting the undefined-metric warning.
        "precision": precision.compute(
            predictions=preds, references=labels, average="macro", zero_division=0
        )["precision"],
        "recall": recall.compute(
            predictions=preds, references=labels, average="macro", zero_division=0
        )["recall"],
    }

In [5]:
def tokenize_fn(examples):
    """Tokenize the ``sentence`` field, padding and truncating to 128 tokens."""
    tokenizer_kwargs = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    sentences = examples["sentence"]
    return tokenizer(sentences, **tokenizer_kwargs)

def _prepare_split(dataset):
    """Tokenize one split and expose it as torch tensors.

    Maps ``tokenize_fn`` over the split in batches, renames ``label`` to
    ``labels`` and restricts the output format to ``input_ids``,
    ``attention_mask`` and ``labels``. A split that already carries those three
    columns is only re-formatted, so re-running this cell does not fail on the
    already-renamed ``label`` column.
    """
    model_columns = ["input_ids", "attention_mask", "labels"]
    already_prepared = set(model_columns) <= set(dataset.column_names)
    if not already_prepared:
        dataset = dataset.map(tokenize_fn, batched=True)
        dataset = dataset.rename_column("label", "labels")
    dataset.set_format(type="torch", columns=model_columns)
    return dataset


train_dataset = _prepare_split(train_dataset)
val_dataset = _prepare_split(val_dataset)
test_dataset = _prepare_split(test_dataset)
competition_test_dataset = _prepare_split(competition_test_dataset)

Map:   0%|          | 0/830 [00:00<?, ? examples/s]

Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/484 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [6]:
# === Training configuration ===
training_args = TrainingArguments(
    output_dir="./results/english_lora",
    # Schedule and batch sizes
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Evaluate and checkpoint once per epoch; reload the best checkpoint by f1_macro at the end
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    # Log the training loss every 10 steps
    logging_steps=10,
)

In [7]:
# Build the Trainer from the LoRA-wrapped model, the training arguments and the
# tokenized train / validation splits; compute_metrics is passed in for evaluation.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    # NOTE(review): recent transformers releases deprecate `tokenizer=` in favour
    # of `processing_class=` — confirm against the installed version.
    tokenizer=tokenizer,
)

# === Train ===
trainer.train()

# Save the model currently held by the trainer.
# NOTE(review): with load_best_model_at_end=True this is presumably the best
# checkpoint by f1_macro rather than the last epoch — confirm.
trainer.save_model("./results/english_lora/final_checkpoint")
# === Evaluate on the dev-test split ===
trainer.evaluate(eval_dataset=test_dataset)

  0%|          | 0/2080 [00:00<?, ?it/s]



{'loss': 0.6861, 'grad_norm': 1.649037480354309, 'learning_rate': 4.9759615384615386e-05, 'epoch': 0.05}
{'loss': 0.627, 'grad_norm': 3.4568028450012207, 'learning_rate': 4.9519230769230776e-05, 'epoch': 0.1}
{'loss': 0.6868, 'grad_norm': 1.6904819011688232, 'learning_rate': 4.927884615384616e-05, 'epoch': 0.14}
{'loss': 0.6399, 'grad_norm': 1.4102416038513184, 'learning_rate': 4.9038461538461536e-05, 'epoch': 0.19}
{'loss': 0.6832, 'grad_norm': 1.3337684869766235, 'learning_rate': 4.8798076923076926e-05, 'epoch': 0.24}
{'loss': 0.6224, 'grad_norm': 3.2283198833465576, 'learning_rate': 4.855769230769231e-05, 'epoch': 0.29}
{'loss': 0.5883, 'grad_norm': 1.1131833791732788, 'learning_rate': 4.8317307692307693e-05, 'epoch': 0.34}
{'loss': 0.6476, 'grad_norm': 3.1562931537628174, 'learning_rate': 4.8076923076923084e-05, 'epoch': 0.38}
{'loss': 0.6832, 'grad_norm': 1.3199255466461182, 'learning_rate': 4.783653846153847e-05, 'epoch': 0.43}
{'loss': 0.6497, 'grad_norm': 1.0305471420288086, 'l

  0%|          | 0/116 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


{'eval_loss': 0.7434971928596497, 'eval_accuracy': 0.4805194805194805, 'eval_f1_macro': 0.32456140350877194, 'eval_precision': 0.24025974025974026, 'eval_recall': 0.5, 'eval_runtime': 40.5613, 'eval_samples_per_second': 11.39, 'eval_steps_per_second': 2.86, 'epoch': 1.0}




{'loss': 0.5915, 'grad_norm': 1.0824525356292725, 'learning_rate': 4.495192307692308e-05, 'epoch': 1.01}
{'loss': 0.6575, 'grad_norm': 3.653639793395996, 'learning_rate': 4.4711538461538466e-05, 'epoch': 1.06}
{'loss': 0.6252, 'grad_norm': 4.088044166564941, 'learning_rate': 4.447115384615384e-05, 'epoch': 1.11}
{'loss': 0.6463, 'grad_norm': 2.0250017642974854, 'learning_rate': 4.423076923076923e-05, 'epoch': 1.15}
{'loss': 0.579, 'grad_norm': 1.7840590476989746, 'learning_rate': 4.3990384615384616e-05, 'epoch': 1.2}
{'loss': 0.6501, 'grad_norm': 2.4668331146240234, 'learning_rate': 4.375e-05, 'epoch': 1.25}
{'loss': 0.5557, 'grad_norm': 8.028425216674805, 'learning_rate': 4.350961538461539e-05, 'epoch': 1.3}
{'loss': 0.6255, 'grad_norm': 9.32260513305664, 'learning_rate': 4.326923076923077e-05, 'epoch': 1.35}
{'loss': 0.651, 'grad_norm': 7.821113586425781, 'learning_rate': 4.302884615384616e-05, 'epoch': 1.39}
{'loss': 0.4405, 'grad_norm': 12.49606990814209, 'learning_rate': 4.2788461

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 0.5701707601547241, 'eval_accuracy': 0.7597402597402597, 'eval_f1_macro': 0.759712115603578, 'eval_precision': 0.7618828176431863, 'eval_recall': 0.7613175675675676, 'eval_runtime': 32.1374, 'eval_samples_per_second': 14.376, 'eval_steps_per_second': 3.61, 'epoch': 2.0}




{'loss': 0.5, 'grad_norm': 2.2521331310272217, 'learning_rate': 3.9903846153846155e-05, 'epoch': 2.02}
{'loss': 0.4291, 'grad_norm': 0.8822720050811768, 'learning_rate': 3.966346153846154e-05, 'epoch': 2.07}
{'loss': 0.3779, 'grad_norm': 9.203801155090332, 'learning_rate': 3.942307692307692e-05, 'epoch': 2.12}
{'loss': 0.5242, 'grad_norm': 20.056825637817383, 'learning_rate': 3.918269230769231e-05, 'epoch': 2.16}
{'loss': 0.4541, 'grad_norm': 2.2704241275787354, 'learning_rate': 3.8942307692307696e-05, 'epoch': 2.21}
{'loss': 0.3139, 'grad_norm': 0.6337361335754395, 'learning_rate': 3.870192307692308e-05, 'epoch': 2.26}
{'loss': 0.8588, 'grad_norm': 46.92422103881836, 'learning_rate': 3.846153846153846e-05, 'epoch': 2.31}
{'loss': 0.2927, 'grad_norm': 1.4375932216644287, 'learning_rate': 3.8221153846153846e-05, 'epoch': 2.36}
{'loss': 0.234, 'grad_norm': 2.240623950958252, 'learning_rate': 3.798076923076923e-05, 'epoch': 2.4}
{'loss': 0.6237, 'grad_norm': 1.3882538080215454, 'learning_

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 0.7192208170890808, 'eval_accuracy': 0.7792207792207793, 'eval_f1_macro': 0.7782859078590786, 'eval_precision': 0.7907986111111112, 'eval_recall': 0.7829391891891893, 'eval_runtime': 31.2698, 'eval_samples_per_second': 14.775, 'eval_steps_per_second': 3.71, 'epoch': 3.0}




{'loss': 0.3899, 'grad_norm': 4.4278459548950195, 'learning_rate': 3.485576923076923e-05, 'epoch': 3.03}
{'loss': 0.524, 'grad_norm': 20.9962215423584, 'learning_rate': 3.461538461538462e-05, 'epoch': 3.08}
{'loss': 0.2862, 'grad_norm': 0.5747397541999817, 'learning_rate': 3.4375e-05, 'epoch': 3.12}
{'loss': 0.6533, 'grad_norm': 37.80730056762695, 'learning_rate': 3.4134615384615386e-05, 'epoch': 3.17}
{'loss': 0.2256, 'grad_norm': 18.552936553955078, 'learning_rate': 3.3894230769230776e-05, 'epoch': 3.22}
{'loss': 0.2273, 'grad_norm': 31.02417755126953, 'learning_rate': 3.365384615384616e-05, 'epoch': 3.27}
{'loss': 0.1236, 'grad_norm': 0.33926042914390564, 'learning_rate': 3.3413461538461536e-05, 'epoch': 3.32}
{'loss': 0.4691, 'grad_norm': 0.1622362583875656, 'learning_rate': 3.3173076923076926e-05, 'epoch': 3.37}
{'loss': 0.5624, 'grad_norm': 0.6241254210472107, 'learning_rate': 3.293269230769231e-05, 'epoch': 3.41}
{'loss': 0.6302, 'grad_norm': 27.10350799560547, 'learning_rate': 

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 0.7433408498764038, 'eval_accuracy': 0.7835497835497836, 'eval_f1_macro': 0.7831433882202737, 'eval_precision': 0.7905750190403655, 'eval_recall': 0.7864301801801802, 'eval_runtime': 32.4007, 'eval_samples_per_second': 14.259, 'eval_steps_per_second': 3.58, 'epoch': 4.0}




{'loss': 0.3161, 'grad_norm': 12.193284034729004, 'learning_rate': 2.9807692307692308e-05, 'epoch': 4.04}
{'loss': 0.2193, 'grad_norm': 13.544821739196777, 'learning_rate': 2.9567307692307695e-05, 'epoch': 4.09}
{'loss': 0.6521, 'grad_norm': 16.085887908935547, 'learning_rate': 2.932692307692308e-05, 'epoch': 4.13}
{'loss': 0.3033, 'grad_norm': 0.26436716318130493, 'learning_rate': 2.9086538461538465e-05, 'epoch': 4.18}
{'loss': 0.1871, 'grad_norm': 1.8752810955047607, 'learning_rate': 2.8846153846153845e-05, 'epoch': 4.23}
{'loss': 0.1538, 'grad_norm': 4.14263391494751, 'learning_rate': 2.860576923076923e-05, 'epoch': 4.28}
{'loss': 0.2859, 'grad_norm': 0.18656043708324432, 'learning_rate': 2.8365384615384616e-05, 'epoch': 4.33}
{'loss': 0.2818, 'grad_norm': 0.19122512638568878, 'learning_rate': 2.8125000000000003e-05, 'epoch': 4.38}
{'loss': 0.14, 'grad_norm': 2.6461126804351807, 'learning_rate': 2.7884615384615386e-05, 'epoch': 4.42}
{'loss': 0.4209, 'grad_norm': 0.2558519244194031,

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 0.7275905609130859, 'eval_accuracy': 0.8116883116883117, 'eval_f1_macro': 0.8111352839737054, 'eval_precision': 0.8118350437811882, 'eval_recall': 0.8108108108108109, 'eval_runtime': 30.7533, 'eval_samples_per_second': 15.023, 'eval_steps_per_second': 3.772, 'epoch': 5.0}




{'loss': 0.0778, 'grad_norm': 5.448073387145996, 'learning_rate': 2.4759615384615388e-05, 'epoch': 5.05}
{'loss': 0.1845, 'grad_norm': 1.3960145711898804, 'learning_rate': 2.4519230769230768e-05, 'epoch': 5.1}
{'loss': 0.1983, 'grad_norm': 25.009845733642578, 'learning_rate': 2.4278846153846155e-05, 'epoch': 5.14}
{'loss': 0.235, 'grad_norm': 1.960586428642273, 'learning_rate': 2.4038461538461542e-05, 'epoch': 5.19}
{'loss': 0.4586, 'grad_norm': 0.12392983585596085, 'learning_rate': 2.3798076923076922e-05, 'epoch': 5.24}
{'loss': 0.5793, 'grad_norm': 0.1638999879360199, 'learning_rate': 2.355769230769231e-05, 'epoch': 5.29}
{'loss': 0.3525, 'grad_norm': 0.7131265997886658, 'learning_rate': 2.3317307692307692e-05, 'epoch': 5.34}
{'loss': 0.6417, 'grad_norm': 1.9946179389953613, 'learning_rate': 2.307692307692308e-05, 'epoch': 5.38}
{'loss': 0.3909, 'grad_norm': 5.824733734130859, 'learning_rate': 2.2836538461538463e-05, 'epoch': 5.43}
{'loss': 0.4386, 'grad_norm': 0.5047049522399902, 'l

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 0.872404932975769, 'eval_accuracy': 0.7770562770562771, 'eval_f1_macro': 0.7759129759129759, 'eval_precision': 0.7901912901912902, 'eval_recall': 0.7810247747747747, 'eval_runtime': 33.2346, 'eval_samples_per_second': 13.901, 'eval_steps_per_second': 3.49, 'epoch': 6.0}




{'loss': 0.7006, 'grad_norm': 26.961240768432617, 'learning_rate': 1.9951923076923078e-05, 'epoch': 6.01}
{'loss': 0.2028, 'grad_norm': 1.8992950916290283, 'learning_rate': 1.971153846153846e-05, 'epoch': 6.06}
{'loss': 0.0309, 'grad_norm': 57.054298400878906, 'learning_rate': 1.9471153846153848e-05, 'epoch': 6.11}
{'loss': 0.1845, 'grad_norm': 11.323291778564453, 'learning_rate': 1.923076923076923e-05, 'epoch': 6.15}
{'loss': 0.3421, 'grad_norm': 5.613903045654297, 'learning_rate': 1.8990384615384615e-05, 'epoch': 6.2}
{'loss': 0.5604, 'grad_norm': 8.501938819885254, 'learning_rate': 1.8750000000000002e-05, 'epoch': 6.25}
{'loss': 0.3155, 'grad_norm': 0.6706168055534363, 'learning_rate': 1.8509615384615385e-05, 'epoch': 6.3}
{'loss': 0.1191, 'grad_norm': 34.96805191040039, 'learning_rate': 1.826923076923077e-05, 'epoch': 6.35}
{'loss': 0.2867, 'grad_norm': 36.70364761352539, 'learning_rate': 1.8028846153846156e-05, 'epoch': 6.39}
{'loss': 0.1575, 'grad_norm': 19.655858993530273, 'lear

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 1.0803571939468384, 'eval_accuracy': 0.7619047619047619, 'eval_f1_macro': 0.759082979368932, 'eval_precision': 0.7855052125100241, 'eval_recall': 0.7672860360360361, 'eval_runtime': 31.7561, 'eval_samples_per_second': 14.548, 'eval_steps_per_second': 3.653, 'epoch': 7.0}




{'loss': 0.263, 'grad_norm': 0.3002159595489502, 'learning_rate': 1.4903846153846154e-05, 'epoch': 7.02}
{'loss': 0.4022, 'grad_norm': 11.989590644836426, 'learning_rate': 1.466346153846154e-05, 'epoch': 7.07}
{'loss': 0.0053, 'grad_norm': 0.08239209651947021, 'learning_rate': 1.4423076923076923e-05, 'epoch': 7.12}
{'loss': 0.2319, 'grad_norm': 0.10355109721422195, 'learning_rate': 1.4182692307692308e-05, 'epoch': 7.16}
{'loss': 0.3997, 'grad_norm': 0.08134032040834427, 'learning_rate': 1.3942307692307693e-05, 'epoch': 7.21}
{'loss': 0.5432, 'grad_norm': 9.10563850402832, 'learning_rate': 1.3701923076923078e-05, 'epoch': 7.26}
{'loss': 0.0887, 'grad_norm': 0.14864417910575867, 'learning_rate': 1.3461538461538462e-05, 'epoch': 7.31}
{'loss': 0.3861, 'grad_norm': 0.4651874601840973, 'learning_rate': 1.3221153846153847e-05, 'epoch': 7.36}
{'loss': 0.4504, 'grad_norm': 0.2793452739715576, 'learning_rate': 1.2980769230769232e-05, 'epoch': 7.4}
{'loss': 0.0181, 'grad_norm': 23.67063331604004

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 1.105589509010315, 'eval_accuracy': 0.7683982683982684, 'eval_f1_macro': 0.7659762823395744, 'eval_precision': 0.7901742160278746, 'eval_recall': 0.773536036036036, 'eval_runtime': 30.633, 'eval_samples_per_second': 15.082, 'eval_steps_per_second': 3.787, 'epoch': 8.0}




{'loss': 0.5751, 'grad_norm': 0.1051291823387146, 'learning_rate': 9.85576923076923e-06, 'epoch': 8.03}
{'loss': 0.0924, 'grad_norm': 0.15873047709465027, 'learning_rate': 9.615384615384616e-06, 'epoch': 8.08}
{'loss': 0.134, 'grad_norm': 0.06257347017526627, 'learning_rate': 9.375000000000001e-06, 'epoch': 8.12}
{'loss': 0.1238, 'grad_norm': 0.16200163960456848, 'learning_rate': 9.134615384615384e-06, 'epoch': 8.17}
{'loss': 0.1126, 'grad_norm': 0.5389923453330994, 'learning_rate': 8.89423076923077e-06, 'epoch': 8.22}
{'loss': 0.3124, 'grad_norm': 38.65231704711914, 'learning_rate': 8.653846153846155e-06, 'epoch': 8.27}
{'loss': 0.5543, 'grad_norm': 0.08911880850791931, 'learning_rate': 8.41346153846154e-06, 'epoch': 8.32}
{'loss': 0.0483, 'grad_norm': 0.09940125793218613, 'learning_rate': 8.173076923076923e-06, 'epoch': 8.37}
{'loss': 0.2505, 'grad_norm': 0.06847548484802246, 'learning_rate': 7.932692307692308e-06, 'epoch': 8.41}
{'loss': 0.3539, 'grad_norm': 0.09796755760908127, 'le

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 1.052640676498413, 'eval_accuracy': 0.7770562770562771, 'eval_f1_macro': 0.7757693703202824, 'eval_precision': 0.7913368983957219, 'eval_recall': 0.7811936936936937, 'eval_runtime': 31.5826, 'eval_samples_per_second': 14.628, 'eval_steps_per_second': 3.673, 'epoch': 9.0}




{'loss': 0.1277, 'grad_norm': 0.0643099695444107, 'learning_rate': 4.807692307692308e-06, 'epoch': 9.04}
{'loss': 0.3252, 'grad_norm': 5.323233127593994, 'learning_rate': 4.567307692307692e-06, 'epoch': 9.09}
{'loss': 0.1792, 'grad_norm': 1.1050876379013062, 'learning_rate': 4.326923076923077e-06, 'epoch': 9.13}
{'loss': 0.158, 'grad_norm': 0.06537039577960968, 'learning_rate': 4.086538461538462e-06, 'epoch': 9.18}
{'loss': 0.042, 'grad_norm': 0.13617822527885437, 'learning_rate': 3.846153846153847e-06, 'epoch': 9.23}
{'loss': 0.6979, 'grad_norm': 8.7777099609375, 'learning_rate': 3.6057692307692307e-06, 'epoch': 9.28}
{'loss': 0.2529, 'grad_norm': 0.14794965088367462, 'learning_rate': 3.3653846153846154e-06, 'epoch': 9.33}
{'loss': 0.2873, 'grad_norm': 0.6386016011238098, 'learning_rate': 3.125e-06, 'epoch': 9.38}
{'loss': 0.3563, 'grad_norm': 53.162845611572266, 'learning_rate': 2.884615384615385e-06, 'epoch': 9.42}
{'loss': 0.1547, 'grad_norm': 0.04436811804771423, 'learning_rate': 

  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 0.9946100115776062, 'eval_accuracy': 0.79004329004329, 'eval_f1_macro': 0.7895216400911163, 'eval_precision': 0.7984695339816955, 'eval_recall': 0.7931869369369369, 'eval_runtime': 30.974, 'eval_samples_per_second': 14.916, 'eval_steps_per_second': 3.745, 'epoch': 10.0}




{'train_runtime': 3267.4525, 'train_samples_per_second': 2.54, 'train_steps_per_second': 0.637, 'train_loss': 0.3846845748798492, 'epoch': 10.0}




  0%|          | 0/121 [00:00<?, ?it/s]

{'eval_loss': 0.8125251531600952,
 'eval_accuracy': 0.8057851239669421,
 'eval_f1_macro': 0.6946413230240549,
 'eval_precision': 0.7618357487922705,
 'eval_recall': 0.6718141472692691,
 'eval_runtime': 35.1839,
 'eval_samples_per_second': 13.756,
 'eval_steps_per_second': 3.439,
 'epoch': 10.0}

In [8]:
# Evaluate the trained model on the labeled competition test split.
trainer.evaluate(eval_dataset=competition_test_dataset)



  0%|          | 0/75 [00:00<?, ?it/s]

{'eval_loss': 0.977461576461792,
 'eval_accuracy': 0.7466666666666667,
 'eval_f1_macro': 0.7135534448967285,
 'eval_precision': 0.7057403814301264,
 'eval_recall': 0.7378932968536251,
 'eval_runtime': 20.338,
 'eval_samples_per_second': 14.751,
 'eval_steps_per_second': 3.688,
 'epoch': 10.0}