In [1]:
import os 

os.chdir("../..")

from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, AutoModelForSequenceClassification
from peft import get_peft_model, LoraConfig, TaskType
import evaluate
import numpy as np
from task1.config import ProjectPaths
import pandas as pd
import torch

paths = ProjectPaths()

# === 3. Set device ===
device = "mps" if torch.backends.mps.is_available() else "cpu"

# === 4. Load and preprocess data ===
def load_datasets(path):
    df = pd.read_csv(path, sep='\t')
    df = df[df['label'].isin(['SUBJ', 'OBJ'])].copy()
    df['label'] = df['label'].map({'OBJ': 0, 'SUBJ': 1})
    df = df[['sentence', 'label']]
    return Dataset.from_pandas(df)

train_dataset = load_datasets(paths.bulgarian_data_dir / "train_bg.tsv")
val_dataset   = load_datasets(paths.bulgarian_data_dir / "dev_bg.tsv")
test_dataset  = load_datasets(paths.bulgarian_data_dir / "dev_test_bg.tsv")

W0614 22:57:38.427000 37924 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
model_name = "iarfmoose/roberta-base-bulgarian"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/516 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/507M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at iarfmoose/roberta-base-bulgarian and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/78.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.79M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.44M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [3]:
def tokenize_fn(examples):
    return tokenizer(
        examples["sentence"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

train_dataset = train_dataset.map(tokenize_fn, batched=True)
val_dataset = val_dataset.map(tokenize_fn, batched=True)
test_dataset = test_dataset.map(tokenize_fn, batched=True)

train_dataset = train_dataset.rename_column("label", "labels")
val_dataset = val_dataset.rename_column("label", "labels")
test_dataset = test_dataset.rename_column("label", "labels")

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/691 [00:00<?, ? examples/s]

Map:   0%|          | 0/306 [00:00<?, ? examples/s]

Map:   0%|          | 0/250 [00:00<?, ? examples/s]

In [4]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type=TaskType.SEQ_CLS,
    target_modules=["query_proj", "key_proj", "value_proj", "dense"] 
)

model = get_peft_model(model, lora_config).to(device)

In [5]:
f1 = evaluate.load("f1")
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1_macro": f1.compute(predictions=preds, references=labels, average="macro")["f1"],
        "precision": precision.compute(predictions=preds, references=labels, average="macro")["precision"],
        "recall": recall.compute(predictions=preds, references=labels, average="macro")["recall"],
    }

In [6]:
# === 8. TrainingArguments ===
training_args = TrainingArguments(
    output_dir="./results/bulgarian-lora",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
)

In [7]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

# === 10. Train ===
trainer.train()

# === 11. Evaluate on test set ===
trainer.evaluate(eval_dataset=test_dataset)

  0%|          | 0/1730 [00:00<?, ?it/s]



{'loss': 0.7169, 'grad_norm': 6.391135215759277, 'learning_rate': 4.971098265895954e-05, 'epoch': 0.06}
{'loss': 0.7372, 'grad_norm': 4.695607662200928, 'learning_rate': 4.942196531791908e-05, 'epoch': 0.12}
{'loss': 0.6815, 'grad_norm': 4.262930393218994, 'learning_rate': 4.913294797687861e-05, 'epoch': 0.17}
{'loss': 0.7095, 'grad_norm': 8.437414169311523, 'learning_rate': 4.8843930635838154e-05, 'epoch': 0.23}
{'loss': 0.639, 'grad_norm': 5.070406436920166, 'learning_rate': 4.855491329479769e-05, 'epoch': 0.29}
{'loss': 0.6745, 'grad_norm': 7.1451096534729, 'learning_rate': 4.826589595375723e-05, 'epoch': 0.35}
{'loss': 0.6445, 'grad_norm': 5.549628734588623, 'learning_rate': 4.7976878612716764e-05, 'epoch': 0.4}
{'loss': 0.6632, 'grad_norm': 7.040141582489014, 'learning_rate': 4.7687861271676305e-05, 'epoch': 0.46}
{'loss': 0.6615, 'grad_norm': 5.789669513702393, 'learning_rate': 4.739884393063584e-05, 'epoch': 0.52}
{'loss': 0.6017, 'grad_norm': 4.460951805114746, 'learning_rate':

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 0.5747998952865601, 'eval_accuracy': 0.7124183006535948, 'eval_f1_macro': 0.707913918778202, 'eval_precision': 0.7104191302062803, 'eval_recall': 0.7069745401283763, 'eval_runtime': 16.7972, 'eval_samples_per_second': 18.217, 'eval_steps_per_second': 4.584, 'epoch': 1.0}




{'loss': 0.5101, 'grad_norm': 2.5632503032684326, 'learning_rate': 4.4797687861271684e-05, 'epoch': 1.04}
{'loss': 0.518, 'grad_norm': 5.770907402038574, 'learning_rate': 4.450867052023122e-05, 'epoch': 1.1}
{'loss': 0.6454, 'grad_norm': 6.008738040924072, 'learning_rate': 4.421965317919075e-05, 'epoch': 1.16}
{'loss': 0.4835, 'grad_norm': 6.237147808074951, 'learning_rate': 4.3930635838150294e-05, 'epoch': 1.21}
{'loss': 0.5079, 'grad_norm': 8.138538360595703, 'learning_rate': 4.364161849710983e-05, 'epoch': 1.27}
{'loss': 0.5629, 'grad_norm': 1.8587298393249512, 'learning_rate': 4.335260115606937e-05, 'epoch': 1.33}
{'loss': 0.4119, 'grad_norm': 5.153204917907715, 'learning_rate': 4.3063583815028904e-05, 'epoch': 1.39}
{'loss': 0.5818, 'grad_norm': 11.774129867553711, 'learning_rate': 4.2774566473988445e-05, 'epoch': 1.45}
{'loss': 0.4605, 'grad_norm': 2.286097288131714, 'learning_rate': 4.248554913294798e-05, 'epoch': 1.5}
{'loss': 0.4805, 'grad_norm': 4.888607501983643, 'learning_r

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 0.7249338030815125, 'eval_accuracy': 0.6862745098039216, 'eval_f1_macro': 0.6667876588021778, 'eval_precision': 0.6989247311827957, 'eval_recall': 0.669754017145565, 'eval_runtime': 25.7342, 'eval_samples_per_second': 11.891, 'eval_steps_per_second': 2.992, 'epoch': 2.0}




{'loss': 0.3987, 'grad_norm': 6.583398818969727, 'learning_rate': 3.988439306358382e-05, 'epoch': 2.02}
{'loss': 0.4369, 'grad_norm': 7.3686394691467285, 'learning_rate': 3.959537572254335e-05, 'epoch': 2.08}
{'loss': 0.4946, 'grad_norm': 4.71972131729126, 'learning_rate': 3.930635838150289e-05, 'epoch': 2.14}
{'loss': 0.3689, 'grad_norm': 9.574148178100586, 'learning_rate': 3.901734104046243e-05, 'epoch': 2.2}
{'loss': 0.4922, 'grad_norm': 13.31404972076416, 'learning_rate': 3.872832369942196e-05, 'epoch': 2.25}
{'loss': 0.4826, 'grad_norm': 2.911792278289795, 'learning_rate': 3.84393063583815e-05, 'epoch': 2.31}
{'loss': 0.4803, 'grad_norm': 3.48557448387146, 'learning_rate': 3.815028901734104e-05, 'epoch': 2.37}
{'loss': 0.8237, 'grad_norm': 4.774317741394043, 'learning_rate': 3.786127167630058e-05, 'epoch': 2.43}
{'loss': 0.7175, 'grad_norm': 10.32775592803955, 'learning_rate': 3.757225433526011e-05, 'epoch': 2.49}
{'loss': 0.5436, 'grad_norm': 6.715440273284912, 'learning_rate': 3

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 0.8032066822052002, 'eval_accuracy': 0.696078431372549, 'eval_f1_macro': 0.6946402704007726, 'eval_precision': 0.6943921232876712, 'eval_recall': 0.695623142204799, 'eval_runtime': 25.983, 'eval_samples_per_second': 11.777, 'eval_steps_per_second': 2.963, 'epoch': 3.0}




{'loss': 0.3017, 'grad_norm': 7.720668315887451, 'learning_rate': 3.497109826589596e-05, 'epoch': 3.01}
{'loss': 0.3571, 'grad_norm': 7.114643573760986, 'learning_rate': 3.468208092485549e-05, 'epoch': 3.06}
{'loss': 0.267, 'grad_norm': 4.712103366851807, 'learning_rate': 3.439306358381503e-05, 'epoch': 3.12}
{'loss': 0.5322, 'grad_norm': 14.041618347167969, 'learning_rate': 3.410404624277457e-05, 'epoch': 3.18}
{'loss': 0.3617, 'grad_norm': 9.11591625213623, 'learning_rate': 3.381502890173411e-05, 'epoch': 3.24}
{'loss': 0.403, 'grad_norm': 3.099885940551758, 'learning_rate': 3.352601156069364e-05, 'epoch': 3.29}
{'loss': 0.2954, 'grad_norm': 5.0465803146362305, 'learning_rate': 3.323699421965318e-05, 'epoch': 3.35}
{'loss': 0.4931, 'grad_norm': 10.349910736083984, 'learning_rate': 3.294797687861272e-05, 'epoch': 3.41}
{'loss': 0.2479, 'grad_norm': 6.267472743988037, 'learning_rate': 3.265895953757225e-05, 'epoch': 3.47}
{'loss': 0.5907, 'grad_norm': 8.601286888122559, 'learning_rate'

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 0.9310954213142395, 'eval_accuracy': 0.6993464052287581, 'eval_f1_macro': 0.6992307692307693, 'eval_precision': 0.7022917112872136, 'eval_recall': 0.7034420367897298, 'eval_runtime': 26.4629, 'eval_samples_per_second': 11.563, 'eval_steps_per_second': 2.91, 'epoch': 4.0}




{'loss': 0.4969, 'grad_norm': 8.867109298706055, 'learning_rate': 2.9768786127167632e-05, 'epoch': 4.05}
{'loss': 0.4031, 'grad_norm': 9.616604804992676, 'learning_rate': 2.947976878612717e-05, 'epoch': 4.1}
{'loss': 0.51, 'grad_norm': 0.6077240109443665, 'learning_rate': 2.9190751445086707e-05, 'epoch': 4.16}
{'loss': 0.2293, 'grad_norm': 2.187213897705078, 'learning_rate': 2.8901734104046245e-05, 'epoch': 4.22}
{'loss': 0.2786, 'grad_norm': 12.27725887298584, 'learning_rate': 2.861271676300578e-05, 'epoch': 4.28}
{'loss': 0.5511, 'grad_norm': 10.162144660949707, 'learning_rate': 2.832369942196532e-05, 'epoch': 4.34}
{'loss': 0.1446, 'grad_norm': 12.814311027526855, 'learning_rate': 2.8034682080924855e-05, 'epoch': 4.39}
{'loss': 0.411, 'grad_norm': 9.853235244750977, 'learning_rate': 2.7745664739884393e-05, 'epoch': 4.45}
{'loss': 0.3627, 'grad_norm': 0.022221120074391365, 'learning_rate': 2.745664739884393e-05, 'epoch': 4.51}
{'loss': 0.511, 'grad_norm': 19.434661865234375, 'learnin

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 1.1017452478408813, 'eval_accuracy': 0.6895424836601307, 'eval_f1_macro': 0.6766946560640605, 'eval_precision': 0.6937735849056603, 'eval_recall': 0.6769698014043855, 'eval_runtime': 26.4913, 'eval_samples_per_second': 11.551, 'eval_steps_per_second': 2.907, 'epoch': 5.0}




{'loss': 0.437, 'grad_norm': 2.3072352409362793, 'learning_rate': 2.485549132947977e-05, 'epoch': 5.03}
{'loss': 0.4282, 'grad_norm': 1.4059523344039917, 'learning_rate': 2.4566473988439306e-05, 'epoch': 5.09}
{'loss': 0.5153, 'grad_norm': 0.38222524523735046, 'learning_rate': 2.4277456647398844e-05, 'epoch': 5.14}
{'loss': 0.3304, 'grad_norm': 2.0769643783569336, 'learning_rate': 2.3988439306358382e-05, 'epoch': 5.2}
{'loss': 0.1672, 'grad_norm': 4.187455654144287, 'learning_rate': 2.369942196531792e-05, 'epoch': 5.26}
{'loss': 0.3121, 'grad_norm': 5.744872093200684, 'learning_rate': 2.3410404624277458e-05, 'epoch': 5.32}
{'loss': 0.6128, 'grad_norm': 8.254083633422852, 'learning_rate': 2.3121387283236996e-05, 'epoch': 5.38}
{'loss': 0.5979, 'grad_norm': 1.5035219192504883, 'learning_rate': 2.2832369942196533e-05, 'epoch': 5.43}
{'loss': 0.3033, 'grad_norm': 13.069225311279297, 'learning_rate': 2.254335260115607e-05, 'epoch': 5.49}
{'loss': 0.551, 'grad_norm': 4.943527698516846, 'lear

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 1.0931638479232788, 'eval_accuracy': 0.7156862745098039, 'eval_f1_macro': 0.7127380631238198, 'eval_precision': 0.7132352941176471, 'eval_recall': 0.7123809934088657, 'eval_runtime': 26.5026, 'eval_samples_per_second': 11.546, 'eval_steps_per_second': 2.905, 'epoch': 6.0}




{'loss': 0.2757, 'grad_norm': 5.827968597412109, 'learning_rate': 1.994219653179191e-05, 'epoch': 6.01}
{'loss': 0.1449, 'grad_norm': 6.6317853927612305, 'learning_rate': 1.9653179190751446e-05, 'epoch': 6.07}
{'loss': 0.1619, 'grad_norm': 0.3979451060295105, 'learning_rate': 1.936416184971098e-05, 'epoch': 6.13}
{'loss': 0.5137, 'grad_norm': 1.9354465007781982, 'learning_rate': 1.907514450867052e-05, 'epoch': 6.18}
{'loss': 0.172, 'grad_norm': 0.8660446405410767, 'learning_rate': 1.8786127167630057e-05, 'epoch': 6.24}
{'loss': 0.5201, 'grad_norm': 5.974310874938965, 'learning_rate': 1.8497109826589594e-05, 'epoch': 6.3}
{'loss': 0.4195, 'grad_norm': 3.339695692062378, 'learning_rate': 1.8208092485549132e-05, 'epoch': 6.36}
{'loss': 0.2364, 'grad_norm': 22.53510093688965, 'learning_rate': 1.791907514450867e-05, 'epoch': 6.42}
{'loss': 0.4671, 'grad_norm': 2.4556586742401123, 'learning_rate': 1.7630057803468208e-05, 'epoch': 6.47}
{'loss': 0.4879, 'grad_norm': 0.720646858215332, 'learni

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 1.1703863143920898, 'eval_accuracy': 0.7058823529411765, 'eval_f1_macro': 0.7002351097178683, 'eval_precision': 0.704353591160221, 'eval_recall': 0.6991771851979494, 'eval_runtime': 17.3091, 'eval_samples_per_second': 17.679, 'eval_steps_per_second': 4.449, 'epoch': 7.0}




{'loss': 0.5531, 'grad_norm': 11.481873512268066, 'learning_rate': 1.4739884393063585e-05, 'epoch': 7.05}
{'loss': 0.0847, 'grad_norm': 0.07952035218477249, 'learning_rate': 1.4450867052023123e-05, 'epoch': 7.11}
{'loss': 0.7055, 'grad_norm': 12.138911247253418, 'learning_rate': 1.416184971098266e-05, 'epoch': 7.17}
{'loss': 0.2054, 'grad_norm': 1.1952322721481323, 'learning_rate': 1.3872832369942197e-05, 'epoch': 7.23}
{'loss': 0.0857, 'grad_norm': 4.5800275802612305, 'learning_rate': 1.3583815028901733e-05, 'epoch': 7.28}
{'loss': 0.4414, 'grad_norm': 9.918490409851074, 'learning_rate': 1.329479768786127e-05, 'epoch': 7.34}
{'loss': 0.1137, 'grad_norm': 5.466392993927002, 'learning_rate': 1.3005780346820809e-05, 'epoch': 7.4}
{'loss': 0.2163, 'grad_norm': 15.372408866882324, 'learning_rate': 1.2716763005780346e-05, 'epoch': 7.46}
{'loss': 0.5174, 'grad_norm': 6.579858779907227, 'learning_rate': 1.2427745664739884e-05, 'epoch': 7.51}
{'loss': 0.2607, 'grad_norm': 5.431713104248047, 'l

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 1.2684245109558105, 'eval_accuracy': 0.7058823529411765, 'eval_f1_macro': 0.6996728462377317, 'eval_precision': 0.7047847527655604, 'eval_recall': 0.6985740748718391, 'eval_runtime': 17.7106, 'eval_samples_per_second': 17.278, 'eval_steps_per_second': 4.348, 'epoch': 8.0}




{'loss': 0.1812, 'grad_norm': 21.978464126586914, 'learning_rate': 9.826589595375723e-06, 'epoch': 8.03}
{'loss': 0.3765, 'grad_norm': 4.030550003051758, 'learning_rate': 9.53757225433526e-06, 'epoch': 8.09}
{'loss': 0.3329, 'grad_norm': 0.008379404433071613, 'learning_rate': 9.248554913294797e-06, 'epoch': 8.15}
{'loss': 0.297, 'grad_norm': 14.608796119689941, 'learning_rate': 8.959537572254335e-06, 'epoch': 8.21}
{'loss': 0.3083, 'grad_norm': 2.5587987899780273, 'learning_rate': 8.670520231213873e-06, 'epoch': 8.27}
{'loss': 0.3966, 'grad_norm': 0.049117762595415115, 'learning_rate': 8.38150289017341e-06, 'epoch': 8.32}
{'loss': 0.0808, 'grad_norm': 0.09989330917596817, 'learning_rate': 8.092485549132949e-06, 'epoch': 8.38}
{'loss': 0.6771, 'grad_norm': 0.9374293684959412, 'learning_rate': 7.803468208092486e-06, 'epoch': 8.44}
{'loss': 0.3571, 'grad_norm': 0.32272347807884216, 'learning_rate': 7.514450867052024e-06, 'epoch': 8.5}
{'loss': 0.2178, 'grad_norm': 0.5035948753356934, 'lea

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 1.2874430418014526, 'eval_accuracy': 0.7156862745098039, 'eval_f1_macro': 0.7104888260562232, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.709365441778314, 'eval_runtime': 17.3417, 'eval_samples_per_second': 17.645, 'eval_steps_per_second': 4.44, 'epoch': 9.0}




{'loss': 0.4297, 'grad_norm': 0.5448151230812073, 'learning_rate': 4.913294797687862e-06, 'epoch': 9.02}
{'loss': 0.1811, 'grad_norm': 0.43625694513320923, 'learning_rate': 4.624277456647399e-06, 'epoch': 9.08}
{'loss': 0.428, 'grad_norm': 4.5861382484436035, 'learning_rate': 4.3352601156069365e-06, 'epoch': 9.13}
{'loss': 0.1241, 'grad_norm': 11.271738052368164, 'learning_rate': 4.046242774566474e-06, 'epoch': 9.19}
{'loss': 0.8872, 'grad_norm': 17.191640853881836, 'learning_rate': 3.757225433526012e-06, 'epoch': 9.25}
{'loss': 0.1952, 'grad_norm': 0.07876572012901306, 'learning_rate': 3.468208092485549e-06, 'epoch': 9.31}
{'loss': 0.1447, 'grad_norm': 0.057581398636102676, 'learning_rate': 3.1791907514450866e-06, 'epoch': 9.36}
{'loss': 0.3575, 'grad_norm': 0.114822618663311, 'learning_rate': 2.8901734104046244e-06, 'epoch': 9.42}
{'loss': 0.2187, 'grad_norm': 10.691125869750977, 'learning_rate': 2.601156069364162e-06, 'epoch': 9.48}
{'loss': 0.2327, 'grad_norm': 0.04104460030794144,

  0%|          | 0/77 [00:00<?, ?it/s]

{'eval_loss': 1.2931360006332397, 'eval_accuracy': 0.7124183006535948, 'eval_f1_macro': 0.7083820662768031, 'eval_precision': 0.7101853871319521, 'eval_recall': 0.7075776504544867, 'eval_runtime': 18.3656, 'eval_samples_per_second': 16.662, 'eval_steps_per_second': 4.193, 'epoch': 10.0}




{'train_runtime': 2534.6994, 'train_samples_per_second': 2.726, 'train_steps_per_second': 0.683, 'train_loss': 0.42331261124914094, 'epoch': 10.0}




  0%|          | 0/63 [00:00<?, ?it/s]

{'eval_loss': 0.8820622563362122,
 'eval_accuracy': 0.744,
 'eval_f1_macro': 0.7385791778315143,
 'eval_precision': 0.7385791778315143,
 'eval_recall': 0.7385791778315143,
 'eval_runtime': 14.1982,
 'eval_samples_per_second': 17.608,
 'eval_steps_per_second': 4.437,
 'epoch': 10.0}