In [1]:
import os 

# Remember where the kernel started so that re-running this cell always lands
# in the same directory instead of climbing two more levels on every run.
# On a fresh kernel this is exactly the original `os.chdir("../..")`.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, "../.."))

from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, AutoModelForSequenceClassification
from peft import get_peft_model, LoraConfig, TaskType
import evaluate
import numpy as np
from task1.config import ProjectPaths
import pandas as pd
import torch

# Project path configuration; `paths.italian_data_dir` is read below when the
# TSV splits are loaded.
paths = ProjectPaths()

# === 3. Set device ===
# Prefer an NVIDIA GPU, then Apple-silicon MPS, and only then fall back to CPU.
# (The original check skipped CUDA entirely.)
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# === 4. Load and preprocess data ===
def load_datasets(path):
    """Load one tab-separated split into a Hugging Face ``Dataset``.

    Rows whose ``label`` is neither ``SUBJ`` nor ``OBJ`` are dropped, the
    remaining labels are encoded as ``OBJ -> 0`` and ``SUBJ -> 1``, and only
    the ``sentence`` and ``label`` columns are kept.

    Args:
        path: Location of a TSV file containing at least the ``sentence`` and
            ``label`` columns.

    Returns:
        A ``Dataset`` with exactly two columns: ``sentence`` and ``label``.
    """
    df = pd.read_csv(path, sep='\t')
    df = df[df['label'].isin(['SUBJ', 'OBJ'])].copy()
    df['label'] = df['label'].map({'OBJ': 0, 'SUBJ': 1})
    df = df[['sentence', 'label']]
    # The boolean filter above leaves a non-default pandas index behind; drop
    # it so it is not materialised as a stray `__index_level_0__` column.
    return Dataset.from_pandas(df, preserve_index=False)

# Load the four Italian splits in a fixed order: train, dev, dev-test and the
# labelled competition test set. All of them live under `paths.italian_data_dir`.
train_dataset, val_dataset, test_dataset, competition_test_dataset = (
    load_datasets(paths.italian_data_dir / split_file)
    for split_file in (
        "train_it.tsv",
        "dev_it.tsv",
        "dev_test_it.tsv",
        "test_it_labeled.tsv",
    )
)

W0615 11:19:09.821000 8624 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
# Seed torch's global RNG before the model is built. The classification head is
# not part of the checkpoint and is initialised randomly, so without a fixed
# seed every kernel restart would start training from different weights.
SEED = 42
torch.manual_seed(SEED)

model_name = "Musixmatch/umberto-commoncrawl-cased-v1"
# Two output classes: OBJ -> 0 and SUBJ -> 1 (the encoding used by load_datasets).
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at Musixmatch/umberto-commoncrawl-cased-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
def tokenize_fn(examples):
    """Tokenize the ``sentence`` field of a batch into fixed-length inputs.

    Every sentence is truncated and padded to exactly 128 tokens.

    Args:
        examples: Mapping with a ``sentence`` entry holding the raw texts.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts.
    """
    encode_options = dict(padding="max_length", truncation=True, max_length=128)
    sentences = examples["sentence"]
    return tokenizer(sentences, **encode_options)

# Columns handed to the model; everything else stays in the dataset but is
# hidden from the Trainer by `set_format`.
MODEL_COLUMNS = ["input_ids", "attention_mask", "labels"]


def _prepare_split(dataset):
    """Tokenize one split and expose it to the Trainer as torch tensors.

    The three steps (tokenize, rename ``label`` to ``labels``, set the torch
    format) used to be copy-pasted once per split. The helper is also safe to
    call on an already-prepared split, so this cell can be re-run.

    Args:
        dataset: A split with ``sentence`` and ``label`` (or, on a re-run,
            ``labels``) columns.

    Returns:
        The tokenized split, formatted to yield ``MODEL_COLUMNS`` as tensors.
    """
    # Clear any torch formatting left by a previous run of this cell; with the
    # column restriction still active, `sentence` would be hidden from
    # `tokenize_fn`.
    dataset = dataset.with_format(None)
    dataset = dataset.map(tokenize_fn, batched=True)
    # On a re-run the column has already been renamed.
    if "label" in dataset.column_names:
        dataset = dataset.rename_column("label", "labels")
    dataset.set_format(type="torch", columns=MODEL_COLUMNS)
    return dataset


train_dataset = _prepare_split(train_dataset)
val_dataset = _prepare_split(val_dataset)
test_dataset = _prepare_split(test_dataset)
competition_test_dataset = _prepare_split(competition_test_dataset)

Map:   0%|          | 0/1613 [00:00<?, ? examples/s]

Map:   0%|          | 0/667 [00:00<?, ? examples/s]

Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/299 [00:00<?, ? examples/s]

In [4]:
# LoRA settings for a sequence-classification task. Adapters are attached to
# every module whose name matches one of `target_modules`.
# NOTE(review): "dense" may also match the classifier's own dense layer;
# confirm that this is intended.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    target_modules=["query", "key", "value", "dense"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
)

# Wrap the base model with the adapters, then move it to the selected device.
model = get_peft_model(model, lora_config)
model = model.to(device)

In [5]:
# Load the four `evaluate` metrics used by compute_metrics, in the same order
# as before: f1, accuracy, precision, recall.
f1, accuracy, precision, recall = (
    evaluate.load(metric_id)
    for metric_id in ("f1", "accuracy", "precision", "recall")
)

def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into the metrics reported by the Trainer.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class is the
            argmax of the logits over the last axis.

    Returns:
        A dict with ``accuracy`` plus macro-averaged ``f1_macro``,
        ``precision`` and ``recall``.
    """
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)

    def _macro(metric, key):
        # Macro-averaged score, pulled out of the metric's result dict.
        scores = metric.compute(predictions=preds, references=labels, average="macro")
        return scores[key]

    accuracy_score = accuracy.compute(predictions=preds, references=labels)["accuracy"]
    return {
        "accuracy": accuracy_score,
        "f1_macro": _macro(f1, "f1"),
        "precision": _macro(precision, "precision"),
        "recall": _macro(recall, "recall"),
    }

In [6]:
# === Training configuration ===
# Evaluation and checkpointing share the same per-epoch cadence, and the
# checkpoint with the best `f1_macro` (the key returned by compute_metrics) is
# reloaded once training finishes.
training_args = TrainingArguments(
    output_dir="./results/italian-lora",
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model="f1_macro",
    load_best_model_at_end=True,
)

In [7]:
# Wire the model, data splits and metric function into a Trainer. The dev split
# is used for the evaluations that run during training.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
)

# Fine-tune.
trainer.train()

# Persist the model held by the trainer after training.
trainer.save_model("./results/italian-lora/final_checkpoint")

# Score on the dev-test split; the resulting dict is the cell's output.
dev_test_metrics = trainer.evaluate(eval_dataset=test_dataset)
dev_test_metrics

  0%|          | 0/4040 [00:00<?, ?it/s]



{'loss': 0.6477, 'grad_norm': 1.2946667671203613, 'learning_rate': 4.987623762376238e-05, 'epoch': 0.02}
{'loss': 0.635, 'grad_norm': 2.679760217666626, 'learning_rate': 4.975247524752475e-05, 'epoch': 0.05}
{'loss': 0.5162, 'grad_norm': 2.4853668212890625, 'learning_rate': 4.9628712871287133e-05, 'epoch': 0.07}
{'loss': 0.5719, 'grad_norm': 2.0953242778778076, 'learning_rate': 4.950495049504951e-05, 'epoch': 0.1}
{'loss': 0.547, 'grad_norm': 1.3017030954360962, 'learning_rate': 4.938118811881188e-05, 'epoch': 0.12}
{'loss': 0.5817, 'grad_norm': 2.492706537246704, 'learning_rate': 4.925742574257426e-05, 'epoch': 0.15}
{'loss': 0.6319, 'grad_norm': 1.0773237943649292, 'learning_rate': 4.913366336633663e-05, 'epoch': 0.17}
{'loss': 0.4662, 'grad_norm': 2.4209187030792236, 'learning_rate': 4.9009900990099014e-05, 'epoch': 0.2}
{'loss': 0.6549, 'grad_norm': 1.5622074604034424, 'learning_rate': 4.888613861386139e-05, 'epoch': 0.22}
{'loss': 0.5656, 'grad_norm': 1.5578254461288452, 'learning

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.47414451837539673, 'eval_accuracy': 0.7571214392803598, 'eval_f1_macro': 0.6790297283809795, 'eval_precision': 0.685347261434218, 'eval_recall': 0.6740977747031016, 'eval_runtime': 32.6237, 'eval_samples_per_second': 20.445, 'eval_steps_per_second': 5.119, 'epoch': 1.0}




{'loss': 0.5636, 'grad_norm': 2.0574791431427, 'learning_rate': 4.492574257425743e-05, 'epoch': 1.01}
{'loss': 0.3478, 'grad_norm': 3.224621295928955, 'learning_rate': 4.4801980198019804e-05, 'epoch': 1.04}
{'loss': 0.6063, 'grad_norm': 6.774715900421143, 'learning_rate': 4.467821782178218e-05, 'epoch': 1.06}
{'loss': 0.5476, 'grad_norm': 6.400738716125488, 'learning_rate': 4.455445544554456e-05, 'epoch': 1.09}
{'loss': 0.3547, 'grad_norm': 3.479358673095703, 'learning_rate': 4.4430693069306935e-05, 'epoch': 1.11}
{'loss': 0.4181, 'grad_norm': 1.2321308851242065, 'learning_rate': 4.430693069306931e-05, 'epoch': 1.14}
{'loss': 0.3813, 'grad_norm': 1.5671265125274658, 'learning_rate': 4.4183168316831684e-05, 'epoch': 1.16}
{'loss': 0.3333, 'grad_norm': 9.203937530517578, 'learning_rate': 4.405940594059406e-05, 'epoch': 1.19}
{'loss': 0.5131, 'grad_norm': 0.8447697162628174, 'learning_rate': 4.393564356435644e-05, 'epoch': 1.21}
{'loss': 0.5567, 'grad_norm': 6.54458475112915, 'learning_ra

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.514875054359436, 'eval_accuracy': 0.7886056971514243, 'eval_f1_macro': 0.6641814228021125, 'eval_precision': 0.7567998039696153, 'eval_recall': 0.6450017295053614, 'eval_runtime': 32.7498, 'eval_samples_per_second': 20.367, 'eval_steps_per_second': 5.099, 'epoch': 2.0}




{'loss': 0.3128, 'grad_norm': 1.678525447845459, 'learning_rate': 3.997524752475248e-05, 'epoch': 2.0}
{'loss': 0.6148, 'grad_norm': 3.3997509479522705, 'learning_rate': 3.9851485148514856e-05, 'epoch': 2.03}
{'loss': 0.5547, 'grad_norm': 3.86856746673584, 'learning_rate': 3.972772277227723e-05, 'epoch': 2.05}
{'loss': 0.2814, 'grad_norm': 4.017655372619629, 'learning_rate': 3.9603960396039605e-05, 'epoch': 2.08}
{'loss': 0.4783, 'grad_norm': 5.097978591918945, 'learning_rate': 3.948019801980199e-05, 'epoch': 2.1}
{'loss': 0.5077, 'grad_norm': 1.9345548152923584, 'learning_rate': 3.935643564356436e-05, 'epoch': 2.13}
{'loss': 0.5057, 'grad_norm': 2.992550849914551, 'learning_rate': 3.9232673267326736e-05, 'epoch': 2.15}
{'loss': 0.3618, 'grad_norm': 1.1169309616088867, 'learning_rate': 3.910891089108911e-05, 'epoch': 2.18}
{'loss': 0.6053, 'grad_norm': 1.3776308298110962, 'learning_rate': 3.8985148514851486e-05, 'epoch': 2.2}
{'loss': 0.3229, 'grad_norm': 3.1980204582214355, 'learning_

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.48362499475479126, 'eval_accuracy': 0.7796101949025487, 'eval_f1_macro': 0.7046654598139117, 'eval_precision': 0.7158552948026632, 'eval_recall': 0.6966216995272685, 'eval_runtime': 32.909, 'eval_samples_per_second': 20.268, 'eval_steps_per_second': 5.075, 'epoch': 3.0}




{'loss': 0.397, 'grad_norm': 3.608808755874634, 'learning_rate': 3.49009900990099e-05, 'epoch': 3.02}
{'loss': 0.2299, 'grad_norm': 6.467466831207275, 'learning_rate': 3.4777227722772276e-05, 'epoch': 3.04}
{'loss': 0.4534, 'grad_norm': 4.749697685241699, 'learning_rate': 3.465346534653465e-05, 'epoch': 3.07}
{'loss': 0.2439, 'grad_norm': 1.773861050605774, 'learning_rate': 3.452970297029703e-05, 'epoch': 3.09}
{'loss': 0.5209, 'grad_norm': 7.974883079528809, 'learning_rate': 3.440594059405941e-05, 'epoch': 3.12}
{'loss': 0.2245, 'grad_norm': 2.905939817428589, 'learning_rate': 3.428217821782179e-05, 'epoch': 3.14}
{'loss': 0.2857, 'grad_norm': 4.490268707275391, 'learning_rate': 3.415841584158416e-05, 'epoch': 3.17}
{'loss': 0.5301, 'grad_norm': 10.517884254455566, 'learning_rate': 3.403465346534654e-05, 'epoch': 3.19}
{'loss': 0.2658, 'grad_norm': 9.900703430175781, 'learning_rate': 3.391089108910891e-05, 'epoch': 3.22}
{'loss': 0.2709, 'grad_norm': 0.37651267647743225, 'learning_rat

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.6086726188659668, 'eval_accuracy': 0.7886056971514243, 'eval_f1_macro': 0.7107785701764906, 'eval_precision': 0.7293260038240919, 'eval_recall': 0.6991352473192667, 'eval_runtime': 33.5164, 'eval_samples_per_second': 19.901, 'eval_steps_per_second': 4.983, 'epoch': 4.0}




{'loss': 0.2311, 'grad_norm': 11.19477653503418, 'learning_rate': 2.995049504950495e-05, 'epoch': 4.01}
{'loss': 0.3875, 'grad_norm': 29.747661590576172, 'learning_rate': 2.9826732673267327e-05, 'epoch': 4.03}
{'loss': 0.5762, 'grad_norm': 3.092421531677246, 'learning_rate': 2.9702970297029702e-05, 'epoch': 4.06}
{'loss': 0.313, 'grad_norm': 14.304679870605469, 'learning_rate': 2.957920792079208e-05, 'epoch': 4.08}
{'loss': 0.3682, 'grad_norm': 17.73147201538086, 'learning_rate': 2.9455445544554455e-05, 'epoch': 4.11}
{'loss': 0.3895, 'grad_norm': 0.45905545353889465, 'learning_rate': 2.933168316831683e-05, 'epoch': 4.13}
{'loss': 0.3651, 'grad_norm': 21.205453872680664, 'learning_rate': 2.9207920792079208e-05, 'epoch': 4.16}
{'loss': 0.5213, 'grad_norm': 28.31267547607422, 'learning_rate': 2.9084158415841583e-05, 'epoch': 4.18}
{'loss': 0.4226, 'grad_norm': 18.780338287353516, 'learning_rate': 2.896039603960396e-05, 'epoch': 4.21}
{'loss': 0.233, 'grad_norm': 23.844398498535156, 'lear

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.7821573615074158, 'eval_accuracy': 0.7871064467766117, 'eval_f1_macro': 0.6871647509578545, 'eval_precision': 0.7344733395099199, 'eval_recall': 0.669243629655252, 'eval_runtime': 32.3026, 'eval_samples_per_second': 20.648, 'eval_steps_per_second': 5.17, 'epoch': 5.0}




{'loss': 0.3941, 'grad_norm': 35.068397521972656, 'learning_rate': 2.4876237623762376e-05, 'epoch': 5.02}
{'loss': 0.2628, 'grad_norm': 5.479031085968018, 'learning_rate': 2.4752475247524754e-05, 'epoch': 5.05}
{'loss': 0.5881, 'grad_norm': 0.5611746907234192, 'learning_rate': 2.462871287128713e-05, 'epoch': 5.07}
{'loss': 0.2618, 'grad_norm': 0.747341513633728, 'learning_rate': 2.4504950495049507e-05, 'epoch': 5.1}
{'loss': 0.3006, 'grad_norm': 4.886248588562012, 'learning_rate': 2.4381188118811882e-05, 'epoch': 5.12}
{'loss': 0.5108, 'grad_norm': 2.7582340240478516, 'learning_rate': 2.4257425742574257e-05, 'epoch': 5.15}
{'loss': 0.3364, 'grad_norm': 7.430686950683594, 'learning_rate': 2.4133663366336635e-05, 'epoch': 5.17}
{'loss': 0.2386, 'grad_norm': 0.1517215520143509, 'learning_rate': 2.400990099009901e-05, 'epoch': 5.2}
{'loss': 0.3358, 'grad_norm': 9.547091484069824, 'learning_rate': 2.3886138613861388e-05, 'epoch': 5.22}
{'loss': 0.4105, 'grad_norm': 0.5709012150764465, 'lear

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.728874683380127, 'eval_accuracy': 0.7841079460269865, 'eval_f1_macro': 0.7064906490649066, 'eval_precision': 0.7224686028257457, 'eval_recall': 0.6960740228294708, 'eval_runtime': 33.3266, 'eval_samples_per_second': 20.014, 'eval_steps_per_second': 5.011, 'epoch': 6.0}




{'loss': 0.5163, 'grad_norm': 0.09704329073429108, 'learning_rate': 1.9925742574257428e-05, 'epoch': 6.01}
{'loss': 0.5098, 'grad_norm': 0.25432005524635315, 'learning_rate': 1.9801980198019803e-05, 'epoch': 6.04}
{'loss': 0.3233, 'grad_norm': 19.964330673217773, 'learning_rate': 1.967821782178218e-05, 'epoch': 6.06}
{'loss': 0.4546, 'grad_norm': 57.22744369506836, 'learning_rate': 1.9554455445544556e-05, 'epoch': 6.09}
{'loss': 0.2086, 'grad_norm': 2.4479644298553467, 'learning_rate': 1.9430693069306934e-05, 'epoch': 6.11}
{'loss': 0.4054, 'grad_norm': 4.290477752685547, 'learning_rate': 1.930693069306931e-05, 'epoch': 6.14}
{'loss': 0.0394, 'grad_norm': 4.113261699676514, 'learning_rate': 1.9183168316831683e-05, 'epoch': 6.16}
{'loss': 0.3971, 'grad_norm': 0.38177141547203064, 'learning_rate': 1.905940594059406e-05, 'epoch': 6.19}
{'loss': 0.4877, 'grad_norm': 9.536050796508789, 'learning_rate': 1.8935643564356436e-05, 'epoch': 6.21}
{'loss': 0.6383, 'grad_norm': 0.17019475996494293,

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.818067729473114, 'eval_accuracy': 0.7901049475262368, 'eval_f1_macro': 0.7071478749560942, 'eval_precision': 0.7329922027290449, 'eval_recall': 0.6929378531073447, 'eval_runtime': 32.8294, 'eval_samples_per_second': 20.317, 'eval_steps_per_second': 5.087, 'epoch': 7.0}




{'loss': 0.5656, 'grad_norm': 15.043818473815918, 'learning_rate': 1.4975247524752475e-05, 'epoch': 7.0}
{'loss': 0.3255, 'grad_norm': 1.5345538854599, 'learning_rate': 1.4851485148514851e-05, 'epoch': 7.03}
{'loss': 0.1142, 'grad_norm': 15.945876121520996, 'learning_rate': 1.4727722772277228e-05, 'epoch': 7.05}
{'loss': 0.2255, 'grad_norm': 0.15017184615135193, 'learning_rate': 1.4603960396039604e-05, 'epoch': 7.08}
{'loss': 0.5186, 'grad_norm': 5.707466125488281, 'learning_rate': 1.448019801980198e-05, 'epoch': 7.1}
{'loss': 0.1814, 'grad_norm': 0.06441420316696167, 'learning_rate': 1.4356435643564355e-05, 'epoch': 7.13}
{'loss': 0.2055, 'grad_norm': 0.2953103184700012, 'learning_rate': 1.4232673267326732e-05, 'epoch': 7.15}
{'loss': 0.2918, 'grad_norm': 0.1334735006093979, 'learning_rate': 1.4108910891089108e-05, 'epoch': 7.18}
{'loss': 0.1721, 'grad_norm': 0.09749078005552292, 'learning_rate': 1.3985148514851486e-05, 'epoch': 7.2}
{'loss': 0.4485, 'grad_norm': 1.9058643579483032, '

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.8107857704162598, 'eval_accuracy': 0.7871064467766117, 'eval_f1_macro': 0.7186556878401178, 'eval_precision': 0.7262845849802372, 'eval_recall': 0.7125504439063761, 'eval_runtime': 32.2006, 'eval_samples_per_second': 20.714, 'eval_steps_per_second': 5.186, 'epoch': 8.0}




{'loss': 0.3751, 'grad_norm': 0.24987567961215973, 'learning_rate': 9.900990099009901e-06, 'epoch': 8.02}
{'loss': 0.1653, 'grad_norm': 79.06468200683594, 'learning_rate': 9.777227722772278e-06, 'epoch': 8.04}
{'loss': 0.3855, 'grad_norm': 0.12470856308937073, 'learning_rate': 9.653465346534654e-06, 'epoch': 8.07}
{'loss': 0.3208, 'grad_norm': 0.9925429224967957, 'learning_rate': 9.52970297029703e-06, 'epoch': 8.09}
{'loss': 0.1706, 'grad_norm': 14.46657657623291, 'learning_rate': 9.405940594059407e-06, 'epoch': 8.12}
{'loss': 0.2277, 'grad_norm': 73.15108489990234, 'learning_rate': 9.282178217821782e-06, 'epoch': 8.14}
{'loss': 0.1329, 'grad_norm': 10.850229263305664, 'learning_rate': 9.158415841584158e-06, 'epoch': 8.17}
{'loss': 0.2151, 'grad_norm': 0.17985272407531738, 'learning_rate': 9.034653465346535e-06, 'epoch': 8.19}
{'loss': 0.4871, 'grad_norm': 14.784942626953125, 'learning_rate': 8.910891089108911e-06, 'epoch': 8.22}
{'loss': 0.3588, 'grad_norm': 6.7634735107421875, 'learn

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.8725814819335938, 'eval_accuracy': 0.7901049475262368, 'eval_f1_macro': 0.7226182837860318, 'eval_precision': 0.730378317334839, 'eval_recall': 0.7163957108267036, 'eval_runtime': 44.5599, 'eval_samples_per_second': 14.969, 'eval_steps_per_second': 3.748, 'epoch': 9.0}




{'loss': 0.358, 'grad_norm': 0.09645579010248184, 'learning_rate': 4.950495049504951e-06, 'epoch': 9.01}
{'loss': 0.5241, 'grad_norm': 29.36901092529297, 'learning_rate': 4.826732673267327e-06, 'epoch': 9.03}
{'loss': 0.3498, 'grad_norm': 0.08616279065608978, 'learning_rate': 4.702970297029704e-06, 'epoch': 9.06}
{'loss': 0.2298, 'grad_norm': 4.9290924072265625, 'learning_rate': 4.579207920792079e-06, 'epoch': 9.08}
{'loss': 0.1845, 'grad_norm': 40.14142990112305, 'learning_rate': 4.455445544554456e-06, 'epoch': 9.11}
{'loss': 0.164, 'grad_norm': 0.0910131111741066, 'learning_rate': 4.331683168316831e-06, 'epoch': 9.13}
{'loss': 0.439, 'grad_norm': 0.10959550738334656, 'learning_rate': 4.207920792079209e-06, 'epoch': 9.16}
{'loss': 0.0264, 'grad_norm': 0.7815771698951721, 'learning_rate': 4.084158415841584e-06, 'epoch': 9.18}
{'loss': 0.2626, 'grad_norm': 7.1821699142456055, 'learning_rate': 3.960396039603961e-06, 'epoch': 9.21}
{'loss': 0.1141, 'grad_norm': 0.34607169032096863, 'learn

  0%|          | 0/167 [00:00<?, ?it/s]

{'eval_loss': 0.8937280774116516, 'eval_accuracy': 0.7856071964017991, 'eval_f1_macro': 0.7225401073407707, 'eval_precision': 0.7246711299036881, 'eval_recall': 0.720552288712095, 'eval_runtime': 33.9591, 'eval_samples_per_second': 19.641, 'eval_steps_per_second': 4.918, 'epoch': 10.0}




{'train_runtime': 5106.0853, 'train_samples_per_second': 3.159, 'train_steps_per_second': 0.791, 'train_loss': 0.3716635705654205, 'epoch': 10.0}




  0%|          | 0/116 [00:00<?, ?it/s]

{'eval_loss': 0.7847893834114075,
 'eval_accuracy': 0.829004329004329,
 'eval_f1_macro': 0.7650792016117094,
 'eval_precision': 0.8044846577498033,
 'eval_recall': 0.7444096182634731,
 'eval_runtime': 23.1998,
 'eval_samples_per_second': 19.914,
 'eval_steps_per_second': 5.0,
 'epoch': 10.0}

In [8]:
# Score the trained model on the labelled competition test split
# (loaded from test_it_labeled.tsv); the metrics dict is the cell's output.
trainer.evaluate(eval_dataset=competition_test_dataset)



  0%|          | 0/75 [00:00<?, ?it/s]

{'eval_loss': 1.005687952041626,
 'eval_accuracy': 0.7859531772575251,
 'eval_f1_macro': 0.748792270531401,
 'eval_precision': 0.7837545337545337,
 'eval_recall': 0.7361029984423676,
 'eval_runtime': 15.3739,
 'eval_samples_per_second': 19.449,
 'eval_steps_per_second': 4.878,
 'epoch': 10.0}