In [1]:
import os 

os.chdir("../..")

from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, AutoModelForSequenceClassification
from peft import get_peft_model, LoraConfig, TaskType
import evaluate
import numpy as np
from task1.config import ProjectPaths
import pandas as pd
import torch

paths = ProjectPaths()

# === 3. Set device ===
device = "mps" if torch.backends.mps.is_available() else "cpu"

# === 4. Load and preprocess data ===
def load_datasets(path):
    df = pd.read_csv(path, sep='\t')
    df = df[df['label'].isin(['SUBJ', 'OBJ'])].copy()
    df['label'] = df['label'].map({'OBJ': 0, 'SUBJ': 1})
    df = df[['sentence', 'label']]
    return Dataset.from_pandas(df)

train_dataset = load_datasets(paths.german_data_dir / "train_de.tsv")
val_dataset   = load_datasets(paths.german_data_dir / "dev_de.tsv")
test_dataset  = load_datasets(paths.german_data_dir / "dev_test_de.tsv")

W0614 14:53:55.252000 27100 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
model_name = "microsoft/deberta-v3-base"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
def tokenize_fn(examples):
    return tokenizer(
        examples["sentence"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

train_dataset = train_dataset.map(tokenize_fn, batched=True)
val_dataset = val_dataset.map(tokenize_fn, batched=True)
test_dataset = test_dataset.map(tokenize_fn, batched=True)

train_dataset = train_dataset.rename_column("label", "labels")
val_dataset = val_dataset.rename_column("label", "labels")
test_dataset = test_dataset.rename_column("label", "labels")

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/491 [00:00<?, ? examples/s]

Map:   0%|          | 0/224 [00:00<?, ? examples/s]

In [4]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type=TaskType.SEQ_CLS,
    target_modules=["query_proj", "key_proj", "value_proj", "dense"] 
)

model = get_peft_model(model, lora_config).to(device)

In [6]:
f1 = evaluate.load("f1")
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1_macro": f1.compute(predictions=preds, references=labels, average="macro")["f1"],
        "precision": precision.compute(predictions=preds, references=labels, average="macro")["precision"],
        "recall": recall.compute(predictions=preds, references=labels, average="macro")["recall"],
    }

In [None]:
# === 8. TrainingArguments ===
training_args = TrainingArguments(
    output_dir="./results/german-lora",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
)

In [7]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

# === 10. Train ===
trainer.train()

# === 11. Evaluate on test set ===
trainer.evaluate(eval_dataset=test_dataset)

  0%|          | 0/2000 [00:00<?, ?it/s]



{'loss': 0.698, 'grad_norm': 1.0093352794647217, 'learning_rate': 4.975e-05, 'epoch': 0.05}
{'loss': 0.6847, 'grad_norm': 0.8912144899368286, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.1}
{'loss': 0.6779, 'grad_norm': 4.017972469329834, 'learning_rate': 4.9250000000000004e-05, 'epoch': 0.15}
{'loss': 0.6861, 'grad_norm': 2.537973165512085, 'learning_rate': 4.9e-05, 'epoch': 0.2}
{'loss': 0.6901, 'grad_norm': 1.059810996055603, 'learning_rate': 4.875e-05, 'epoch': 0.25}
{'loss': 0.6856, 'grad_norm': 1.1726713180541992, 'learning_rate': 4.85e-05, 'epoch': 0.3}
{'loss': 0.6725, 'grad_norm': 1.6457667350769043, 'learning_rate': 4.825e-05, 'epoch': 0.35}
{'loss': 0.6794, 'grad_norm': 1.1387596130371094, 'learning_rate': 4.8e-05, 'epoch': 0.4}
{'loss': 0.6957, 'grad_norm': 1.1148936748504639, 'learning_rate': 4.775e-05, 'epoch': 0.45}
{'loss': 0.675, 'grad_norm': 3.504206418991089, 'learning_rate': 4.75e-05, 'epoch': 0.5}
{'loss': 0.68, 'grad_norm': 1.607285499572754, 'learning_rate

  0%|          | 0/123 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


{'eval_loss': 0.6099659204483032, 'eval_accuracy': 0.6456211812627292, 'eval_f1_macro': 0.39232673267326734, 'eval_precision': 0.3228105906313646, 'eval_recall': 0.5, 'eval_runtime': 34.4583, 'eval_samples_per_second': 14.249, 'eval_steps_per_second': 3.57, 'epoch': 1.0}




{'loss': 0.6153, 'grad_norm': 2.0795321464538574, 'learning_rate': 4.4750000000000004e-05, 'epoch': 1.05}
{'loss': 0.6246, 'grad_norm': 5.234572887420654, 'learning_rate': 4.4500000000000004e-05, 'epoch': 1.1}
{'loss': 0.5835, 'grad_norm': 6.685357093811035, 'learning_rate': 4.4250000000000005e-05, 'epoch': 1.15}
{'loss': 0.5483, 'grad_norm': 2.2409069538116455, 'learning_rate': 4.4000000000000006e-05, 'epoch': 1.2}
{'loss': 0.7534, 'grad_norm': 3.4226462841033936, 'learning_rate': 4.375e-05, 'epoch': 1.25}
{'loss': 0.5258, 'grad_norm': 2.0268568992614746, 'learning_rate': 4.35e-05, 'epoch': 1.3}
{'loss': 0.4316, 'grad_norm': 3.295642375946045, 'learning_rate': 4.325e-05, 'epoch': 1.35}
{'loss': 0.5612, 'grad_norm': 6.478327751159668, 'learning_rate': 4.3e-05, 'epoch': 1.4}
{'loss': 0.47, 'grad_norm': 2.8996994495391846, 'learning_rate': 4.275e-05, 'epoch': 1.45}
{'loss': 0.4762, 'grad_norm': 2.0921897888183594, 'learning_rate': 4.25e-05, 'epoch': 1.5}
{'loss': 0.5978, 'grad_norm': 1.5

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.5724183320999146, 'eval_accuracy': 0.790224032586558, 'eval_f1_macro': 0.7550766399496331, 'eval_precision': 0.784107294317218, 'eval_recall': 0.7429112730700895, 'eval_runtime': 34.4036, 'eval_samples_per_second': 14.272, 'eval_steps_per_second': 3.575, 'epoch': 2.0}




{'loss': 0.6848, 'grad_norm': 6.422117710113525, 'learning_rate': 3.9750000000000004e-05, 'epoch': 2.05}
{'loss': 0.5914, 'grad_norm': 5.8491621017456055, 'learning_rate': 3.9500000000000005e-05, 'epoch': 2.1}
{'loss': 0.5321, 'grad_norm': 4.48037576675415, 'learning_rate': 3.9250000000000005e-05, 'epoch': 2.15}
{'loss': 0.4377, 'grad_norm': 6.738595008850098, 'learning_rate': 3.9000000000000006e-05, 'epoch': 2.2}
{'loss': 0.5727, 'grad_norm': 8.040727615356445, 'learning_rate': 3.875e-05, 'epoch': 2.25}
{'loss': 0.3754, 'grad_norm': 3.41043758392334, 'learning_rate': 3.85e-05, 'epoch': 2.3}
{'loss': 0.3021, 'grad_norm': 6.2868733406066895, 'learning_rate': 3.825e-05, 'epoch': 2.35}
{'loss': 0.3866, 'grad_norm': 0.870008111000061, 'learning_rate': 3.8e-05, 'epoch': 2.4}
{'loss': 0.4233, 'grad_norm': 8.270454406738281, 'learning_rate': 3.775e-05, 'epoch': 2.45}
{'loss': 0.292, 'grad_norm': 0.5077714920043945, 'learning_rate': 3.7500000000000003e-05, 'epoch': 2.5}
{'loss': 0.6156, 'grad_

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.5591015219688416, 'eval_accuracy': 0.7942973523421588, 'eval_f1_macro': 0.7606900711786705, 'eval_precision': 0.7880560339395977, 'eval_recall': 0.7486583995068712, 'eval_runtime': 33.7568, 'eval_samples_per_second': 14.545, 'eval_steps_per_second': 3.644, 'epoch': 3.0}




{'loss': 0.3755, 'grad_norm': 11.864304542541504, 'learning_rate': 3.475e-05, 'epoch': 3.05}
{'loss': 0.6366, 'grad_norm': 1.7812657356262207, 'learning_rate': 3.45e-05, 'epoch': 3.1}
{'loss': 0.2009, 'grad_norm': 6.208560466766357, 'learning_rate': 3.4250000000000006e-05, 'epoch': 3.15}
{'loss': 0.4098, 'grad_norm': 1.1339534521102905, 'learning_rate': 3.4000000000000007e-05, 'epoch': 3.2}
{'loss': 0.3046, 'grad_norm': 1.8611352443695068, 'learning_rate': 3.375000000000001e-05, 'epoch': 3.25}
{'loss': 0.278, 'grad_norm': 2.419081926345825, 'learning_rate': 3.35e-05, 'epoch': 3.3}
{'loss': 0.3122, 'grad_norm': 12.009300231933594, 'learning_rate': 3.325e-05, 'epoch': 3.35}
{'loss': 0.5878, 'grad_norm': 6.370048999786377, 'learning_rate': 3.3e-05, 'epoch': 3.4}
{'loss': 0.3893, 'grad_norm': 57.58097839355469, 'learning_rate': 3.275e-05, 'epoch': 3.45}
{'loss': 0.4884, 'grad_norm': 5.851184368133545, 'learning_rate': 3.2500000000000004e-05, 'epoch': 3.5}
{'loss': 0.4942, 'grad_norm': 5.62

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.6268304586410522, 'eval_accuracy': 0.8105906313645621, 'eval_f1_macro': 0.7916615338288233, 'eval_precision': 0.7937355286853615, 'eval_recall': 0.7897947713840241, 'eval_runtime': 33.4253, 'eval_samples_per_second': 14.689, 'eval_steps_per_second': 3.68, 'epoch': 4.0}




{'loss': 0.3204, 'grad_norm': 10.230814933776855, 'learning_rate': 2.975e-05, 'epoch': 4.05}
{'loss': 0.2886, 'grad_norm': 0.5215737223625183, 'learning_rate': 2.95e-05, 'epoch': 4.1}
{'loss': 0.3885, 'grad_norm': 19.768505096435547, 'learning_rate': 2.925e-05, 'epoch': 4.15}
{'loss': 0.4613, 'grad_norm': 80.20777130126953, 'learning_rate': 2.9e-05, 'epoch': 4.2}
{'loss': 0.3234, 'grad_norm': 2.183642864227295, 'learning_rate': 2.8749999999999997e-05, 'epoch': 4.25}
{'loss': 0.6794, 'grad_norm': 0.24566130340099335, 'learning_rate': 2.8499999999999998e-05, 'epoch': 4.3}
{'loss': 0.3668, 'grad_norm': 0.22714310884475708, 'learning_rate': 2.825e-05, 'epoch': 4.35}
{'loss': 0.3986, 'grad_norm': 23.453969955444336, 'learning_rate': 2.8000000000000003e-05, 'epoch': 4.4}
{'loss': 0.2513, 'grad_norm': 8.12495231628418, 'learning_rate': 2.7750000000000004e-05, 'epoch': 4.45}
{'loss': 0.6551, 'grad_norm': 0.5264950394630432, 'learning_rate': 2.7500000000000004e-05, 'epoch': 4.5}
{'loss': 0.3314

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.8309823870658875, 'eval_accuracy': 0.8044806517311609, 'eval_f1_macro': 0.7840571742715778, 'eval_precision': 0.7874698795180723, 'eval_recall': 0.7811740817288517, 'eval_runtime': 34.0473, 'eval_samples_per_second': 14.421, 'eval_steps_per_second': 3.613, 'epoch': 5.0}




{'loss': 0.3091, 'grad_norm': 3.64620304107666, 'learning_rate': 2.4750000000000002e-05, 'epoch': 5.05}
{'loss': 0.8129, 'grad_norm': 70.44954681396484, 'learning_rate': 2.45e-05, 'epoch': 5.1}
{'loss': 0.6313, 'grad_norm': 19.144224166870117, 'learning_rate': 2.425e-05, 'epoch': 5.15}
{'loss': 0.2371, 'grad_norm': 0.2112981379032135, 'learning_rate': 2.4e-05, 'epoch': 5.2}
{'loss': 0.5429, 'grad_norm': 38.014671325683594, 'learning_rate': 2.375e-05, 'epoch': 5.25}
{'loss': 0.4821, 'grad_norm': 0.14207525551319122, 'learning_rate': 2.35e-05, 'epoch': 5.3}
{'loss': 0.0869, 'grad_norm': 0.35188719630241394, 'learning_rate': 2.3250000000000003e-05, 'epoch': 5.35}
{'loss': 0.2992, 'grad_norm': 0.3308786153793335, 'learning_rate': 2.3000000000000003e-05, 'epoch': 5.4}
{'loss': 0.2581, 'grad_norm': 4.784211158752441, 'learning_rate': 2.275e-05, 'epoch': 5.45}
{'loss': 0.2344, 'grad_norm': 24.362953186035156, 'learning_rate': 2.25e-05, 'epoch': 5.5}
{'loss': 0.5609, 'grad_norm': 0.11162679642

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.8500701785087585, 'eval_accuracy': 0.7983706720977597, 'eval_f1_macro': 0.772349841938883, 'eval_precision': 0.7844663030775224, 'eval_recall': 0.7647757351608108, 'eval_runtime': 34.1442, 'eval_samples_per_second': 14.38, 'eval_steps_per_second': 3.602, 'epoch': 6.0}




{'loss': 0.5457, 'grad_norm': 0.2998816668987274, 'learning_rate': 1.9750000000000002e-05, 'epoch': 6.05}
{'loss': 0.3119, 'grad_norm': 50.33544158935547, 'learning_rate': 1.9500000000000003e-05, 'epoch': 6.1}
{'loss': 0.1656, 'grad_norm': 0.1412527859210968, 'learning_rate': 1.925e-05, 'epoch': 6.15}
{'loss': 0.4591, 'grad_norm': 8.897539138793945, 'learning_rate': 1.9e-05, 'epoch': 6.2}
{'loss': 0.2548, 'grad_norm': 0.13835477828979492, 'learning_rate': 1.8750000000000002e-05, 'epoch': 6.25}
{'loss': 0.3071, 'grad_norm': 8.795724868774414, 'learning_rate': 1.85e-05, 'epoch': 6.3}
{'loss': 0.1292, 'grad_norm': 0.2912594676017761, 'learning_rate': 1.825e-05, 'epoch': 6.35}
{'loss': 0.229, 'grad_norm': 0.1426173895597458, 'learning_rate': 1.8e-05, 'epoch': 6.4}
{'loss': 0.1916, 'grad_norm': 4.958477973937988, 'learning_rate': 1.775e-05, 'epoch': 6.45}
{'loss': 0.3501, 'grad_norm': 0.9313207268714905, 'learning_rate': 1.75e-05, 'epoch': 6.5}
{'loss': 0.4213, 'grad_norm': 0.13779520988464

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 0.9580276012420654, 'eval_accuracy': 0.7820773930753564, 'eval_f1_macro': 0.7687194319347412, 'eval_precision': 0.763965374819302, 'eval_recall': 0.7780829616737373, 'eval_runtime': 33.3847, 'eval_samples_per_second': 14.707, 'eval_steps_per_second': 3.684, 'epoch': 7.0}




{'loss': 0.2533, 'grad_norm': 0.23080196976661682, 'learning_rate': 1.475e-05, 'epoch': 7.05}
{'loss': 0.294, 'grad_norm': 0.10749437659978867, 'learning_rate': 1.45e-05, 'epoch': 7.1}
{'loss': 0.1227, 'grad_norm': 1.7650617361068726, 'learning_rate': 1.4249999999999999e-05, 'epoch': 7.15}
{'loss': 0.4779, 'grad_norm': 47.398284912109375, 'learning_rate': 1.4000000000000001e-05, 'epoch': 7.2}
{'loss': 0.1262, 'grad_norm': 0.15706726908683777, 'learning_rate': 1.3750000000000002e-05, 'epoch': 7.25}
{'loss': 0.0346, 'grad_norm': 0.19711123406887054, 'learning_rate': 1.3500000000000001e-05, 'epoch': 7.3}
{'loss': 0.1444, 'grad_norm': 0.10522834956645966, 'learning_rate': 1.3250000000000002e-05, 'epoch': 7.35}
{'loss': 0.4961, 'grad_norm': 0.09169311076402664, 'learning_rate': 1.3000000000000001e-05, 'epoch': 7.4}
{'loss': 0.2951, 'grad_norm': 15.649847030639648, 'learning_rate': 1.2750000000000002e-05, 'epoch': 7.45}
{'loss': 0.2575, 'grad_norm': 0.16833791136741638, 'learning_rate': 1.25

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 1.0785268545150757, 'eval_accuracy': 0.7780040733197556, 'eval_f1_macro': 0.7675764879595248, 'eval_precision': 0.7633414395244368, 'eval_recall': 0.7827060444541136, 'eval_runtime': 34.2359, 'eval_samples_per_second': 14.342, 'eval_steps_per_second': 3.593, 'epoch': 8.0}




{'loss': 0.152, 'grad_norm': 16.217744827270508, 'learning_rate': 9.750000000000002e-06, 'epoch': 8.05}
{'loss': 0.2218, 'grad_norm': 71.45085906982422, 'learning_rate': 9.5e-06, 'epoch': 8.1}
{'loss': 0.0808, 'grad_norm': 0.1747199296951294, 'learning_rate': 9.25e-06, 'epoch': 8.15}
{'loss': 0.1914, 'grad_norm': 0.09392975270748138, 'learning_rate': 9e-06, 'epoch': 8.2}
{'loss': 0.027, 'grad_norm': 2.2244338989257812, 'learning_rate': 8.75e-06, 'epoch': 8.25}
{'loss': 0.4031, 'grad_norm': 0.10591121762990952, 'learning_rate': 8.500000000000002e-06, 'epoch': 8.3}
{'loss': 0.2664, 'grad_norm': 12.813729286193848, 'learning_rate': 8.25e-06, 'epoch': 8.35}
{'loss': 0.3007, 'grad_norm': 29.248395919799805, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.4}
{'loss': 0.1531, 'grad_norm': 0.14035263657569885, 'learning_rate': 7.75e-06, 'epoch': 8.45}
{'loss': 0.4997, 'grad_norm': 36.45167922973633, 'learning_rate': 7.5e-06, 'epoch': 8.5}
{'loss': 0.135, 'grad_norm': 0.06193707510828972, 'l

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 1.0276789665222168, 'eval_accuracy': 0.780040733197556, 'eval_f1_macro': 0.7663141195134849, 'eval_precision': 0.7616609783845278, 'eval_recall': 0.7752093984553464, 'eval_runtime': 34.8589, 'eval_samples_per_second': 14.085, 'eval_steps_per_second': 3.529, 'epoch': 9.0}




{'loss': 0.0113, 'grad_norm': 2.2192533016204834, 'learning_rate': 4.75e-06, 'epoch': 9.05}
{'loss': 0.2576, 'grad_norm': 0.15804658830165863, 'learning_rate': 4.5e-06, 'epoch': 9.1}
{'loss': 0.4138, 'grad_norm': 0.27065786719322205, 'learning_rate': 4.250000000000001e-06, 'epoch': 9.15}
{'loss': 0.3153, 'grad_norm': 0.9093875288963318, 'learning_rate': 4.000000000000001e-06, 'epoch': 9.2}
{'loss': 0.3247, 'grad_norm': 63.24653625488281, 'learning_rate': 3.75e-06, 'epoch': 9.25}
{'loss': 0.2971, 'grad_norm': 0.4986174404621124, 'learning_rate': 3.5000000000000004e-06, 'epoch': 9.3}
{'loss': 0.1393, 'grad_norm': 0.0940680131316185, 'learning_rate': 3.2500000000000002e-06, 'epoch': 9.35}
{'loss': 0.5426, 'grad_norm': 38.61668395996094, 'learning_rate': 3e-06, 'epoch': 9.4}
{'loss': 0.0506, 'grad_norm': 0.10592406988143921, 'learning_rate': 2.7500000000000004e-06, 'epoch': 9.45}
{'loss': 0.3521, 'grad_norm': 0.33213943243026733, 'learning_rate': 2.5e-06, 'epoch': 9.5}
{'loss': 0.2169, 'gr

  0%|          | 0/123 [00:00<?, ?it/s]

{'eval_loss': 1.0055826902389526, 'eval_accuracy': 0.7861507128309573, 'eval_f1_macro': 0.7710752425567815, 'eval_precision': 0.7669808027923212, 'eval_recall': 0.7773487073497951, 'eval_runtime': 34.2975, 'eval_samples_per_second': 14.316, 'eval_steps_per_second': 3.586, 'epoch': 10.0}




{'train_runtime': 3138.4817, 'train_samples_per_second': 2.549, 'train_steps_per_second': 0.637, 'train_loss': 0.40653089388459923, 'epoch': 10.0}




  0%|          | 0/56 [00:00<?, ?it/s]

{'eval_loss': 0.49777624011039734,
 'eval_accuracy': 0.8392857142857143,
 'eval_f1_macro': 0.8183783783783785,
 'eval_precision': 0.8126159554730983,
 'eval_recall': 0.8257387462027064,
 'eval_runtime': 15.5094,
 'eval_samples_per_second': 14.443,
 'eval_steps_per_second': 3.611,
 'epoch': 10.0}