In [1]:
import os 
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
os.chdir("../..")

from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
import evaluate
import numpy as np
from task1.config import ProjectPaths
import pandas as pd
import torch

paths = ProjectPaths()

# === 3. Set device ===
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"


# === 4. Load and preprocess data ===
def load_raw_df(path):
    df = pd.read_csv(path, sep='\t')
    df = df[df['label'].isin(['SUBJ', 'OBJ'])].copy()
    df['labels'] = df['label'].map({'OBJ': 0, 'SUBJ': 1})
    df = df[['sentence', 'labels']]
    return df

# Assuming paths.data_dir is a Path object pointing to the directory containing your language folders
langs = ["english", "arabic", "bulgarian", "italian", "german"]
aliases = ["en", "ar", "bg", "it", "de"]

all_dfs = []

for lang, alias in zip(langs, aliases):
    train_df = load_raw_df(paths.data_dir / lang / f"train_{alias}.tsv")
    train2_df   = load_raw_df(paths.data_dir / lang / f"dev_{alias}.tsv")
    train3_df  = load_raw_df(paths.data_dir / lang / f"dev_test_{alias}.tsv")
    # Add a column for language
    # Append all to a single list
    all_dfs.append(train_df)
    all_dfs.append(train2_df)
    all_dfs.append(train3_df)

# Concatenate all DataFrames into a single big DataFrame

train4_df = load_raw_df(paths.data_dir / "multilingual" / "dev_test_multilingual.tsv" )
train_df = pd.concat(all_dfs, ignore_index=True)

# If needed as a HuggingFace Dataset
train_dataset = Dataset.from_pandas(train_df)

# === 5. Tokenization ===
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_fn(examples):
    return tokenizer(
        examples["sentence"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

train_dataset = train_dataset.map(tokenize_fn, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# === 6. Load model and add LoRA ===
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    task_type=TaskType.SEQ_CLS,
    target_modules=["query_proj", "key_proj", "value_proj","dense"]
)

model = get_peft_model(model, lora_config).to(device)

# === 7. Define metrics ===
f1 = evaluate.load("f1")
accuracy = evaluate.load("accuracy")
recall = evaluate.load("recall")
precision = evaluate.load("precision")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1_macro": f1.compute(predictions=preds, references=labels, average="macro")["f1"],
        "recall": recall.compute(predictions=preds, references=labels)["recall"],
        "precision": precision.compute(predictions=preds, references=labels)["precision"]
    }

# === 8. TrainingArguments ===
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=15,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
)

# === 9. Trainer ===
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    compute_metrics=compute_metrics,
    eval_dataset=train_dataset
)

# === 10. Train ===
trainer.train()

print("Training complete")





Map:   0%|          | 0/10941 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
ml_test_df = load_raw_df(paths.data_dir / "multilingual" / "test_multilingual_labeled.tsv" )
ml_test_ds = Dataset.from_pandas(ml_test_df)
ml_test_ds = ml_test_ds.map(tokenize_fn, batched=True)
ml_test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

print("Evaluating multilingual")

# === 11. Evaluate on test set ===
test_results = trainer.evaluate(eval_dataset=ml_test_ds)
test_results

NameError: name 'tokenize_fn' is not defined

In [9]:
ukr_test_df = load_raw_df(paths.data_dir / "ukrainian" / "test_ukr_labeled.tsv" )
ukr_test_ds = Dataset.from_pandas(ukr_test_df)
ukr_test_ds = ukr_test_ds.map(tokenize_fn, batched=True)
ukr_test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
print("Evaluating zero-shot ukranian")

# === 11. Evaluate on test set ===
test_results = trainer.evaluate(eval_dataset=ukr_test_ds)
test_results

Map:   0%|          | 0/297 [00:00<?, ? examples/s]

Evaluating zero-shot ukranian


{'eval_loss': 1.3262032270431519,
 'eval_accuracy': 0.6902356902356902,
 'eval_f1_macro': 0.6182092555331992,
 'eval_recall': 0.48717948717948717,
 'eval_precision': 0.4222222222222222,
 'eval_runtime': 3.7657,
 'eval_samples_per_second': 78.87,
 'eval_steps_per_second': 19.917,
 'epoch': 15.0}

In [10]:
ro_test_df = load_raw_df(paths.data_dir / "romanian" / "test_ro_labeled.tsv" )
ro_test_ds = Dataset.from_pandas(ro_test_df)
ro_test_ds = ro_test_ds.map(tokenize_fn, batched=True)
ro_test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
print("Evaluating zero-shot romanian")

# === 11. Evaluate on test set ===
test_results = trainer.evaluate(eval_dataset=ro_test_ds)
test_results

Map:   0%|          | 0/206 [00:00<?, ? examples/s]

Evaluating zero-shot romanian


{'eval_loss': 0.403812974691391,
 'eval_accuracy': 0.8640776699029126,
 'eval_f1_macro': 0.8175838077166351,
 'eval_recall': 0.7115384615384616,
 'eval_precision': 0.74,
 'eval_runtime': 2.622,
 'eval_samples_per_second': 78.566,
 'eval_steps_per_second': 19.832,
 'epoch': 15.0}

In [11]:
pl_test_df = load_raw_df(paths.data_dir / "polish" / "test_pol_labeled.tsv" )
pl_test_ds = Dataset.from_pandas(pl_test_df)
pl_test_ds = pl_test_ds.map(tokenize_fn, batched=True)
pl_test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
print("Evaluating zero-shot polish")

# === 11. Evaluate on test set ===
test_results = trainer.evaluate(eval_dataset=pl_test_ds)
test_results

Map:   0%|          | 0/351 [00:00<?, ? examples/s]

Evaluating zero-shot polish


{'eval_loss': 1.7625617980957031,
 'eval_accuracy': 0.6353276353276354,
 'eval_f1_macro': 0.5469871743163669,
 'eval_recall': 0.2111801242236025,
 'eval_precision': 0.9714285714285714,
 'eval_runtime': 4.4333,
 'eval_samples_per_second': 79.173,
 'eval_steps_per_second': 19.85,
 'epoch': 15.0}

In [12]:
gk_test_df = load_raw_df(paths.data_dir / "greek" / "test_gr_labeled.tsv" )
gk_test_ds = Dataset.from_pandas(gk_test_df)
gk_test_ds = gk_test_ds.map(tokenize_fn, batched=True)
gk_test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
print("Evaluating zero-shot greek")

# === 11. Evaluate on test set ===
test_results = trainer.evaluate(eval_dataset=gk_test_ds)
test_results

Map:   0%|          | 0/282 [00:00<?, ? examples/s]

Evaluating zero-shot greek


{'eval_loss': 0.4134100079536438,
 'eval_accuracy': 0.8900709219858156,
 'eval_f1_macro': 0.793211117681845,
 'eval_recall': 0.6304347826086957,
 'eval_precision': 0.6744186046511628,
 'eval_runtime': 3.5617,
 'eval_samples_per_second': 79.175,
 'eval_steps_per_second': 19.934,
 'epoch': 15.0}