In [1]:
import os 

os.chdir("../..")

from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, AutoModelForSequenceClassification
from peft import get_peft_model, LoraConfig, TaskType
import evaluate
import numpy as np
from task1.config import ProjectPaths
import pandas as pd
import torch

paths = ProjectPaths()

# === 3. Set device ===
device = "mps" if torch.backends.mps.is_available() else "cpu"

# === 4. Load and preprocess data ===
def load_dataset(path):
    df = pd.read_csv(path, sep='\t')
    df = df[df['label'].isin(['SUBJ', 'OBJ'])].copy()
    df['label'] = df['label'].map({'OBJ': 0, 'SUBJ': 1})
    df = df[['sentence', 'label']]
    return Dataset.from_pandas(df)

train_dataset = load_dataset(paths.arabic_data_dir / "train_ar.tsv")
val_dataset   = load_dataset(paths.arabic_data_dir / "dev_ar.tsv")
test_dataset  = load_dataset(paths.arabic_data_dir / "dev_test_ar.tsv")
competition_test_dataset = load_dataset(paths.arabic_data_dir / "test_ar_labeled.tsv")

W0615 18:59:50.296000 26588 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
# === 5. Tokenization ===
model_name = "CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_fn(examples):
    return tokenizer(
        examples["sentence"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

train_dataset = train_dataset.map(tokenize_fn, batched=True)
val_dataset = val_dataset.map(tokenize_fn, batched=True)
test_dataset = test_dataset.map(tokenize_fn, batched=True)
competition_test_dataset = competition_test_dataset.map(tokenize_fn, batched=True)


train_dataset = train_dataset.rename_column("label", "labels")
val_dataset = val_dataset.rename_column("label", "labels")
test_dataset = test_dataset.rename_column("label", "labels")
competition_test_dataset = competition_test_dataset.rename_column("label", "labels")

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
competition_test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/2446 [00:00<?, ? examples/s]

Map:   0%|          | 0/467 [00:00<?, ? examples/s]

Map:   0%|          | 0/748 [00:00<?, ? examples/s]

Map:   0%|          | 0/1036 [00:00<?, ? examples/s]

In [3]:
# === 7. Define metrics ===
f1 = evaluate.load("f1")
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1_macro": f1.compute(predictions=preds, references=labels, average="macro")["f1"],
        "precision": precision.compute(predictions=preds, references=labels, average="macro")["precision"],
        "recall": recall.compute(predictions=preds, references=labels, average="macro")["recall"],
    }

In [None]:
# === 6. Load model and add LoRA ===

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, ignore_mismatched_sizes=True )

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type=TaskType.SEQ_CLS,
    target_modules=["query", "key", "value"]  
)

model = get_peft_model(model, lora_config).to(device)


# === 8. TrainingArguments ===
training_args = TrainingArguments(
    output_dir="./results/arabic_lora",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# === 9. Trainer ===
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# === 10. Train ===
trainer.train()

# === 11. Evaluate on test set ===
trainer.evaluate(eval_dataset=test_dataset)

  0%|          | 0/6120 [00:00<?, ?it/s]



{'loss': 0.7509, 'grad_norm': 3.164447069168091, 'learning_rate': 4.991830065359477e-05, 'epoch': 0.02}
{'loss': 0.6906, 'grad_norm': 4.1256327629089355, 'learning_rate': 4.983660130718955e-05, 'epoch': 0.03}
{'loss': 0.7275, 'grad_norm': 5.46102237701416, 'learning_rate': 4.975490196078432e-05, 'epoch': 0.05}
{'loss': 0.7454, 'grad_norm': 5.023782253265381, 'learning_rate': 4.967320261437909e-05, 'epoch': 0.07}
{'loss': 0.6569, 'grad_norm': 8.242788314819336, 'learning_rate': 4.959150326797386e-05, 'epoch': 0.08}
{'loss': 0.7101, 'grad_norm': 4.897618770599365, 'learning_rate': 4.9509803921568634e-05, 'epoch': 0.1}
{'loss': 0.7061, 'grad_norm': 3.834735155105591, 'learning_rate': 4.9428104575163404e-05, 'epoch': 0.11}
{'loss': 0.7137, 'grad_norm': 7.145719051361084, 'learning_rate': 4.9346405228758174e-05, 'epoch': 0.13}
{'loss': 0.6853, 'grad_norm': 3.5212061405181885, 'learning_rate': 4.9264705882352944e-05, 'epoch': 0.15}
{'loss': 0.6901, 'grad_norm': 3.950662851333618, 'learning_r

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.6790615320205688, 'eval_accuracy': 0.5546038543897216, 'eval_f1_macro': 0.43761000463177396, 'eval_precision': 0.5020052866648437, 'eval_recall': 0.5008229529046496, 'eval_runtime': 21.3759, 'eval_samples_per_second': 21.847, 'eval_steps_per_second': 5.473, 'epoch': 1.0}




{'loss': 0.7051, 'grad_norm': 5.968447208404541, 'learning_rate': 4.493464052287582e-05, 'epoch': 1.01}
{'loss': 0.6874, 'grad_norm': 4.307218074798584, 'learning_rate': 4.485294117647059e-05, 'epoch': 1.03}
{'loss': 0.6725, 'grad_norm': 4.584897041320801, 'learning_rate': 4.477124183006536e-05, 'epoch': 1.05}
{'loss': 0.6899, 'grad_norm': 4.60283899307251, 'learning_rate': 4.468954248366014e-05, 'epoch': 1.06}
{'loss': 0.6198, 'grad_norm': 8.168156623840332, 'learning_rate': 4.460784313725491e-05, 'epoch': 1.08}
{'loss': 0.7014, 'grad_norm': 7.459867000579834, 'learning_rate': 4.452614379084967e-05, 'epoch': 1.09}
{'loss': 0.7086, 'grad_norm': 7.400516986846924, 'learning_rate': 4.4444444444444447e-05, 'epoch': 1.11}
{'loss': 0.6379, 'grad_norm': 4.418552875518799, 'learning_rate': 4.4362745098039216e-05, 'epoch': 1.13}
{'loss': 0.6621, 'grad_norm': 7.265684127807617, 'learning_rate': 4.4281045751633986e-05, 'epoch': 1.14}
{'loss': 0.7312, 'grad_norm': 3.205720901489258, 'learning_rat

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.6807137727737427, 'eval_accuracy': 0.5674518201284796, 'eval_f1_macro': 0.4738174921909861, 'eval_precision': 0.5365778166193286, 'eval_recall': 0.5187876407436502, 'eval_runtime': 21.0262, 'eval_samples_per_second': 22.21, 'eval_steps_per_second': 5.564, 'epoch': 2.0}




{'loss': 0.6778, 'grad_norm': 7.523370742797852, 'learning_rate': 3.9950980392156864e-05, 'epoch': 2.01}
{'loss': 0.7227, 'grad_norm': 8.627387046813965, 'learning_rate': 3.986928104575164e-05, 'epoch': 2.03}
{'loss': 0.6867, 'grad_norm': 2.6529695987701416, 'learning_rate': 3.97875816993464e-05, 'epoch': 2.04}
{'loss': 0.7021, 'grad_norm': 4.787527084350586, 'learning_rate': 3.970588235294117e-05, 'epoch': 2.06}
{'loss': 0.6774, 'grad_norm': 3.0096983909606934, 'learning_rate': 3.962418300653595e-05, 'epoch': 2.08}
{'loss': 0.6382, 'grad_norm': 6.428757667541504, 'learning_rate': 3.954248366013072e-05, 'epoch': 2.09}
{'loss': 0.6906, 'grad_norm': 4.863439083099365, 'learning_rate': 3.946078431372549e-05, 'epoch': 2.11}
{'loss': 0.7138, 'grad_norm': 5.079254627227783, 'learning_rate': 3.9379084967320266e-05, 'epoch': 2.12}
{'loss': 0.7819, 'grad_norm': 4.813907623291016, 'learning_rate': 3.9297385620915035e-05, 'epoch': 2.14}
{'loss': 0.6727, 'grad_norm': 7.999652862548828, 'learning_r

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.693486750125885, 'eval_accuracy': 0.5653104925053534, 'eval_f1_macro': 0.4746499088405291, 'eval_precision': 0.5329289732770746, 'eval_recall': 0.5175158044364643, 'eval_runtime': 21.3432, 'eval_samples_per_second': 21.881, 'eval_steps_per_second': 5.482, 'epoch': 3.0}




{'loss': 0.5619, 'grad_norm': 5.293996334075928, 'learning_rate': 3.4967320261437906e-05, 'epoch': 3.01}
{'loss': 0.6465, 'grad_norm': 6.7464189529418945, 'learning_rate': 3.488562091503268e-05, 'epoch': 3.02}
{'loss': 0.7245, 'grad_norm': 9.954477310180664, 'learning_rate': 3.480392156862745e-05, 'epoch': 3.04}
{'loss': 0.6452, 'grad_norm': 3.194906234741211, 'learning_rate': 3.472222222222222e-05, 'epoch': 3.06}
{'loss': 0.699, 'grad_norm': 3.842905044555664, 'learning_rate': 3.464052287581699e-05, 'epoch': 3.07}
{'loss': 0.7168, 'grad_norm': 7.275867938995361, 'learning_rate': 3.455882352941177e-05, 'epoch': 3.09}
{'loss': 0.6152, 'grad_norm': 4.3054351806640625, 'learning_rate': 3.447712418300654e-05, 'epoch': 3.1}
{'loss': 0.6595, 'grad_norm': 2.889285087585449, 'learning_rate': 3.439542483660131e-05, 'epoch': 3.12}
{'loss': 0.6717, 'grad_norm': 6.458240985870361, 'learning_rate': 3.431372549019608e-05, 'epoch': 3.14}
{'loss': 0.6536, 'grad_norm': 3.5540897846221924, 'learning_rat

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.6821533441543579, 'eval_accuracy': 0.5781584582441114, 'eval_f1_macro': 0.5595457526822519, 'eval_precision': 0.5639631610219845, 'eval_recall': 0.5604028728537762, 'eval_runtime': 21.5317, 'eval_samples_per_second': 21.689, 'eval_steps_per_second': 5.434, 'epoch': 4.0}




{'loss': 0.6425, 'grad_norm': 3.6672627925872803, 'learning_rate': 2.9983660130718955e-05, 'epoch': 4.0}
{'loss': 0.6453, 'grad_norm': 5.392911434173584, 'learning_rate': 2.9901960784313725e-05, 'epoch': 4.02}
{'loss': 0.6198, 'grad_norm': 3.051546096801758, 'learning_rate': 2.9820261437908498e-05, 'epoch': 4.04}
{'loss': 0.6464, 'grad_norm': 4.175714492797852, 'learning_rate': 2.9738562091503268e-05, 'epoch': 4.05}
{'loss': 0.6556, 'grad_norm': 5.5802459716796875, 'learning_rate': 2.965686274509804e-05, 'epoch': 4.07}
{'loss': 0.6627, 'grad_norm': 5.699153423309326, 'learning_rate': 2.957516339869281e-05, 'epoch': 4.08}
{'loss': 0.6861, 'grad_norm': 3.7744593620300293, 'learning_rate': 2.9493464052287584e-05, 'epoch': 4.1}
{'loss': 0.605, 'grad_norm': 3.8415467739105225, 'learning_rate': 2.9411764705882354e-05, 'epoch': 4.12}
{'loss': 0.6036, 'grad_norm': 2.4305689334869385, 'learning_rate': 2.9330065359477127e-05, 'epoch': 4.13}
{'loss': 0.5962, 'grad_norm': 3.036693572998047, 'learn

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.6951653361320496, 'eval_accuracy': 0.5674518201284796, 'eval_f1_macro': 0.5169493261234689, 'eval_precision': 0.5432806324110673, 'eval_recall': 0.5327684883851419, 'eval_runtime': 20.306, 'eval_samples_per_second': 22.998, 'eval_steps_per_second': 5.762, 'epoch': 5.0}




{'loss': 0.7144, 'grad_norm': 6.309552192687988, 'learning_rate': 2.4918300653594774e-05, 'epoch': 5.02}
{'loss': 0.5726, 'grad_norm': 5.911734104156494, 'learning_rate': 2.4836601307189544e-05, 'epoch': 5.03}
{'loss': 0.594, 'grad_norm': 5.559919357299805, 'learning_rate': 2.4754901960784317e-05, 'epoch': 5.05}
{'loss': 0.6987, 'grad_norm': 8.57082462310791, 'learning_rate': 2.4673202614379087e-05, 'epoch': 5.07}
{'loss': 0.8372, 'grad_norm': 2.5112838745117188, 'learning_rate': 2.4591503267973857e-05, 'epoch': 5.08}
{'loss': 0.7081, 'grad_norm': 3.4107468128204346, 'learning_rate': 2.4509803921568626e-05, 'epoch': 5.1}
{'loss': 0.7661, 'grad_norm': 8.572282791137695, 'learning_rate': 2.44281045751634e-05, 'epoch': 5.11}
{'loss': 0.6249, 'grad_norm': 5.360421657562256, 'learning_rate': 2.434640522875817e-05, 'epoch': 5.13}
{'loss': 0.6494, 'grad_norm': 4.284732818603516, 'learning_rate': 2.4264705882352942e-05, 'epoch': 5.15}
{'loss': 0.6185, 'grad_norm': 3.6587791442871094, 'learning

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.6982872486114502, 'eval_accuracy': 0.588865096359743, 'eval_f1_macro': 0.55821836815136, 'eval_precision': 0.5728330311663645, 'eval_recall': 0.5631148767440991, 'eval_runtime': 21.4325, 'eval_samples_per_second': 21.789, 'eval_steps_per_second': 5.459, 'epoch': 6.0}




{'loss': 0.6873, 'grad_norm': 2.3993728160858154, 'learning_rate': 1.993464052287582e-05, 'epoch': 6.01}
{'loss': 0.663, 'grad_norm': 7.096456527709961, 'learning_rate': 1.9852941176470586e-05, 'epoch': 6.03}
{'loss': 0.718, 'grad_norm': 8.997204780578613, 'learning_rate': 1.977124183006536e-05, 'epoch': 6.05}
{'loss': 0.6888, 'grad_norm': 5.602980136871338, 'learning_rate': 1.9689542483660133e-05, 'epoch': 6.06}
{'loss': 0.6231, 'grad_norm': 4.094402313232422, 'learning_rate': 1.9607843137254903e-05, 'epoch': 6.08}
{'loss': 0.6525, 'grad_norm': 3.6936538219451904, 'learning_rate': 1.9526143790849676e-05, 'epoch': 6.09}
{'loss': 0.6294, 'grad_norm': 4.135326385498047, 'learning_rate': 1.9444444444444445e-05, 'epoch': 6.11}
{'loss': 0.6151, 'grad_norm': 3.9150164127349854, 'learning_rate': 1.936274509803922e-05, 'epoch': 6.13}
{'loss': 0.6568, 'grad_norm': 6.210345268249512, 'learning_rate': 1.9281045751633988e-05, 'epoch': 6.14}
{'loss': 0.63, 'grad_norm': 5.903650283813477, 'learning_

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.7021394968032837, 'eval_accuracy': 0.5802997858672377, 'eval_f1_macro': 0.565919265498141, 'eval_precision': 0.5680948885368775, 'eval_recall': 0.5659297497475031, 'eval_runtime': 20.5097, 'eval_samples_per_second': 22.77, 'eval_steps_per_second': 5.705, 'epoch': 7.0}




{'loss': 0.6804, 'grad_norm': 4.649173736572266, 'learning_rate': 1.4950980392156863e-05, 'epoch': 7.01}
{'loss': 0.5993, 'grad_norm': 5.461357593536377, 'learning_rate': 1.4869281045751634e-05, 'epoch': 7.03}
{'loss': 0.6212, 'grad_norm': 5.992237567901611, 'learning_rate': 1.4787581699346405e-05, 'epoch': 7.04}
{'loss': 0.6022, 'grad_norm': 4.183780670166016, 'learning_rate': 1.4705882352941177e-05, 'epoch': 7.06}
{'loss': 0.6946, 'grad_norm': 6.794656753540039, 'learning_rate': 1.462418300653595e-05, 'epoch': 7.08}
{'loss': 0.6494, 'grad_norm': 3.6167409420013428, 'learning_rate': 1.4542483660130721e-05, 'epoch': 7.09}
{'loss': 0.6122, 'grad_norm': 5.17732048034668, 'learning_rate': 1.4460784313725493e-05, 'epoch': 7.11}
{'loss': 0.6728, 'grad_norm': 5.986691474914551, 'learning_rate': 1.4379084967320261e-05, 'epoch': 7.12}
{'loss': 0.6129, 'grad_norm': 4.087143421173096, 'learning_rate': 1.4297385620915032e-05, 'epoch': 7.14}
{'loss': 0.6341, 'grad_norm': 8.021775245666504, 'learni

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.7087445259094238, 'eval_accuracy': 0.576017130620985, 'eval_f1_macro': 0.5585084033613446, 'eval_precision': 0.5621583893673077, 'eval_recall': 0.5591310365465904, 'eval_runtime': 20.7284, 'eval_samples_per_second': 22.529, 'eval_steps_per_second': 5.644, 'epoch': 8.0}




{'loss': 0.6423, 'grad_norm': 3.138932943344116, 'learning_rate': 9.96732026143791e-06, 'epoch': 8.01}
{'loss': 0.8431, 'grad_norm': 8.598648071289062, 'learning_rate': 9.88562091503268e-06, 'epoch': 8.02}
{'loss': 0.6236, 'grad_norm': 6.1877336502075195, 'learning_rate': 9.803921568627451e-06, 'epoch': 8.04}
{'loss': 0.609, 'grad_norm': 3.8524911403656006, 'learning_rate': 9.722222222222223e-06, 'epoch': 8.06}
{'loss': 0.5626, 'grad_norm': 2.7503130435943604, 'learning_rate': 9.640522875816994e-06, 'epoch': 8.07}
{'loss': 0.5504, 'grad_norm': 4.39495325088501, 'learning_rate': 9.558823529411764e-06, 'epoch': 8.09}
{'loss': 0.7196, 'grad_norm': 5.505363464355469, 'learning_rate': 9.477124183006535e-06, 'epoch': 8.1}
{'loss': 0.6223, 'grad_norm': 3.831146240234375, 'learning_rate': 9.395424836601307e-06, 'epoch': 8.12}
{'loss': 0.606, 'grad_norm': 2.973806858062744, 'learning_rate': 9.31372549019608e-06, 'epoch': 8.14}
{'loss': 0.5157, 'grad_norm': 5.501428127288818, 'learning_rate': 9.

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.7149356007575989, 'eval_accuracy': 0.5802997858672377, 'eval_f1_macro': 0.5597052258908644, 'eval_precision': 0.5655227774433073, 'eval_recall': 0.5610668462200277, 'eval_runtime': 20.2003, 'eval_samples_per_second': 23.118, 'eval_steps_per_second': 5.792, 'epoch': 9.0}




{'loss': 0.5026, 'grad_norm': 4.932605266571045, 'learning_rate': 4.983660130718955e-06, 'epoch': 9.0}
{'loss': 0.6047, 'grad_norm': 3.2314531803131104, 'learning_rate': 4.901960784313726e-06, 'epoch': 9.02}
{'loss': 0.7665, 'grad_norm': 3.2467434406280518, 'learning_rate': 4.820261437908497e-06, 'epoch': 9.04}
{'loss': 0.6548, 'grad_norm': 6.98270320892334, 'learning_rate': 4.738562091503268e-06, 'epoch': 9.05}
{'loss': 0.5577, 'grad_norm': 5.059434413909912, 'learning_rate': 4.65686274509804e-06, 'epoch': 9.07}
{'loss': 0.5756, 'grad_norm': 6.846188545227051, 'learning_rate': 4.5751633986928105e-06, 'epoch': 9.08}
{'loss': 0.6824, 'grad_norm': 4.725461006164551, 'learning_rate': 4.493464052287582e-06, 'epoch': 9.1}
{'loss': 0.6087, 'grad_norm': 4.436490535736084, 'learning_rate': 4.411764705882353e-06, 'epoch': 9.12}
{'loss': 0.5753, 'grad_norm': 4.109707832336426, 'learning_rate': 4.330065359477124e-06, 'epoch': 9.13}
{'loss': 0.6141, 'grad_norm': 5.150399684906006, 'learning_rate':

  0%|          | 0/117 [00:00<?, ?it/s]

{'eval_loss': 0.7155539393424988, 'eval_accuracy': 0.5717344753747323, 'eval_f1_macro': 0.5532468526384265, 'eval_precision': 0.5572052315473368, 'eval_recall': 0.5541559121684809, 'eval_runtime': 20.8397, 'eval_samples_per_second': 22.409, 'eval_steps_per_second': 5.614, 'epoch': 10.0}




{'train_runtime': 5313.7839, 'train_samples_per_second': 4.603, 'train_steps_per_second': 1.152, 'train_loss': 0.6537797230521059, 'epoch': 10.0}


  0%|          | 0/187 [00:00<?, ?it/s]

{'eval_loss': 0.7207608819007874,
 'eval_accuracy': 0.5655080213903744,
 'eval_f1_macro': 0.5495002992830179,
 'eval_precision': 0.5521187469645459,
 'eval_recall': 0.550030959752322,
 'eval_runtime': 31.8244,
 'eval_samples_per_second': 23.504,
 'eval_steps_per_second': 5.876,
 'epoch': 10.0}

In [8]:
from tqdm import tqdm
from peft import PeftModel

model_name = "CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment"
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2, ignore_mismatched_sizes=True )

CHECKPOINT_PATH = "./results/arabic_lora/checkpoint-2448" 

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH)

model = PeftModel.from_pretrained(base_model, CHECKPOINT_PATH)
model = model.to(device)
model.eval()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([3, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): BertForSequenceClassification(
      (bert): BertModel(
        (embeddings): BertEmbeddings(
          (word_embeddings): Embedding(30000, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (token_type_embeddings): Embedding(2, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): BertEncoder(
          (layer): ModuleList(
            (0-11): 12 x BertLayer(
              (attention): BertAttention(
                (self): BertSdpaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (defaul

In [9]:
eval_args = TrainingArguments(
    output_dir="./results/temp_eval",
    per_device_eval_batch_size=16, 
    report_to="none",
)

eval_trainer = Trainer(
    model=model,
    args=eval_args,
    compute_metrics=compute_metrics,
)

print("\nRunning evaluation on the competition test set...")
results = eval_trainer.evaluate(eval_dataset=competition_test_dataset)


# === 5. Print the Final Results ===
print("\n--- Competition Test Set Results ---")
for key, value in results.items():
    # Example: 'eval_f1_macro' -> 'F1-Macro'
    formatted_key = key.replace("eval_", "").replace("_", "-").capitalize()
    print(f"{formatted_key}: {value:.4f}")
print("------------------------------------")


Running evaluation on the competition test set...




  0%|          | 0/65 [00:00<?, ?it/s]


--- Competition Test Set Results ---
Loss: 0.6350
Accuracy: 0.6284
F1-macro: 0.5710
Precision: 0.5697
Recall: 0.5743
Runtime: 50.3276
Samples-per-second: 20.5850
Steps-per-second: 1.2920
------------------------------------
