In [1]:

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

from sklearn.metrics import classification_report
from transformers import AutoTokenizer, BertForSequenceClassification
from transformers import AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score


# XNLI (Arabic) splits, read from local parquet copies of the files published at
# https://huggingface.co/datasets/xnli/viewer/ar
# (loading directly from the HF hub ran into issues).
df1, df2, df3 = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        'xnli_ar_dataset/train-00000-of-00001.parquet',       # -> df1 (train)
        'xnli_ar_dataset/validation-00000-of-00001.parquet',  # -> df2 (validation)
        'xnli_ar_dataset/test-00000-of-00001.parquet',        # -> df3 (test)
    )
)

# The evaluation set is test + validation stacked into one frame (test rows first),
# with a fresh 0..n-1 index.
df = pd.concat([df3, df2], ignore_index=True)

dataset_train, dataset_validation = Dataset.from_pandas(df1), Dataset.from_pandas(df)

# Distinct values of the training 'label' column; their count is the number of classes.
unique_labels = set(dataset_train['label'])
classes_num = len(unique_labels)

print(f'Unique Labels: {unique_labels}')
print(f'Number of Classes: {classes_num}')

# Checkpoint to fine-tune. The commented-out lines are alternative checkpoints
# (presumably differing in tokenizer type, judging by their names); uncomment one
# to switch.
url = 'faisalq/bert-base-arabic-wordpiece'
# url = 'faisalq/bert-base-arabic-senpiece'
# url = 'faisalq/bert-base-arabic-bbpe'

tokenizer = AutoTokenizer.from_pretrained(url)

# The classification head is sized from the number of distinct training labels.
# No explicit `.to('cuda')`: Trainer moves the model onto its own device when it is
# constructed, and a hard-coded device makes this cell fail on machines without a
# visible GPU.
model = BertForSequenceClassification.from_pretrained(url, num_labels=classes_num)
                                                     



# Token budget per premise/hypothesis pair: longer pairs are truncated and
# shorter ones are padded up to this length.
max_length = 128


def preprocess_function(examples):
    """Tokenize a batch of premise/hypothesis pairs.

    Args:
        examples: Batch exposing 'premise' and 'hypothesis' entries.

    Returns:
        Whatever the module-level ``tokenizer`` produces for the paired texts,
        with truncation enabled and padding to ``max_length``.
    """
    premises = examples['premise']
    hypotheses = examples['hypothesis']
    return tokenizer(
        premises,
        hypotheses,
        max_length=max_length,
        padding="max_length",
        truncation=True,
    )


# Run the tokenizer over both splits in batched mode (train first, then validation),
# rebinding each name to its mapped dataset.
dataset_train, dataset_validation = (
    split.map(preprocess_function, batched=True)
    for split in (dataset_train, dataset_validation)
)

# The compute_metrics function used for evaluation is defined in the next cell.



2024-02-06 11:39:19.134780: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-06 11:39:19.158298: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Unique Labels: {0, 1, 2}
Number of Classes: 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/392702 [00:00<?, ? examples/s]

Map:   0%|          | 0/7500 [00:00<?, ? examples/s]

In [2]:

def compute_metrics(eval_pred):
    """Score one evaluation pass.

    Args:
        eval_pred: Pair that unpacks into ``(logits, labels)``.

    Returns:
        dict with 'accuracy' and macro-averaged 'f1_score', both computed from
        the argmax of the logits along the last axis.
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)
    return {
        'accuracy': accuracy_score(labels, predicted),
        'f1_score': f1_score(labels, predicted, average='macro'),
    }





# Run-level hyperparameters.
epochs = 5
batch_size = 256
# Checkpoint interval, in steps.
# NOTE(review): the tokenization progress output reports 392,702 training rows; at
# batch size 256 for 5 epochs on one device that is roughly 7.7k steps, which is
# below this interval -- confirm whether any checkpoint is expected to be written.
save_steps = 10000


training_args = TrainingArguments(
    # --- output / checkpointing ---
    output_dir='bert_wp/',
    overwrite_output_dir=True,
    save_steps=save_steps,
    save_total_limit=2,
    # --- optimisation ---
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=5e-5,  # 5e-5 is the default
    # weight_decay=0.01,
    fp16=True,
    # --- logging / evaluation: both on the same 250-step cadence ---
    logging_steps=250,  # 50_000
    evaluation_strategy='steps',
    eval_steps=250,
)




# Wire the model, the training arguments, both datasets and the metric callback
# into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=dataset_train,
    eval_dataset=dataset_validation,
)

# Fine-tune the model.
trainer.train()

# Final evaluation pass; as the cell's last expression, its result is what the
# notebook displays.
trainer.evaluate()




Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.8959,0.751507,0.674,0.673759
500,0.7763,0.713765,0.6964,0.697333
750,0.7615,0.718696,0.69,0.691055
1000,0.7197,0.686063,0.704933,0.706164
1250,0.6988,0.657866,0.727467,0.727994
1500,0.6792,0.673454,0.715333,0.715542
1750,0.5988,0.682264,0.728267,0.72814
2000,0.5782,0.648457,0.7388,0.739333
2250,0.5792,0.668837,0.729067,0.728458
2500,0.5766,0.660417,0.727067,0.726385


{'eval_loss': 0.8862895369529724,
 'eval_accuracy': 0.7272,
 'eval_f1_score': 0.7273808477982747,
 'eval_runtime': 3.2593,
 'eval_samples_per_second': 2301.109,
 'eval_steps_per_second': 9.204,
 'epoch': 5.0}