In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)

fname = 'OMCD_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/OMCD/train.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
df_test = pd.read_csv('datasets/OMCD/test.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(df_test.columns)
display(df[:4])



classes = set(df['off'].values)
display(classes)

df['off'] = df['off'].astype('category')
df['label'] = df['off'].cat.codes


df_test['off'] = df_test['off'].astype('category')
df_test['label'] = df_test['off'].cat.codes

df = df[['comment', 'label']]
df_test = df_test[['comment', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))
display(len(df_test))


ds_t = Dataset.from_pandas(df)
ds_v = Dataset.from_pandas(df_test)

display(ds_t)
display(ds_v)

max_sequence_length = 128


models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds_t
        dataset_validation = ds_v                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['comment'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 20
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 60, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 60
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-15 09:47:25.471467: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-15 09:47:25.495308: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['Unnamed: 0', 'comment', 'off'], dtype='object')

Index(['Unnamed: 0', 'comment', 'off'], dtype='object')

Unnamed: 0.1,Unnamed: 0,comment,off
0,2908,فنانين الكبت والفساد .عقلية جنسية لا غير. العفن وليس الفن. شعب فيه اغلبية مستعدة للجنس وعندها قابلية .,1
1,1055,الدعارة هربت منها في المحمدية و سكنت في بوزنيقة و هي بحال بحال موجودة في كل المدن و السبب ملكنا زامل و الناس فقراء بالزاف,1
2,181,كون غير خريتي و مدرتيش هادشي,1
3,4313,لا حول ولا قوة الا بالله العلي العظيم لا حول ولا قوة الا بالله العلي العظيم. استغفرالله العظيم واتوب اليه. مساكن الوالدين هما اللي في الواجهة. شعب لا يرحم,0


{0, 1}

2

6419

1605

Dataset({
    features: ['comment', 'label'],
    num_rows: 6419
})

Dataset({
    features: ['comment', 'label'],
    num_rows: 1605
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6591,0.659846,0.656075,0.617952
120,0.6571,0.627193,0.667913,0.633471
180,0.5301,0.442871,0.801246,0.8007
240,0.399,0.439409,0.790031,0.789967
300,0.3199,0.387068,0.824299,0.823227
360,0.2297,0.434718,0.813707,0.813446
420,0.2197,0.471307,0.832399,0.828231
480,0.1539,0.570209,0.810592,0.810575
540,0.109,0.586505,0.83053,0.829174
600,0.109,0.577751,0.834891,0.833767


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6187,0.488469,0.807477,0.802689
120,0.4432,0.427887,0.816199,0.813697
180,0.3614,0.395844,0.819938,0.819395
240,0.2708,0.528337,0.807477,0.807409
300,0.2271,0.432945,0.832399,0.831981
360,0.1767,0.475713,0.821807,0.821679
420,0.1597,0.481548,0.844237,0.841805
480,0.1231,0.540584,0.839252,0.838638
540,0.0937,0.578369,0.837383,0.834269
600,0.0962,0.587478,0.834268,0.833156


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6187,0.488469,0.807477,0.802689
120,0.4432,0.427887,0.816199,0.813697
180,0.3614,0.395844,0.819938,0.819395
240,0.2708,0.528337,0.807477,0.807409
300,0.2271,0.432945,0.832399,0.831981
360,0.1767,0.475713,0.821807,0.821679
420,0.1597,0.481548,0.844237,0.841805
480,0.1231,0.540584,0.839252,0.838638
540,0.0937,0.578369,0.837383,0.834269
600,0.0962,0.587478,0.834268,0.833156


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4606,0.377947,0.825545,0.821452
120,0.3247,0.48074,0.844237,0.840583
180,0.1952,0.386275,0.842991,0.841551
240,0.1376,0.597552,0.821807,0.821477
300,0.0926,0.489382,0.853583,0.851675
360,0.0559,0.589073,0.849844,0.848626
420,0.0576,0.653749,0.854829,0.853297
480,0.0444,0.676381,0.847352,0.84562
540,0.0472,0.72109,0.857944,0.854611
600,0.0336,0.773018,0.848598,0.847909


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4606,0.377947,0.825545,0.821452
120,0.3247,0.48074,0.844237,0.840583
180,0.1952,0.386275,0.842991,0.841551
240,0.1376,0.597552,0.821807,0.821477
300,0.0926,0.489382,0.853583,0.851675
360,0.0559,0.589073,0.849844,0.848626
420,0.0576,0.653749,0.854829,0.853297
480,0.0444,0.676381,0.847352,0.84562
540,0.0472,0.72109,0.857944,0.854611
600,0.0336,0.773018,0.848598,0.847909


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4606,0.377947,0.825545,0.821452
120,0.3247,0.48074,0.844237,0.840583
180,0.1952,0.386275,0.842991,0.841551
240,0.1376,0.597552,0.821807,0.821477
300,0.0926,0.489382,0.853583,0.851675
360,0.0559,0.589073,0.849844,0.848626
420,0.0576,0.653749,0.854829,0.853297
480,0.0444,0.676381,0.847352,0.84562
540,0.0472,0.72109,0.857944,0.854611
600,0.0336,0.773018,0.848598,0.847909


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.7526,0.705177,0.446729,0.308786
120,0.7066,0.688228,0.553271,0.356197
180,0.6988,0.687398,0.553271,0.356197
240,0.6984,0.687863,0.553271,0.356197
300,0.6949,0.692092,0.548287,0.533258
360,0.7025,0.690436,0.553271,0.356197
420,0.6985,0.679696,0.586293,0.585996
480,0.6743,0.673915,0.610592,0.610396
540,0.6958,0.677101,0.579439,0.443163
600,0.6608,0.677828,0.626791,0.5908


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.7526,0.705177,0.446729,0.308786
120,0.7066,0.688228,0.553271,0.356197
180,0.6988,0.687398,0.553271,0.356197
240,0.6984,0.687863,0.553271,0.356197
300,0.6949,0.692092,0.548287,0.533258
360,0.7025,0.690436,0.553271,0.356197
420,0.6985,0.679696,0.586293,0.585996
480,0.6743,0.673915,0.610592,0.610396
540,0.6958,0.677101,0.579439,0.443163
600,0.6608,0.677828,0.626791,0.5908


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.7526,0.705177,0.446729,0.308786
120,0.7066,0.688228,0.553271,0.356197
180,0.6988,0.687398,0.553271,0.356197
240,0.6984,0.687863,0.553271,0.356197
300,0.6949,0.692092,0.548287,0.533258
360,0.7025,0.690436,0.553271,0.356197
420,0.6985,0.679696,0.586293,0.585996
480,0.6743,0.673915,0.610592,0.610396
540,0.6958,0.677101,0.579439,0.443163
600,0.6608,0.677828,0.626791,0.5908


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4475,0.403901,0.812461,0.812461
120,0.3187,0.482401,0.844237,0.841903
180,0.2002,0.491229,0.845483,0.844103
240,0.1336,0.633008,0.842991,0.841314
300,0.0973,0.561921,0.837383,0.834327
360,0.0627,0.648115,0.836137,0.833553
420,0.0483,0.811073,0.83676,0.834704
480,0.046,0.870627,0.83676,0.83644
540,0.0448,0.821329,0.839252,0.838174
600,0.0373,0.771362,0.840498,0.838059


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4475,0.403901,0.812461,0.812461
120,0.3187,0.482401,0.844237,0.841903
180,0.2002,0.491229,0.845483,0.844103
240,0.1336,0.633008,0.842991,0.841314
300,0.0973,0.561921,0.837383,0.834327
360,0.0627,0.648115,0.836137,0.833553
420,0.0483,0.811073,0.83676,0.834704
480,0.046,0.870627,0.83676,0.83644
540,0.0448,0.821329,0.839252,0.838174
600,0.0373,0.771362,0.840498,0.838059


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4475,0.403901,0.812461,0.812461
120,0.3187,0.482401,0.844237,0.841903
180,0.2002,0.491229,0.845483,0.844103
240,0.1336,0.633008,0.842991,0.841314
300,0.0973,0.561921,0.837383,0.834327
360,0.0627,0.648115,0.836137,0.833553
420,0.0483,0.811073,0.83676,0.834704
480,0.046,0.870627,0.83676,0.83644
540,0.0448,0.821329,0.839252,0.838174
600,0.0373,0.771362,0.840498,0.838059


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4421,0.351074,0.836137,0.834097
120,0.3202,0.37914,0.853583,0.852361
180,0.2159,0.384887,0.849221,0.847531
240,0.1493,0.513538,0.847352,0.844264
300,0.1052,0.600918,0.848598,0.84796
360,0.0687,0.593445,0.842368,0.838575
420,0.0584,0.736695,0.855452,0.854295
480,0.0621,0.747734,0.834268,0.83426
540,0.0422,0.752437,0.846106,0.84475
600,0.0336,0.693587,0.849844,0.848141


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4421,0.351074,0.836137,0.834097
120,0.3202,0.37914,0.853583,0.852361
180,0.2159,0.384887,0.849221,0.847531
240,0.1493,0.513538,0.847352,0.844264
300,0.1052,0.600918,0.848598,0.84796
360,0.0687,0.593445,0.842368,0.838575
420,0.0584,0.736695,0.855452,0.854295
480,0.0621,0.747734,0.834268,0.83426
540,0.0422,0.752437,0.846106,0.84475
600,0.0336,0.693587,0.849844,0.848141


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4421,0.351074,0.836137,0.834097
120,0.3202,0.37914,0.853583,0.852361
180,0.2159,0.384887,0.849221,0.847531
240,0.1493,0.513538,0.847352,0.844264
300,0.1052,0.600918,0.848598,0.84796
360,0.0687,0.593445,0.842368,0.838575
420,0.0584,0.736695,0.855452,0.854295
480,0.0621,0.747734,0.834268,0.83426
540,0.0422,0.752437,0.846106,0.84475
600,0.0336,0.693587,0.849844,0.848141


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4677,0.407187,0.804984,0.804523
120,0.348,0.494636,0.82243,0.819337
180,0.2411,0.417619,0.824299,0.821667
240,0.1764,0.633845,0.8081,0.801104
300,0.1286,0.60524,0.833645,0.83123
360,0.0633,0.816005,0.819938,0.818785
420,0.0644,0.868636,0.818692,0.81633
480,0.052,0.905689,0.795639,0.795625
540,0.0432,0.884077,0.809346,0.805241
600,0.0452,0.908662,0.804361,0.803488


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4677,0.407187,0.804984,0.804523
120,0.348,0.494636,0.82243,0.819337
180,0.2411,0.417619,0.824299,0.821667
240,0.1764,0.633845,0.8081,0.801104
300,0.1286,0.60524,0.833645,0.83123
360,0.0633,0.816005,0.819938,0.818785
420,0.0644,0.868636,0.818692,0.81633
480,0.052,0.905689,0.795639,0.795625
540,0.0432,0.884077,0.809346,0.805241
600,0.0452,0.908662,0.804361,0.803488


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4677,0.407187,0.804984,0.804523
120,0.348,0.494636,0.82243,0.819337
180,0.2411,0.417619,0.824299,0.821667
240,0.1764,0.633845,0.8081,0.801104
300,0.1286,0.60524,0.833645,0.83123
360,0.0633,0.816005,0.819938,0.818785
420,0.0644,0.868636,0.818692,0.81633
480,0.052,0.905689,0.795639,0.795625
540,0.0432,0.884077,0.809346,0.805241
600,0.0452,0.908662,0.804361,0.803488


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4198,0.378933,0.821184,0.819773
120,0.3321,0.521284,0.819315,0.815004
180,0.2048,0.410218,0.829283,0.828631
240,0.1401,0.664158,0.811838,0.805975
300,0.1072,0.601107,0.82243,0.820736
360,0.0674,0.760767,0.81433,0.811065
420,0.062,0.754349,0.823053,0.82006
480,0.05,0.869627,0.818069,0.816444
540,0.0397,0.926285,0.824922,0.822047
600,0.0292,0.866614,0.826791,0.823062


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4198,0.378933,0.821184,0.819773
120,0.3321,0.521284,0.819315,0.815004
180,0.2048,0.410218,0.829283,0.828631
240,0.1401,0.664158,0.811838,0.805975
300,0.1072,0.601107,0.82243,0.820736
360,0.0674,0.760767,0.81433,0.811065
420,0.062,0.754349,0.823053,0.82006
480,0.05,0.869627,0.818069,0.816444
540,0.0397,0.926285,0.824922,0.822047
600,0.0292,0.866614,0.826791,0.823062


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4198,0.378933,0.821184,0.819773
120,0.3321,0.521284,0.819315,0.815004
180,0.2048,0.410218,0.829283,0.828631
240,0.1401,0.664158,0.811838,0.805975
300,0.1072,0.601107,0.82243,0.820736
360,0.0674,0.760767,0.81433,0.811065
420,0.062,0.754349,0.823053,0.82006
480,0.05,0.869627,0.818069,0.816444
540,0.0397,0.926285,0.824922,0.822047
600,0.0292,0.866614,0.826791,0.823062


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.858567,0.857287
3,alger-ia/dziribert,0.847352,0.846375
6,faisalq/EgyBERT,0.849221,0.847727
8,faisalq/SaudiBERT,0.857944,0.854611
11,otmangi/MorRoBERTa,0.833645,0.83123
14,otmangi/MorrBERT,0.833022,0.831609
17,tunis-ai/TunBERT,0.652336,0.645493
