In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'MTCD_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/MTCD.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(df[:4])



classes = set(df['labels'].values)
display(classes)

df['labels'] = df['labels'].astype('category')
df['label'] = df['labels'].cat.codes

df = df[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))



max_sequence_length = 128



models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]





seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
            
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['text'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True)
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 8
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 200, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 200
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-17 22:17:45.014631: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-17 22:17:45.038444: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['text', 'labels'], dtype='object')

Unnamed: 0,text,labels
0,ÿÆŸàŸäÿß ŸÜÿµŸäÿ≠ÿ© ŸÖŸÜŸä. ŸÉŸÖŸÑ ŸÅÿÆÿØŸÖÿ™ŸÉ ŸàŸÖÿ™ÿØŸäŸáÿßÿ¥ ŸÅŸÉŸÑÿßŸÖ ŸÜÿßÿ≥ ÿØÿßŸÉ ŸÑŸÇÿßŸÑŸäŸÉ ŸÜŸÇÿßÿ¥ÿ© ŸàŸÑÿßŸÜ⁄ØÿßŸÅÿ©. ÿßÿ™ŸÑŸÇÿß ÿÆÿ™Ÿà ŸÑŸÉÿ™ÿµÿ±ŸÅ ÿπŸÑŸäŸá.,News
1,ÿ∂ÿ±ÿ®ŸÜÿßŸÉŸÖ ŸÉÿßŸÖŸÑŸäŸÜ Ÿ°üòÇüñïüá≤üá¶üá≤üá¶üá≤üá¶üá≤üá¶,Sport
2,ÿ®ÿ∫Ÿäÿ™ ŸÜÿ¥ÿßÿ±ŸÉ ŸÅÿ¥Ÿä ŸÖÿ≥ÿßÿ®ŸÇÿ© ÿ≠Ÿäÿ™ ÿπŸÜÿØŸä 90 ŸÅÿ™ÿÆŸÅŸäÿ∂,Gaming
3,⁄Üÿßÿ™ŸÜŸä 92ŸÅ10,Gaming


{'Gaming', 'Kitchen', 'News', 'Sport'}

4

64222

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 51377
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 12845
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.929,0.616749,0.820864,0.823962
400,0.4921,0.422066,0.86049,0.861489
600,0.3972,0.348336,0.879019,0.880999
800,0.3433,0.31797,0.885559,0.886289
1000,0.2549,0.325807,0.893422,0.893806
1200,0.2431,0.293945,0.89311,0.893277
1400,0.2208,0.301899,0.893655,0.893183
1600,0.2203,0.2806,0.900973,0.902185
1800,0.1449,0.312061,0.897081,0.897773
2000,0.1615,0.324024,0.901051,0.900966


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9502,0.608003,0.828182,0.828298
400,0.4992,0.396968,0.865629,0.866011
600,0.3911,0.343391,0.87933,0.881157
800,0.3419,0.313798,0.889062,0.890391
1000,0.2516,0.326478,0.889763,0.890736
1200,0.2395,0.302679,0.894434,0.894402
1400,0.2236,0.306024,0.89311,0.892339
1600,0.2252,0.272342,0.902141,0.903595
1800,0.143,0.31399,0.901907,0.902743
2000,0.1659,0.325984,0.896536,0.897074


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9502,0.608003,0.828182,0.828298
400,0.4992,0.396968,0.865629,0.866011
600,0.3911,0.343391,0.87933,0.881157
800,0.3419,0.313798,0.889062,0.890391
1000,0.2516,0.326478,0.889763,0.890736
1200,0.2395,0.302679,0.894434,0.894402
1400,0.2236,0.306024,0.89311,0.892339
1600,0.2252,0.272342,0.902141,0.903595
1800,0.143,0.31399,0.901907,0.902743
2000,0.1659,0.325984,0.896536,0.897074


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9666,0.596272,0.833865,0.833613
400,0.4987,0.398045,0.865395,0.866096
600,0.3873,0.331822,0.882834,0.883987
800,0.3322,0.315231,0.889529,0.890321
1000,0.2527,0.300685,0.895212,0.896349
1200,0.2402,0.320386,0.894511,0.8954
1400,0.2421,0.290442,0.901207,0.902103
1600,0.2287,0.292863,0.90144,0.90221
1800,0.1681,0.32982,0.901985,0.903579
2000,0.1628,0.307805,0.902141,0.903501


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9666,0.596272,0.833865,0.833613
400,0.4987,0.398045,0.865395,0.866096
600,0.3873,0.331822,0.882834,0.883987
800,0.3322,0.315231,0.889529,0.890321
1000,0.2527,0.300685,0.895212,0.896349
1200,0.2402,0.320386,0.894511,0.8954
1400,0.2421,0.290442,0.901207,0.902103
1600,0.2287,0.292863,0.90144,0.90221
1800,0.1681,0.32982,0.901985,0.903579
2000,0.1628,0.307805,0.902141,0.903501


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9666,0.596272,0.833865,0.833613
400,0.4987,0.398045,0.865395,0.866096
600,0.3873,0.331822,0.882834,0.883987
800,0.3322,0.315231,0.889529,0.890321
1000,0.2527,0.300685,0.895212,0.896349
1200,0.2402,0.320386,0.894511,0.8954
1400,0.2421,0.290442,0.901207,0.902103
1600,0.2287,0.292863,0.90144,0.90221
1800,0.1681,0.32982,0.901985,0.903579
2000,0.1628,0.307805,0.902141,0.903501


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9598,0.594267,0.82935,0.829382
400,0.5032,0.398995,0.865784,0.866786
600,0.377,0.341274,0.880732,0.881339
800,0.3378,0.315869,0.890074,0.890821
1000,0.2564,0.306776,0.894979,0.895997
1200,0.2368,0.311265,0.895757,0.896914
1400,0.2326,0.307786,0.899105,0.899411
1600,0.2264,0.292487,0.902686,0.903284
1800,0.1598,0.323603,0.90035,0.900517
2000,0.153,0.344875,0.899961,0.900224


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9598,0.594267,0.82935,0.829382
400,0.5032,0.398995,0.865784,0.866786
600,0.377,0.341274,0.880732,0.881339
800,0.3378,0.315869,0.890074,0.890821
1000,0.2564,0.306776,0.894979,0.895997
1200,0.2368,0.311265,0.895757,0.896914
1400,0.2326,0.307786,0.899105,0.899411
1600,0.2264,0.292487,0.902686,0.903284
1800,0.1598,0.323603,0.90035,0.900517
2000,0.153,0.344875,0.899961,0.900224


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.9598,0.594267,0.82935,0.829382
400,0.5032,0.398995,0.865784,0.866786
600,0.377,0.341274,0.880732,0.881339
800,0.3378,0.315869,0.890074,0.890821
1000,0.2564,0.306776,0.894979,0.895997
1200,0.2368,0.311265,0.895757,0.896914
1400,0.2326,0.307786,0.899105,0.899411
1600,0.2264,0.292487,0.902686,0.903284
1800,0.1598,0.323603,0.90035,0.900517
2000,0.153,0.344875,0.899961,0.900224


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 51377
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 12845
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5016,0.374039,0.867264,0.869755
400,0.3561,0.322921,0.875905,0.875765
600,0.3164,0.325393,0.884469,0.886109
800,0.2943,0.291912,0.897548,0.899636
1000,0.184,0.304302,0.905411,0.906498
1200,0.182,0.296637,0.900662,0.900473
1400,0.1574,0.302153,0.903464,0.903855
1600,0.1609,0.303395,0.904243,0.904415
1800,0.0891,0.331693,0.909225,0.91019
2000,0.0971,0.345036,0.90035,0.899898


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5016,0.374039,0.867264,0.869755
400,0.3561,0.322921,0.875905,0.875765
600,0.3164,0.325393,0.884469,0.886109
800,0.2943,0.291912,0.897548,0.899636
1000,0.184,0.304302,0.905411,0.906498
1200,0.182,0.296637,0.900662,0.900473
1400,0.1574,0.302153,0.903464,0.903855
1600,0.1609,0.303395,0.904243,0.904415
1800,0.0891,0.331693,0.909225,0.91019
2000,0.0971,0.345036,0.90035,0.899898


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5016,0.374039,0.867264,0.869755
400,0.3561,0.322921,0.875905,0.875765
600,0.3164,0.325393,0.884469,0.886109
800,0.2943,0.291912,0.897548,0.899636
1000,0.184,0.304302,0.905411,0.906498
1200,0.182,0.296637,0.900662,0.900473
1400,0.1574,0.302153,0.903464,0.903855
1600,0.1609,0.303395,0.904243,0.904415
1800,0.0891,0.331693,0.909225,0.91019
2000,0.0971,0.345036,0.90035,0.899898


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5106,0.387151,0.864149,0.86631
400,0.3579,0.330889,0.88478,0.887877
600,0.3149,0.292082,0.892799,0.895448
800,0.2907,0.271643,0.904321,0.906536
1000,0.1823,0.293295,0.895913,0.899126
1200,0.1767,0.293718,0.904554,0.905404
1400,0.1809,0.265971,0.91195,0.913175
1600,0.172,0.282743,0.911483,0.912508
1800,0.0901,0.36952,0.907668,0.909125
2000,0.0966,0.305794,0.911405,0.91255


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5106,0.387151,0.864149,0.86631
400,0.3579,0.330889,0.88478,0.887877
600,0.3149,0.292082,0.892799,0.895448
800,0.2907,0.271643,0.904321,0.906536
1000,0.1823,0.293295,0.895913,0.899126
1200,0.1767,0.293718,0.904554,0.905404
1400,0.1809,0.265971,0.91195,0.913175
1600,0.172,0.282743,0.911483,0.912508
1800,0.0901,0.36952,0.907668,0.909125
2000,0.0966,0.305794,0.911405,0.91255


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5106,0.387151,0.864149,0.86631
400,0.3579,0.330889,0.88478,0.887877
600,0.3149,0.292082,0.892799,0.895448
800,0.2907,0.271643,0.904321,0.906536
1000,0.1823,0.293295,0.895913,0.899126
1200,0.1767,0.293718,0.904554,0.905404
1400,0.1809,0.265971,0.91195,0.913175
1600,0.172,0.282743,0.911483,0.912508
1800,0.0901,0.36952,0.907668,0.909125
2000,0.0966,0.305794,0.911405,0.91255


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5046,0.356733,0.873959,0.874654
400,0.3609,0.305851,0.891164,0.892764
600,0.3056,0.30369,0.893655,0.893825
800,0.2893,0.27039,0.904632,0.904505
1000,0.1791,0.273043,0.903854,0.903936
1200,0.174,0.28107,0.908603,0.909403
1400,0.1698,0.28531,0.908914,0.909651
1600,0.1674,0.284563,0.912884,0.913042
1800,0.0958,0.320047,0.909537,0.910286
2000,0.0949,0.32802,0.912962,0.913604


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5046,0.356733,0.873959,0.874654
400,0.3609,0.305851,0.891164,0.892764
600,0.3056,0.30369,0.893655,0.893825
800,0.2893,0.27039,0.904632,0.904505
1000,0.1791,0.273043,0.903854,0.903936
1200,0.174,0.28107,0.908603,0.909403
1400,0.1698,0.28531,0.908914,0.909651
1600,0.1674,0.284563,0.912884,0.913042
1800,0.0958,0.320047,0.909537,0.910286
2000,0.0949,0.32802,0.912962,0.913604


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.5046,0.356733,0.873959,0.874654
400,0.3609,0.305851,0.891164,0.892764
600,0.3056,0.30369,0.893655,0.893825
800,0.2893,0.27039,0.904632,0.904505
1000,0.1791,0.273043,0.903854,0.903936
1200,0.174,0.28107,0.908603,0.909403
1400,0.1698,0.28531,0.908914,0.909651
1600,0.1674,0.284563,0.912884,0.913042
1800,0.0958,0.320047,0.909537,0.910286
2000,0.0949,0.32802,0.912962,0.913604


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 51377
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 12845
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2425,1.162016,0.471467,0.380133
400,1.0472,1.066616,0.560763,0.542299
600,0.9728,0.908349,0.620786,0.612173
800,0.9182,0.848764,0.658466,0.656422
1000,0.8618,0.837657,0.667808,0.668258
1200,0.8427,0.820424,0.674192,0.675071
1400,0.8059,0.783589,0.686726,0.687241
1600,0.8131,0.784575,0.690775,0.691384
1800,0.7723,0.82999,0.666796,0.668717
2000,0.7602,0.790856,0.685325,0.687911


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2425,1.162016,0.471467,0.380133
400,1.0472,1.066616,0.560763,0.542299
600,0.9728,0.908349,0.620786,0.612173
800,0.9182,0.848764,0.658466,0.656422
1000,0.8618,0.837657,0.667808,0.668258
1200,0.8427,0.820424,0.674192,0.675071
1400,0.8059,0.783589,0.686726,0.687241
1600,0.8131,0.784575,0.690775,0.691384
1800,0.7723,0.82999,0.666796,0.668717
2000,0.7602,0.790856,0.685325,0.687911


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2425,1.162016,0.471467,0.380133
400,1.0472,1.066616,0.560763,0.542299
600,0.9728,0.908349,0.620786,0.612173
800,0.9182,0.848764,0.658466,0.656422
1000,0.8618,0.837657,0.667808,0.668258
1200,0.8427,0.820424,0.674192,0.675071
1400,0.8059,0.783589,0.686726,0.687241
1600,0.8131,0.784575,0.690775,0.691384
1800,0.7723,0.82999,0.666796,0.668717
2000,0.7602,0.790856,0.685325,0.687911


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2276,1.125681,0.508447,0.431254
400,1.039,1.009173,0.558895,0.559881
600,0.9508,0.896119,0.646866,0.641027
800,0.8886,0.8633,0.651771,0.651053
1000,0.8647,0.859004,0.649046,0.639031
1200,0.8287,0.840516,0.663215,0.652981
1400,0.8205,0.806955,0.679564,0.682479
1600,0.7943,0.793702,0.682756,0.683701
1800,0.7605,0.803097,0.68984,0.692553
2000,0.7558,0.774907,0.690619,0.689612


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2276,1.125681,0.508447,0.431254
400,1.039,1.009173,0.558895,0.559881
600,0.9508,0.896119,0.646866,0.641027
800,0.8886,0.8633,0.651771,0.651053
1000,0.8647,0.859004,0.649046,0.639031
1200,0.8287,0.840516,0.663215,0.652981
1400,0.8205,0.806955,0.679564,0.682479
1600,0.7943,0.793702,0.682756,0.683701
1800,0.7605,0.803097,0.68984,0.692553
2000,0.7558,0.774907,0.690619,0.689612


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2276,1.125681,0.508447,0.431254
400,1.039,1.009173,0.558895,0.559881
600,0.9508,0.896119,0.646866,0.641027
800,0.8886,0.8633,0.651771,0.651053
1000,0.8647,0.859004,0.649046,0.639031
1200,0.8287,0.840516,0.663215,0.652981
1400,0.8205,0.806955,0.679564,0.682479
1600,0.7943,0.793702,0.682756,0.683701
1800,0.7605,0.803097,0.68984,0.692553
2000,0.7558,0.774907,0.690619,0.689612


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2266,1.163268,0.491865,0.381331
400,1.035,1.016056,0.566913,0.560809
600,0.9387,0.900506,0.644687,0.645455
800,0.8921,0.900711,0.630985,0.62817
1000,0.8482,0.859045,0.648657,0.639753
1200,0.8262,0.872063,0.651538,0.648664
1400,0.8096,0.843801,0.653562,0.656607
1600,0.8034,0.827648,0.665006,0.662756
1800,0.7586,0.815908,0.674893,0.675445
2000,0.7577,0.77692,0.682834,0.683805


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2266,1.163268,0.491865,0.381331
400,1.035,1.016056,0.566913,0.560809
600,0.9387,0.900506,0.644687,0.645455
800,0.8921,0.900711,0.630985,0.62817
1000,0.8482,0.859045,0.648657,0.639753
1200,0.8262,0.872063,0.651538,0.648664
1400,0.8096,0.843801,0.653562,0.656607
1600,0.8034,0.827648,0.665006,0.662756
1800,0.7586,0.815908,0.674893,0.675445
2000,0.7577,0.77692,0.682834,0.683805


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,1.2266,1.163268,0.491865,0.381331
400,1.035,1.016056,0.566913,0.560809
600,0.9387,0.900506,0.644687,0.645455
800,0.8921,0.900711,0.630985,0.62817
1000,0.8482,0.859045,0.648657,0.639753
1200,0.8262,0.872063,0.651538,0.648664
1400,0.8096,0.843801,0.653562,0.656607
1600,0.8034,0.827648,0.665006,0.662756
1800,0.7586,0.815908,0.674893,0.675445
2000,0.7577,0.77692,0.682834,0.683805


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 51377
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 12845
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4613,0.35842,0.870455,0.872621
400,0.3454,0.309992,0.885792,0.884752
600,0.3107,0.297902,0.891242,0.892883
800,0.2874,0.286041,0.897392,0.898491
1000,0.1548,0.310673,0.902219,0.903613
1200,0.15,0.288846,0.902219,0.903333
1400,0.1428,0.310472,0.908058,0.908035
1600,0.1392,0.2954,0.908213,0.908042
1800,0.0646,0.438836,0.901285,0.903076
2000,0.0784,0.400521,0.906111,0.905874


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4613,0.35842,0.870455,0.872621
400,0.3454,0.309992,0.885792,0.884752
600,0.3107,0.297902,0.891242,0.892883
800,0.2874,0.286041,0.897392,0.898491
1000,0.1548,0.310673,0.902219,0.903613
1200,0.15,0.288846,0.902219,0.903333
1400,0.1428,0.310472,0.908058,0.908035
1600,0.1392,0.2954,0.908213,0.908042
1800,0.0646,0.438836,0.901285,0.903076
2000,0.0784,0.400521,0.906111,0.905874


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4613,0.35842,0.870455,0.872621
400,0.3454,0.309992,0.885792,0.884752
600,0.3107,0.297902,0.891242,0.892883
800,0.2874,0.286041,0.897392,0.898491
1000,0.1548,0.310673,0.902219,0.903613
1200,0.15,0.288846,0.902219,0.903333
1400,0.1428,0.310472,0.908058,0.908035
1600,0.1392,0.2954,0.908213,0.908042
1800,0.0646,0.438836,0.901285,0.903076
2000,0.0784,0.400521,0.906111,0.905874


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4902,0.391245,0.855274,0.856077
400,0.3394,0.299853,0.890152,0.891654
600,0.3129,0.278274,0.894745,0.897568
800,0.2876,0.270149,0.903931,0.905452
1000,0.1542,0.275382,0.906734,0.908257
1200,0.1532,0.299103,0.908992,0.91084
1400,0.159,0.272687,0.910782,0.912365
1600,0.1509,0.290581,0.909615,0.911573
1800,0.0701,0.400458,0.904632,0.904987
2000,0.0748,0.395152,0.905956,0.907636


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4902,0.391245,0.855274,0.856077
400,0.3394,0.299853,0.890152,0.891654
600,0.3129,0.278274,0.894745,0.897568
800,0.2876,0.270149,0.903931,0.905452
1000,0.1542,0.275382,0.906734,0.908257
1200,0.1532,0.299103,0.908992,0.91084
1400,0.159,0.272687,0.910782,0.912365
1600,0.1509,0.290581,0.909615,0.911573
1800,0.0701,0.400458,0.904632,0.904987
2000,0.0748,0.395152,0.905956,0.907636


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4902,0.391245,0.855274,0.856077
400,0.3394,0.299853,0.890152,0.891654
600,0.3129,0.278274,0.894745,0.897568
800,0.2876,0.270149,0.903931,0.905452
1000,0.1542,0.275382,0.906734,0.908257
1200,0.1532,0.299103,0.908992,0.91084
1400,0.159,0.272687,0.910782,0.912365
1600,0.1509,0.290581,0.909615,0.911573
1800,0.0701,0.400458,0.904632,0.904987
2000,0.0748,0.395152,0.905956,0.907636


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4705,0.360773,0.86812,0.869979
400,0.3491,0.301704,0.889763,0.891234
600,0.2984,0.28058,0.898482,0.898865
800,0.2828,0.289164,0.899105,0.899826
1000,0.16,0.346199,0.896302,0.896387
1200,0.144,0.299579,0.909225,0.909947
1400,0.1494,0.345643,0.899494,0.899366
1600,0.1544,0.300701,0.906501,0.906906
1800,0.0764,0.359567,0.905878,0.906095
2000,0.0751,0.392353,0.907591,0.908055


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4705,0.360773,0.86812,0.869979
400,0.3491,0.301704,0.889763,0.891234
600,0.2984,0.28058,0.898482,0.898865
800,0.2828,0.289164,0.899105,0.899826
1000,0.16,0.346199,0.896302,0.896387
1200,0.144,0.299579,0.909225,0.909947
1400,0.1494,0.345643,0.899494,0.899366
1600,0.1544,0.300701,0.906501,0.906906
1800,0.0764,0.359567,0.905878,0.906095
2000,0.0751,0.392353,0.907591,0.908055


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4705,0.360773,0.86812,0.869979
400,0.3491,0.301704,0.889763,0.891234
600,0.2984,0.28058,0.898482,0.898865
800,0.2828,0.289164,0.899105,0.899826
1000,0.16,0.346199,0.896302,0.896387
1200,0.144,0.299579,0.909225,0.909947
1400,0.1494,0.345643,0.899494,0.899366
1600,0.1544,0.300701,0.906501,0.906906
1800,0.0764,0.359567,0.905878,0.906095
2000,0.0751,0.392353,0.907591,0.908055


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 51377
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 12845
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4017,0.373332,0.859868,0.864811
400,0.3238,0.279805,0.897236,0.896414
600,0.2831,0.275952,0.900817,0.9031
800,0.2663,0.273941,0.904788,0.907199
1000,0.1734,0.259128,0.915531,0.915838
1200,0.1621,0.27977,0.908369,0.909506
1400,0.1454,0.317324,0.903854,0.90573
1600,0.1568,0.251235,0.915843,0.916358
1800,0.0837,0.340259,0.911016,0.912248
2000,0.0883,0.370457,0.905177,0.90504


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4017,0.373332,0.859868,0.864811
400,0.3238,0.279805,0.897236,0.896414
600,0.2831,0.275952,0.900817,0.9031
800,0.2663,0.273941,0.904788,0.907199
1000,0.1734,0.259128,0.915531,0.915838
1200,0.1621,0.27977,0.908369,0.909506
1400,0.1454,0.317324,0.903854,0.90573
1600,0.1568,0.251235,0.915843,0.916358
1800,0.0837,0.340259,0.911016,0.912248
2000,0.0883,0.370457,0.905177,0.90504


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4017,0.373332,0.859868,0.864811
400,0.3238,0.279805,0.897236,0.896414
600,0.2831,0.275952,0.900817,0.9031
800,0.2663,0.273941,0.904788,0.907199
1000,0.1734,0.259128,0.915531,0.915838
1200,0.1621,0.27977,0.908369,0.909506
1400,0.1454,0.317324,0.903854,0.90573
1600,0.1568,0.251235,0.915843,0.916358
1800,0.0837,0.340259,0.911016,0.912248
2000,0.0883,0.370457,0.905177,0.90504


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4197,0.336476,0.876995,0.880803
400,0.3117,0.294284,0.894589,0.898005
600,0.289,0.308365,0.891475,0.894595
800,0.2667,0.24954,0.909381,0.912742
1000,0.1694,0.276714,0.906968,0.908807
1200,0.1603,0.304705,0.911327,0.912482
1400,0.1657,0.271279,0.909692,0.911719
1600,0.1619,0.259959,0.916232,0.917051
1800,0.0779,0.350842,0.910549,0.911133
2000,0.0958,0.312357,0.917711,0.919573


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4197,0.336476,0.876995,0.880803
400,0.3117,0.294284,0.894589,0.898005
600,0.289,0.308365,0.891475,0.894595
800,0.2667,0.24954,0.909381,0.912742
1000,0.1694,0.276714,0.906968,0.908807
1200,0.1603,0.304705,0.911327,0.912482
1400,0.1657,0.271279,0.909692,0.911719
1600,0.1619,0.259959,0.916232,0.917051
1800,0.0779,0.350842,0.910549,0.911133
2000,0.0958,0.312357,0.917711,0.919573


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4197,0.336476,0.876995,0.880803
400,0.3117,0.294284,0.894589,0.898005
600,0.289,0.308365,0.891475,0.894595
800,0.2667,0.24954,0.909381,0.912742
1000,0.1694,0.276714,0.906968,0.908807
1200,0.1603,0.304705,0.911327,0.912482
1400,0.1657,0.271279,0.909692,0.911719
1600,0.1619,0.259959,0.916232,0.917051
1800,0.0779,0.350842,0.910549,0.911133
2000,0.0958,0.312357,0.917711,0.919573


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4074,0.35622,0.877228,0.87876
400,0.3153,0.273217,0.902297,0.903345
600,0.2842,0.262492,0.906345,0.906923
800,0.2687,0.255684,0.911639,0.912852
1000,0.1673,0.26712,0.908603,0.908379
1200,0.1569,0.291287,0.907513,0.909072
1400,0.1584,0.291486,0.911639,0.912885
1600,0.1639,0.265218,0.914208,0.915185
1800,0.086,0.365286,0.913896,0.914763
2000,0.0869,0.3707,0.913896,0.914256


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4074,0.35622,0.877228,0.87876
400,0.3153,0.273217,0.902297,0.903345
600,0.2842,0.262492,0.906345,0.906923
800,0.2687,0.255684,0.911639,0.912852
1000,0.1673,0.26712,0.908603,0.908379
1200,0.1569,0.291287,0.907513,0.909072
1400,0.1584,0.291486,0.911639,0.912885
1600,0.1639,0.265218,0.914208,0.915185
1800,0.086,0.365286,0.913896,0.914763
2000,0.0869,0.3707,0.913896,0.914256


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4074,0.35622,0.877228,0.87876
400,0.3153,0.273217,0.902297,0.903345
600,0.2842,0.262492,0.906345,0.906923
800,0.2687,0.255684,0.911639,0.912852
1000,0.1673,0.26712,0.908603,0.908379
1200,0.1569,0.291287,0.907513,0.909072
1400,0.1584,0.291486,0.911639,0.912885
1600,0.1639,0.265218,0.914208,0.915185
1800,0.086,0.365286,0.913896,0.914763
2000,0.0869,0.3707,0.913896,0.914256


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 51377
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 12845
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4492,0.381331,0.86306,0.864897
400,0.349,0.335805,0.875671,0.871118
600,0.3087,0.296456,0.890308,0.889271
800,0.2945,0.289893,0.895368,0.895978
1000,0.1784,0.318541,0.901985,0.902658
1200,0.1764,0.296424,0.901985,0.901243
1400,0.162,0.3151,0.904165,0.903541
1600,0.1696,0.279239,0.906111,0.90581
1800,0.0885,0.407914,0.899572,0.898408
2000,0.087,0.384672,0.902608,0.902209


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4492,0.381331,0.86306,0.864897
400,0.349,0.335805,0.875671,0.871118
600,0.3087,0.296456,0.890308,0.889271
800,0.2945,0.289893,0.895368,0.895978
1000,0.1784,0.318541,0.901985,0.902658
1200,0.1764,0.296424,0.901985,0.901243
1400,0.162,0.3151,0.904165,0.903541
1600,0.1696,0.279239,0.906111,0.90581
1800,0.0885,0.407914,0.899572,0.898408
2000,0.087,0.384672,0.902608,0.902209


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4492,0.381331,0.86306,0.864897
400,0.349,0.335805,0.875671,0.871118
600,0.3087,0.296456,0.890308,0.889271
800,0.2945,0.289893,0.895368,0.895978
1000,0.1784,0.318541,0.901985,0.902658
1200,0.1764,0.296424,0.901985,0.901243
1400,0.162,0.3151,0.904165,0.903541
1600,0.1696,0.279239,0.906111,0.90581
1800,0.0885,0.407914,0.899572,0.898408
2000,0.087,0.384672,0.902608,0.902209


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4687,0.35823,0.873414,0.874743
400,0.3395,0.305986,0.892176,0.892832
600,0.3136,0.292364,0.888906,0.891959
800,0.2918,0.261497,0.906423,0.90715
1000,0.1771,0.27533,0.902919,0.903832
1200,0.1726,0.292116,0.905644,0.90542
1400,0.1769,0.28646,0.907513,0.90809
1600,0.1722,0.282158,0.906968,0.907706
1800,0.0802,0.369006,0.910393,0.911157
2000,0.0867,0.355608,0.906189,0.907431


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4687,0.35823,0.873414,0.874743
400,0.3395,0.305986,0.892176,0.892832
600,0.3136,0.292364,0.888906,0.891959
800,0.2918,0.261497,0.906423,0.90715
1000,0.1771,0.27533,0.902919,0.903832
1200,0.1726,0.292116,0.905644,0.90542
1400,0.1769,0.28646,0.907513,0.90809
1600,0.1722,0.282158,0.906968,0.907706
1800,0.0802,0.369006,0.910393,0.911157
2000,0.0867,0.355608,0.906189,0.907431


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4687,0.35823,0.873414,0.874743
400,0.3395,0.305986,0.892176,0.892832
600,0.3136,0.292364,0.888906,0.891959
800,0.2918,0.261497,0.906423,0.90715
1000,0.1771,0.27533,0.902919,0.903832
1200,0.1726,0.292116,0.905644,0.90542
1400,0.1769,0.28646,0.907513,0.90809
1600,0.1722,0.282158,0.906968,0.907706
1800,0.0802,0.369006,0.910393,0.911157
2000,0.0867,0.355608,0.906189,0.907431


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.452,0.362233,0.869132,0.8688
400,0.3539,0.310882,0.888595,0.888821
600,0.3001,0.291284,0.895446,0.894021
800,0.2862,0.272319,0.902997,0.903442
1000,0.1744,0.316386,0.898015,0.896903
1200,0.1655,0.297206,0.90253,0.901812
1400,0.173,0.298129,0.905255,0.905727
1600,0.1706,0.307524,0.905566,0.904342
1800,0.0865,0.415871,0.900117,0.901111
2000,0.0863,0.411729,0.901752,0.901802


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.452,0.362233,0.869132,0.8688
400,0.3539,0.310882,0.888595,0.888821
600,0.3001,0.291284,0.895446,0.894021
800,0.2862,0.272319,0.902997,0.903442
1000,0.1744,0.316386,0.898015,0.896903
1200,0.1655,0.297206,0.90253,0.901812
1400,0.173,0.298129,0.905255,0.905727
1600,0.1706,0.307524,0.905566,0.904342
1800,0.0865,0.415871,0.900117,0.901111
2000,0.0863,0.411729,0.901752,0.901802


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.452,0.362233,0.869132,0.8688
400,0.3539,0.310882,0.888595,0.888821
600,0.3001,0.291284,0.895446,0.894021
800,0.2862,0.272319,0.902997,0.903442
1000,0.1744,0.316386,0.898015,0.896903
1200,0.1655,0.297206,0.90253,0.901812
1400,0.173,0.298129,0.905255,0.905727
1600,0.1706,0.307524,0.905566,0.904342
1800,0.0865,0.415871,0.900117,0.901111
2000,0.0863,0.411729,0.901752,0.901802


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 51377
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 12845
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.42,0.354139,0.872324,0.874845
400,0.3472,0.324865,0.880888,0.880941
600,0.315,0.296596,0.888906,0.890234
800,0.3,0.308347,0.892721,0.895142
1000,0.1707,0.347716,0.89093,0.892301
1200,0.1743,0.333449,0.891865,0.892253
1400,0.168,0.336878,0.895835,0.895771
1600,0.1774,0.313976,0.898015,0.897793
1800,0.0899,0.413524,0.894511,0.895071
2000,0.0945,0.426238,0.891787,0.892909


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.42,0.354139,0.872324,0.874845
400,0.3472,0.324865,0.880888,0.880941
600,0.315,0.296596,0.888906,0.890234
800,0.3,0.308347,0.892721,0.895142
1000,0.1707,0.347716,0.89093,0.892301
1200,0.1743,0.333449,0.891865,0.892253
1400,0.168,0.336878,0.895835,0.895771
1600,0.1774,0.313976,0.898015,0.897793
1800,0.0899,0.413524,0.894511,0.895071
2000,0.0945,0.426238,0.891787,0.892909


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.42,0.354139,0.872324,0.874845
400,0.3472,0.324865,0.880888,0.880941
600,0.315,0.296596,0.888906,0.890234
800,0.3,0.308347,0.892721,0.895142
1000,0.1707,0.347716,0.89093,0.892301
1200,0.1743,0.333449,0.891865,0.892253
1400,0.168,0.336878,0.895835,0.895771
1600,0.1774,0.313976,0.898015,0.897793
1800,0.0899,0.413524,0.894511,0.895071
2000,0.0945,0.426238,0.891787,0.892909


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4364,0.346275,0.873803,0.875839
400,0.3363,0.300309,0.892409,0.894643
600,0.319,0.299125,0.890152,0.893032
800,0.3027,0.282584,0.893422,0.896292
1000,0.1758,0.317928,0.8942,0.895479
1200,0.1736,0.319565,0.894278,0.894376
1400,0.175,0.317738,0.902141,0.903876
1600,0.1822,0.286626,0.907201,0.908644
1800,0.091,0.396008,0.897548,0.898855
2000,0.0988,0.366521,0.899338,0.900868


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4364,0.346275,0.873803,0.875839
400,0.3363,0.300309,0.892409,0.894643
600,0.319,0.299125,0.890152,0.893032
800,0.3027,0.282584,0.893422,0.896292
1000,0.1758,0.317928,0.8942,0.895479
1200,0.1736,0.319565,0.894278,0.894376
1400,0.175,0.317738,0.902141,0.903876
1600,0.1822,0.286626,0.907201,0.908644
1800,0.091,0.396008,0.897548,0.898855
2000,0.0988,0.366521,0.899338,0.900868


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.4364,0.346275,0.873803,0.875839
400,0.3363,0.300309,0.892409,0.894643
600,0.319,0.299125,0.890152,0.893032
800,0.3027,0.282584,0.893422,0.896292
1000,0.1758,0.317928,0.8942,0.895479
1200,0.1736,0.319565,0.894278,0.894376
1400,0.175,0.317738,0.902141,0.903876
1600,0.1822,0.286626,0.907201,0.908644
1800,0.091,0.396008,0.897548,0.898855
2000,0.0988,0.366521,0.899338,0.900868


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.425,0.3621,0.863215,0.866219
400,0.3511,0.324836,0.880498,0.880987
600,0.3115,0.308628,0.885714,0.885435
800,0.2962,0.292053,0.892487,0.892592
1000,0.1731,0.320294,0.891086,0.892113
1200,0.175,0.322435,0.895134,0.895994
1400,0.1779,0.299787,0.897859,0.899401
1600,0.1736,0.316647,0.89638,0.896585
1800,0.0963,0.412877,0.898015,0.898244
2000,0.0966,0.389087,0.896069,0.897282


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.425,0.3621,0.863215,0.866219
400,0.3511,0.324836,0.880498,0.880987
600,0.3115,0.308628,0.885714,0.885435
800,0.2962,0.292053,0.892487,0.892592
1000,0.1731,0.320294,0.891086,0.892113
1200,0.175,0.322435,0.895134,0.895994
1400,0.1779,0.299787,0.897859,0.899401
1600,0.1736,0.316647,0.89638,0.896585
1800,0.0963,0.412877,0.898015,0.898244
2000,0.0966,0.389087,0.896069,0.897282


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/51377 [00:00<?, ? examples/s]

Map:   0%|          | 0/12845 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
200,0.425,0.3621,0.863215,0.866219
400,0.3511,0.324836,0.880498,0.880987
600,0.3115,0.308628,0.885714,0.885435
800,0.2962,0.292053,0.892487,0.892592
1000,0.1731,0.320294,0.891086,0.892113
1200,0.175,0.322435,0.895134,0.895994
1400,0.1779,0.299787,0.897859,0.899401
1600,0.1736,0.316647,0.89638,0.896585
1800,0.0963,0.412877,0.898015,0.898244
2000,0.0966,0.389087,0.896069,0.897282


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.92067,0.920993
3,alger-ia/dziribert,0.914675,0.916346
6,faisalq/EgyBERT,0.909381,0.910202
9,faisalq/SaudiBERT,0.9174,0.9192
12,otmangi/MorRoBERTa,0.910782,0.911175
15,otmangi/MorrBERT,0.907201,0.908644
18,tunis-ai/TunBERT,0.72573,0.730697
