In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'Elec_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/ElecMorocco2016.csv', encoding='utf-8', engine='python', sep='\t') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(len(df))
display(df[:4])



classes = set(df['sentiment'].values)
display(classes)

c = df['sentiment'].value_counts()
display(c)

df['sentiment'] = df['sentiment'].astype('category')
df['label'] = df['sentiment'].cat.codes

df = df[['comment_message', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


# ds = Dataset.from_pandas(df)
# ds = ds.train_test_split(test_size=0.2)

# display(ds)

max_sequence_length = 128


models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]

seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
    
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['comment_message'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True) # , batched=True
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)  # , batched=True
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 15
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 60, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 60
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-17 08:49:53.047222: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-17 08:49:53.071804: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['index', 'comment_message', 'sentiment', 'comment_published'], dtype='object')

10254

Unnamed: 0,index,comment_message,sentiment,comment_published
0,1,البكاء ليس سمت الرجال ..البكاء سلطة الضعفاء لكسب التعاطف ..انا لست ضد بنكيران وانما الرجل الصالح هو الذي يفعل ما يقول ولا يبكي الا عند النصر,N,2016-10-01 00:07:44
1,2,كلنا بنكيران\nولاية ثانية ان شاء الله\nموتوا بحقدكم,P,2016-10-01 00:47:37
2,3,الشركات في الدول المتقدمة تساهم في بناء أوطانها بدفع الضرائب وفي بلدنا العزيز الشركات تطلب من الدولة أن تخدم عليها؟,N,2016-10-01 00:48:02
3,4,من ينتقد بن كران هم خصوم معارضة لى خاصها هي سلطة لا تهتم لوضعية مواطن,N,2016-10-01 00:49:10


{'N', 'P'}

sentiment
N    6581
P    3673
Name: count, dtype: int64

2

10254

DatasetDict({
    train: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 8203
    })
    test: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 2051
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5924,0.422221,0.855193,0.832285
120,0.4279,0.393563,0.840566,0.825861
180,0.3501,0.371877,0.860556,0.838965
240,0.3084,0.335968,0.865431,0.845727
300,0.2735,0.375797,0.858118,0.84395
360,0.2304,0.363057,0.866894,0.847947
420,0.1752,0.434421,0.862506,0.845805
480,0.1668,0.472654,0.849829,0.835309
540,0.1501,0.462936,0.856168,0.840436
600,0.1061,0.546622,0.836665,0.82289


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5921,0.432106,0.850804,0.828545
120,0.433,0.396028,0.840566,0.823604
180,0.3557,0.364594,0.859093,0.835552
240,0.3076,0.343426,0.860068,0.840275
300,0.2664,0.43665,0.840566,0.828212
360,0.2192,0.371049,0.862506,0.846136
420,0.173,0.485537,0.842028,0.828655
480,0.1588,0.475682,0.837153,0.82347
540,0.1365,0.50147,0.848367,0.834585
600,0.1016,0.522296,0.85568,0.841011


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5921,0.432106,0.850804,0.828545
120,0.433,0.396028,0.840566,0.823604
180,0.3557,0.364594,0.859093,0.835552
240,0.3076,0.343426,0.860068,0.840275
300,0.2664,0.43665,0.840566,0.828212
360,0.2192,0.371049,0.862506,0.846136
420,0.173,0.485537,0.842028,0.828655
480,0.1588,0.475682,0.837153,0.82347
540,0.1365,0.50147,0.848367,0.834585
600,0.1016,0.522296,0.85568,0.841011


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5971,0.455206,0.836665,0.823304
120,0.4328,0.379707,0.854705,0.83179
180,0.332,0.353009,0.859581,0.842296
240,0.3169,0.344063,0.860068,0.841857
300,0.2458,0.37403,0.864944,0.849761
360,0.2094,0.383436,0.862506,0.848727
420,0.1778,0.457923,0.861531,0.846748
480,0.1466,0.472021,0.846416,0.836038
540,0.137,0.568948,0.845441,0.833466
600,0.1008,0.578285,0.849342,0.837299


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5971,0.455206,0.836665,0.823304
120,0.4328,0.379707,0.854705,0.83179
180,0.332,0.353009,0.859581,0.842296
240,0.3169,0.344063,0.860068,0.841857
300,0.2458,0.37403,0.864944,0.849761
360,0.2094,0.383436,0.862506,0.848727
420,0.1778,0.457923,0.861531,0.846748
480,0.1466,0.472021,0.846416,0.836038
540,0.137,0.568948,0.845441,0.833466
600,0.1008,0.578285,0.849342,0.837299


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5971,0.455206,0.836665,0.823304
120,0.4328,0.379707,0.854705,0.83179
180,0.332,0.353009,0.859581,0.842296
240,0.3169,0.344063,0.860068,0.841857
300,0.2458,0.37403,0.864944,0.849761
360,0.2094,0.383436,0.862506,0.848727
420,0.1778,0.457923,0.861531,0.846748
480,0.1466,0.472021,0.846416,0.836038
540,0.137,0.568948,0.845441,0.833466
600,0.1008,0.578285,0.849342,0.837299


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5929,0.437198,0.848367,0.831552
120,0.4274,0.387166,0.839103,0.82399
180,0.3296,0.382444,0.842516,0.828517
240,0.3063,0.355483,0.859093,0.844206
300,0.2364,0.408674,0.848854,0.835269
360,0.208,0.389325,0.853242,0.839507
420,0.1923,0.463679,0.861531,0.844487
480,0.14,0.459622,0.858118,0.843233
540,0.139,0.519703,0.849829,0.834993
600,0.1066,0.583407,0.839103,0.826975


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5929,0.437198,0.848367,0.831552
120,0.4274,0.387166,0.839103,0.82399
180,0.3296,0.382444,0.842516,0.828517
240,0.3063,0.355483,0.859093,0.844206
300,0.2364,0.408674,0.848854,0.835269
360,0.208,0.389325,0.853242,0.839507
420,0.1923,0.463679,0.861531,0.844487
480,0.14,0.459622,0.858118,0.843233
540,0.139,0.519703,0.849829,0.834993
600,0.1066,0.583407,0.839103,0.826975


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.5929,0.437198,0.848367,0.831552
120,0.4274,0.387166,0.839103,0.82399
180,0.3296,0.382444,0.842516,0.828517
240,0.3063,0.355483,0.859093,0.844206
300,0.2364,0.408674,0.848854,0.835269
360,0.208,0.389325,0.853242,0.839507
420,0.1923,0.463679,0.861531,0.844487
480,0.14,0.459622,0.858118,0.843233
540,0.139,0.519703,0.849829,0.834993
600,0.1066,0.583407,0.839103,0.826975


DatasetDict({
    train: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 8203
    })
    test: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 2051
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4071,0.339068,0.859581,0.837972
120,0.3557,0.375895,0.849342,0.812414
180,0.2477,0.415771,0.85568,0.839731
240,0.2026,0.407647,0.858606,0.842873
300,0.1373,0.713769,0.827889,0.816828
360,0.0972,0.481525,0.860068,0.844763
420,0.0556,0.718617,0.856168,0.841392
480,0.065,0.718686,0.83374,0.821454
540,0.0456,0.775834,0.860556,0.845146
600,0.0323,0.795479,0.848367,0.833652


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4071,0.339068,0.859581,0.837972
120,0.3557,0.375895,0.849342,0.812414
180,0.2477,0.415771,0.85568,0.839731
240,0.2026,0.407647,0.858606,0.842873
300,0.1373,0.713769,0.827889,0.816828
360,0.0972,0.481525,0.860068,0.844763
420,0.0556,0.718617,0.856168,0.841392
480,0.065,0.718686,0.83374,0.821454
540,0.0456,0.775834,0.860556,0.845146
600,0.0323,0.795479,0.848367,0.833652


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4071,0.339068,0.859581,0.837972
120,0.3557,0.375895,0.849342,0.812414
180,0.2477,0.415771,0.85568,0.839731
240,0.2026,0.407647,0.858606,0.842873
300,0.1373,0.713769,0.827889,0.816828
360,0.0972,0.481525,0.860068,0.844763
420,0.0556,0.718617,0.856168,0.841392
480,0.065,0.718686,0.83374,0.821454
540,0.0456,0.775834,0.860556,0.845146
600,0.0323,0.795479,0.848367,0.833652


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.425,0.344543,0.862506,0.844084
120,0.3666,0.335693,0.864456,0.84864
180,0.2291,0.356427,0.85763,0.843363
240,0.2087,0.334456,0.864944,0.849448
300,0.1099,0.64094,0.83374,0.824021
360,0.0715,0.616793,0.857143,0.838551
420,0.0582,0.750071,0.867382,0.848918
480,0.0718,0.651529,0.85373,0.84009
540,0.0278,0.894167,0.85763,0.845302
600,0.0244,0.851236,0.862994,0.848316


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.425,0.344543,0.862506,0.844084
120,0.3666,0.335693,0.864456,0.84864
180,0.2291,0.356427,0.85763,0.843363
240,0.2087,0.334456,0.864944,0.849448
300,0.1099,0.64094,0.83374,0.824021
360,0.0715,0.616793,0.857143,0.838551
420,0.0582,0.750071,0.867382,0.848918
480,0.0718,0.651529,0.85373,0.84009
540,0.0278,0.894167,0.85763,0.845302
600,0.0244,0.851236,0.862994,0.848316


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.425,0.344543,0.862506,0.844084
120,0.3666,0.335693,0.864456,0.84864
180,0.2291,0.356427,0.85763,0.843363
240,0.2087,0.334456,0.864944,0.849448
300,0.1099,0.64094,0.83374,0.824021
360,0.0715,0.616793,0.857143,0.838551
420,0.0582,0.750071,0.867382,0.848918
480,0.0718,0.651529,0.85373,0.84009
540,0.0278,0.894167,0.85763,0.845302
600,0.0244,0.851236,0.862994,0.848316


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4101,0.362402,0.85568,0.842418
120,0.353,0.365166,0.857143,0.832404
180,0.234,0.39722,0.862994,0.846517
240,0.2043,0.404338,0.868844,0.852754
300,0.1295,0.690832,0.814725,0.805913
360,0.09,0.524158,0.85373,0.841347
420,0.0703,0.687232,0.858606,0.846635
480,0.0279,0.784624,0.856655,0.84229
540,0.0316,0.872327,0.849829,0.836825
600,0.0389,0.80567,0.859581,0.845308


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4101,0.362402,0.85568,0.842418
120,0.353,0.365166,0.857143,0.832404
180,0.234,0.39722,0.862994,0.846517
240,0.2043,0.404338,0.868844,0.852754
300,0.1295,0.690832,0.814725,0.805913
360,0.09,0.524158,0.85373,0.841347
420,0.0703,0.687232,0.858606,0.846635
480,0.0279,0.784624,0.856655,0.84229
540,0.0316,0.872327,0.849829,0.836825
600,0.0389,0.80567,0.859581,0.845308


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4101,0.362402,0.85568,0.842418
120,0.353,0.365166,0.857143,0.832404
180,0.234,0.39722,0.862994,0.846517
240,0.2043,0.404338,0.868844,0.852754
300,0.1295,0.690832,0.814725,0.805913
360,0.09,0.524158,0.85373,0.841347
420,0.0703,0.687232,0.858606,0.846635
480,0.0279,0.784624,0.856655,0.84229
540,0.0316,0.872327,0.849829,0.836825
600,0.0389,0.80567,0.859581,0.845308


DatasetDict({
    train: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 8203
    })
    test: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 2051
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6921,0.682247,0.660653,0.397827
120,0.6687,0.64072,0.660653,0.397827
180,0.6614,0.666089,0.584105,0.577934
240,0.6244,0.613072,0.694295,0.519073
300,0.6132,0.642346,0.618723,0.611534
360,0.5917,0.609124,0.678693,0.641578
420,0.5882,0.622257,0.670892,0.632876
480,0.5772,0.573352,0.72745,0.629643
540,0.5709,0.584806,0.700634,0.647073
600,0.5563,0.581575,0.706972,0.644814


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6921,0.682247,0.660653,0.397827
120,0.6687,0.64072,0.660653,0.397827
180,0.6614,0.666089,0.584105,0.577934
240,0.6244,0.613072,0.694295,0.519073
300,0.6132,0.642346,0.618723,0.611534
360,0.5917,0.609124,0.678693,0.641578
420,0.5882,0.622257,0.670892,0.632876
480,0.5772,0.573352,0.72745,0.629643
540,0.5709,0.584806,0.700634,0.647073
600,0.5563,0.581575,0.706972,0.644814


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6921,0.682247,0.660653,0.397827
120,0.6687,0.64072,0.660653,0.397827
180,0.6614,0.666089,0.584105,0.577934
240,0.6244,0.613072,0.694295,0.519073
300,0.6132,0.642346,0.618723,0.611534
360,0.5917,0.609124,0.678693,0.641578
420,0.5882,0.622257,0.670892,0.632876
480,0.5772,0.573352,0.72745,0.629643
540,0.5709,0.584806,0.700634,0.647073
600,0.5563,0.581575,0.706972,0.644814


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.7183,0.651238,0.644564,0.391936
120,0.6692,0.655292,0.644564,0.391936
180,0.6527,0.650757,0.644564,0.391936
240,0.6663,0.650815,0.644564,0.391936
300,0.6614,0.654513,0.644564,0.391936
360,0.6535,0.651553,0.644564,0.391936
420,0.6613,0.656096,0.644564,0.391936
480,0.6608,0.651376,0.644564,0.391936
540,0.6615,0.643682,0.644564,0.391936
600,0.6435,0.622946,0.679181,0.550625


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.7183,0.651238,0.644564,0.391936
120,0.6692,0.655292,0.644564,0.391936
180,0.6527,0.650757,0.644564,0.391936
240,0.6663,0.650815,0.644564,0.391936
300,0.6614,0.654513,0.644564,0.391936
360,0.6535,0.651553,0.644564,0.391936
420,0.6613,0.656096,0.644564,0.391936
480,0.6608,0.651376,0.644564,0.391936
540,0.6615,0.643682,0.644564,0.391936
600,0.6435,0.622946,0.679181,0.550625


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.7183,0.651238,0.644564,0.391936
120,0.6692,0.655292,0.644564,0.391936
180,0.6527,0.650757,0.644564,0.391936
240,0.6663,0.650815,0.644564,0.391936
300,0.6614,0.654513,0.644564,0.391936
360,0.6535,0.651553,0.644564,0.391936
420,0.6613,0.656096,0.644564,0.391936
480,0.6608,0.651376,0.644564,0.391936
540,0.6615,0.643682,0.644564,0.391936
600,0.6435,0.622946,0.679181,0.550625


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6898,0.668852,0.646514,0.392656
120,0.6657,0.649804,0.646514,0.392656
180,0.6543,0.649029,0.646514,0.392656
240,0.6719,0.644625,0.658703,0.442635
300,0.6448,0.606122,0.686007,0.614567
360,0.6132,0.597165,0.692345,0.58137
420,0.6146,0.576854,0.712335,0.653189
480,0.5862,0.595224,0.705022,0.596405
540,0.5917,0.575157,0.710873,0.614372
600,0.57,0.603274,0.701121,0.675723


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6898,0.668852,0.646514,0.392656
120,0.6657,0.649804,0.646514,0.392656
180,0.6543,0.649029,0.646514,0.392656
240,0.6719,0.644625,0.658703,0.442635
300,0.6448,0.606122,0.686007,0.614567
360,0.6132,0.597165,0.692345,0.58137
420,0.6146,0.576854,0.712335,0.653189
480,0.5862,0.595224,0.705022,0.596405
540,0.5917,0.575157,0.710873,0.614372
600,0.57,0.603274,0.701121,0.675723


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.6898,0.668852,0.646514,0.392656
120,0.6657,0.649804,0.646514,0.392656
180,0.6543,0.649029,0.646514,0.392656
240,0.6719,0.644625,0.658703,0.442635
300,0.6448,0.606122,0.686007,0.614567
360,0.6132,0.597165,0.692345,0.58137
420,0.6146,0.576854,0.712335,0.653189
480,0.5862,0.595224,0.705022,0.596405
540,0.5917,0.575157,0.710873,0.614372
600,0.57,0.603274,0.701121,0.675723


DatasetDict({
    train: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 8203
    })
    test: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 2051
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.436,0.488974,0.78352,0.772002
120,0.3827,0.357678,0.849342,0.823561
180,0.2517,0.441415,0.849342,0.826513
240,0.2131,0.430861,0.840078,0.822647
300,0.1277,0.596211,0.838615,0.8172
360,0.0902,0.57585,0.822526,0.802642
420,0.0563,1.216122,0.772306,0.761736
480,0.062,0.874954,0.808386,0.794114
540,0.0348,0.889992,0.836177,0.816803
600,0.0316,0.998999,0.822038,0.806219


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.436,0.488974,0.78352,0.772002
120,0.3827,0.357678,0.849342,0.823561
180,0.2517,0.441415,0.849342,0.826513
240,0.2131,0.430861,0.840078,0.822647
300,0.1277,0.596211,0.838615,0.8172
360,0.0902,0.57585,0.822526,0.802642
420,0.0563,1.216122,0.772306,0.761736
480,0.062,0.874954,0.808386,0.794114
540,0.0348,0.889992,0.836177,0.816803
600,0.0316,0.998999,0.822038,0.806219


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.436,0.488974,0.78352,0.772002
120,0.3827,0.357678,0.849342,0.823561
180,0.2517,0.441415,0.849342,0.826513
240,0.2131,0.430861,0.840078,0.822647
300,0.1277,0.596211,0.838615,0.8172
360,0.0902,0.57585,0.822526,0.802642
420,0.0563,1.216122,0.772306,0.761736
480,0.062,0.874954,0.808386,0.794114
540,0.0348,0.889992,0.836177,0.816803
600,0.0316,0.998999,0.822038,0.806219


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.452,0.367254,0.844466,0.815046
120,0.3888,0.348174,0.85373,0.835963
180,0.2403,0.409674,0.83764,0.828397
240,0.2117,0.3602,0.855193,0.838914
300,0.1188,0.634799,0.835202,0.820608
360,0.0843,0.657396,0.849829,0.823434
420,0.0485,0.885559,0.853242,0.835118
480,0.0513,0.737983,0.844954,0.828054
540,0.036,0.855884,0.852267,0.834382
600,0.017,0.825284,0.854705,0.834476


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.452,0.367254,0.844466,0.815046
120,0.3888,0.348174,0.85373,0.835963
180,0.2403,0.409674,0.83764,0.828397
240,0.2117,0.3602,0.855193,0.838914
300,0.1188,0.634799,0.835202,0.820608
360,0.0843,0.657396,0.849829,0.823434
420,0.0485,0.885559,0.853242,0.835118
480,0.0513,0.737983,0.844954,0.828054
540,0.036,0.855884,0.852267,0.834382
600,0.017,0.825284,0.854705,0.834476


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.452,0.367254,0.844466,0.815046
120,0.3888,0.348174,0.85373,0.835963
180,0.2403,0.409674,0.83764,0.828397
240,0.2117,0.3602,0.855193,0.838914
300,0.1188,0.634799,0.835202,0.820608
360,0.0843,0.657396,0.849829,0.823434
420,0.0485,0.885559,0.853242,0.835118
480,0.0513,0.737983,0.844954,0.828054
540,0.036,0.855884,0.852267,0.834382
600,0.017,0.825284,0.854705,0.834476


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4316,0.380915,0.834715,0.81264
120,0.3774,0.384359,0.843003,0.816058
180,0.2492,0.429136,0.839103,0.815982
240,0.1999,0.446112,0.830327,0.81462
300,0.1306,0.799213,0.817162,0.805708
360,0.0904,0.60835,0.831789,0.81004
420,0.052,0.946399,0.825939,0.807421
480,0.0444,0.953896,0.814725,0.799972
540,0.0236,1.11456,0.823013,0.808199
600,0.0151,1.174286,0.818137,0.803031


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4316,0.380915,0.834715,0.81264
120,0.3774,0.384359,0.843003,0.816058
180,0.2492,0.429136,0.839103,0.815982
240,0.1999,0.446112,0.830327,0.81462
300,0.1306,0.799213,0.817162,0.805708
360,0.0904,0.60835,0.831789,0.81004
420,0.052,0.946399,0.825939,0.807421
480,0.0444,0.953896,0.814725,0.799972
540,0.0236,1.11456,0.823013,0.808199
600,0.0151,1.174286,0.818137,0.803031


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4316,0.380915,0.834715,0.81264
120,0.3774,0.384359,0.843003,0.816058
180,0.2492,0.429136,0.839103,0.815982
240,0.1999,0.446112,0.830327,0.81462
300,0.1306,0.799213,0.817162,0.805708
360,0.0904,0.60835,0.831789,0.81004
420,0.052,0.946399,0.825939,0.807421
480,0.0444,0.953896,0.814725,0.799972
540,0.0236,1.11456,0.823013,0.808199
600,0.0151,1.174286,0.818137,0.803031


DatasetDict({
    train: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 8203
    })
    test: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 2051
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4723,0.456162,0.79961,0.784803
120,0.4122,0.390766,0.837153,0.800716
180,0.3113,0.394975,0.844466,0.821905
240,0.2693,0.389533,0.837153,0.81724
300,0.1936,0.677393,0.787421,0.77548
360,0.1231,0.506526,0.838128,0.818334
420,0.0825,0.695141,0.836177,0.808395
480,0.0803,0.716687,0.818137,0.802211
540,0.0466,0.871809,0.826426,0.809278
600,0.0415,0.680296,0.829839,0.811736


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4723,0.456162,0.79961,0.784803
120,0.4122,0.390766,0.837153,0.800716
180,0.3113,0.394975,0.844466,0.821905
240,0.2693,0.389533,0.837153,0.81724
300,0.1936,0.677393,0.787421,0.77548
360,0.1231,0.506526,0.838128,0.818334
420,0.0825,0.695141,0.836177,0.808395
480,0.0803,0.716687,0.818137,0.802211
540,0.0466,0.871809,0.826426,0.809278
600,0.0415,0.680296,0.829839,0.811736


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4723,0.456162,0.79961,0.784803
120,0.4122,0.390766,0.837153,0.800716
180,0.3113,0.394975,0.844466,0.821905
240,0.2693,0.389533,0.837153,0.81724
300,0.1936,0.677393,0.787421,0.77548
360,0.1231,0.506526,0.838128,0.818334
420,0.0825,0.695141,0.836177,0.808395
480,0.0803,0.716687,0.818137,0.802211
540,0.0466,0.871809,0.826426,0.809278
600,0.0415,0.680296,0.829839,0.811736


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4904,0.387721,0.832765,0.811281
120,0.4045,0.391415,0.833252,0.820783
180,0.2955,0.370939,0.845929,0.831245
240,0.2785,0.347078,0.850317,0.834723
300,0.1862,0.50574,0.85373,0.837343
360,0.1215,0.495098,0.848854,0.82821
420,0.0985,0.739702,0.846416,0.828914
480,0.0755,0.610108,0.842516,0.823703
540,0.0535,0.85628,0.83959,0.81789
600,0.0303,0.892444,0.835202,0.818213


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4904,0.387721,0.832765,0.811281
120,0.4045,0.391415,0.833252,0.820783
180,0.2955,0.370939,0.845929,0.831245
240,0.2785,0.347078,0.850317,0.834723
300,0.1862,0.50574,0.85373,0.837343
360,0.1215,0.495098,0.848854,0.82821
420,0.0985,0.739702,0.846416,0.828914
480,0.0755,0.610108,0.842516,0.823703
540,0.0535,0.85628,0.83959,0.81789
600,0.0303,0.892444,0.835202,0.818213


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4904,0.387721,0.832765,0.811281
120,0.4045,0.391415,0.833252,0.820783
180,0.2955,0.370939,0.845929,0.831245
240,0.2785,0.347078,0.850317,0.834723
300,0.1862,0.50574,0.85373,0.837343
360,0.1215,0.495098,0.848854,0.82821
420,0.0985,0.739702,0.846416,0.828914
480,0.0755,0.610108,0.842516,0.823703
540,0.0535,0.85628,0.83959,0.81789
600,0.0303,0.892444,0.835202,0.818213


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4848,0.397666,0.828864,0.806008
120,0.4124,0.43029,0.81765,0.773804
180,0.32,0.400648,0.818137,0.806937
240,0.2851,0.386833,0.83764,0.823101
300,0.2009,0.562182,0.828376,0.815338
360,0.1476,0.441995,0.841053,0.826447
420,0.0986,0.790847,0.836177,0.82167
480,0.0566,0.817972,0.814725,0.806575
540,0.0482,0.857923,0.829352,0.815445
600,0.0445,0.7213,0.843003,0.824311


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4848,0.397666,0.828864,0.806008
120,0.4124,0.43029,0.81765,0.773804
180,0.32,0.400648,0.818137,0.806937
240,0.2851,0.386833,0.83764,0.823101
300,0.2009,0.562182,0.828376,0.815338
360,0.1476,0.441995,0.841053,0.826447
420,0.0986,0.790847,0.836177,0.82167
480,0.0566,0.817972,0.814725,0.806575
540,0.0482,0.857923,0.829352,0.815445
600,0.0445,0.7213,0.843003,0.824311


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4848,0.397666,0.828864,0.806008
120,0.4124,0.43029,0.81765,0.773804
180,0.32,0.400648,0.818137,0.806937
240,0.2851,0.386833,0.83764,0.823101
300,0.2009,0.562182,0.828376,0.815338
360,0.1476,0.441995,0.841053,0.826447
420,0.0986,0.790847,0.836177,0.82167
480,0.0566,0.817972,0.814725,0.806575
540,0.0482,0.857923,0.829352,0.815445
600,0.0445,0.7213,0.843003,0.824311


DatasetDict({
    train: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 8203
    })
    test: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 2051
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4625,0.405493,0.824963,0.804607
120,0.4135,0.385709,0.83374,0.800613
180,0.2915,0.442406,0.826426,0.804202
240,0.2749,0.42358,0.823501,0.804395
300,0.2161,0.751645,0.74549,0.738167
360,0.1529,0.635465,0.814725,0.793539
420,0.0978,0.915021,0.812774,0.794145
480,0.0774,0.855674,0.806923,0.784696
540,0.0679,1.010729,0.816187,0.790351
600,0.0451,1.006372,0.807411,0.786347


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4625,0.405493,0.824963,0.804607
120,0.4135,0.385709,0.83374,0.800613
180,0.2915,0.442406,0.826426,0.804202
240,0.2749,0.42358,0.823501,0.804395
300,0.2161,0.751645,0.74549,0.738167
360,0.1529,0.635465,0.814725,0.793539
420,0.0978,0.915021,0.812774,0.794145
480,0.0774,0.855674,0.806923,0.784696
540,0.0679,1.010729,0.816187,0.790351
600,0.0451,1.006372,0.807411,0.786347


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4625,0.405493,0.824963,0.804607
120,0.4135,0.385709,0.83374,0.800613
180,0.2915,0.442406,0.826426,0.804202
240,0.2749,0.42358,0.823501,0.804395
300,0.2161,0.751645,0.74549,0.738167
360,0.1529,0.635465,0.814725,0.793539
420,0.0978,0.915021,0.812774,0.794145
480,0.0774,0.855674,0.806923,0.784696
540,0.0679,1.010729,0.816187,0.790351
600,0.0451,1.006372,0.807411,0.786347


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.482,0.401274,0.818625,0.78259
120,0.4124,0.383543,0.829839,0.800196
180,0.3091,0.411904,0.821063,0.807415
240,0.2896,0.399416,0.826914,0.810238
300,0.1925,0.527012,0.831302,0.813788
360,0.1467,0.573057,0.830814,0.806731
420,0.1107,0.84461,0.814725,0.80098
480,0.075,0.759821,0.835202,0.811222
540,0.0542,0.853862,0.830814,0.806731
600,0.0401,0.938953,0.828864,0.810146


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.482,0.401274,0.818625,0.78259
120,0.4124,0.383543,0.829839,0.800196
180,0.3091,0.411904,0.821063,0.807415
240,0.2896,0.399416,0.826914,0.810238
300,0.1925,0.527012,0.831302,0.813788
360,0.1467,0.573057,0.830814,0.806731
420,0.1107,0.84461,0.814725,0.80098
480,0.075,0.759821,0.835202,0.811222
540,0.0542,0.853862,0.830814,0.806731
600,0.0401,0.938953,0.828864,0.810146


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.482,0.401274,0.818625,0.78259
120,0.4124,0.383543,0.829839,0.800196
180,0.3091,0.411904,0.821063,0.807415
240,0.2896,0.399416,0.826914,0.810238
300,0.1925,0.527012,0.831302,0.813788
360,0.1467,0.573057,0.830814,0.806731
420,0.1107,0.84461,0.814725,0.80098
480,0.075,0.759821,0.835202,0.811222
540,0.0542,0.853862,0.830814,0.806731
600,0.0401,0.938953,0.828864,0.810146


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4739,0.403473,0.823501,0.789233
120,0.4108,0.395607,0.826914,0.807055
180,0.2962,0.440381,0.827889,0.807465
240,0.2644,0.433899,0.830327,0.807593
300,0.1917,0.650751,0.825939,0.800531
360,0.1414,0.529259,0.808386,0.792591
420,0.0955,0.975958,0.811799,0.797388
480,0.0622,0.798646,0.804486,0.7916
540,0.0529,1.022648,0.815212,0.797021
600,0.0399,0.968345,0.823501,0.803319


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4739,0.403473,0.823501,0.789233
120,0.4108,0.395607,0.826914,0.807055
180,0.2962,0.440381,0.827889,0.807465
240,0.2644,0.433899,0.830327,0.807593
300,0.1917,0.650751,0.825939,0.800531
360,0.1414,0.529259,0.808386,0.792591
420,0.0955,0.975958,0.811799,0.797388
480,0.0622,0.798646,0.804486,0.7916
540,0.0529,1.022648,0.815212,0.797021
600,0.0399,0.968345,0.823501,0.803319


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4739,0.403473,0.823501,0.789233
120,0.4108,0.395607,0.826914,0.807055
180,0.2962,0.440381,0.827889,0.807465
240,0.2644,0.433899,0.830327,0.807593
300,0.1917,0.650751,0.825939,0.800531
360,0.1414,0.529259,0.808386,0.792591
420,0.0955,0.975958,0.811799,0.797388
480,0.0622,0.798646,0.804486,0.7916
540,0.0529,1.022648,0.815212,0.797021
600,0.0399,0.968345,0.823501,0.803319


DatasetDict({
    train: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 8203
    })
    test: Dataset({
        features: ['comment_message', 'label'],
        num_rows: 2051
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.46,0.381549,0.829352,0.801606
120,0.4131,0.372231,0.83569,0.806206
180,0.2843,0.431767,0.83764,0.811482
240,0.2429,0.432236,0.831789,0.814142
300,0.1642,0.587135,0.825939,0.803579
360,0.1116,0.65915,0.800585,0.782859
420,0.0712,0.852131,0.827889,0.80549
480,0.0524,0.864715,0.78352,0.770131
540,0.0432,0.925716,0.818137,0.79523
600,0.0266,0.966699,0.825451,0.804249


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.46,0.381549,0.829352,0.801606
120,0.4131,0.372231,0.83569,0.806206
180,0.2843,0.431767,0.83764,0.811482
240,0.2429,0.432236,0.831789,0.814142
300,0.1642,0.587135,0.825939,0.803579
360,0.1116,0.65915,0.800585,0.782859
420,0.0712,0.852131,0.827889,0.80549
480,0.0524,0.864715,0.78352,0.770131
540,0.0432,0.925716,0.818137,0.79523
600,0.0266,0.966699,0.825451,0.804249


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.46,0.381549,0.829352,0.801606
120,0.4131,0.372231,0.83569,0.806206
180,0.2843,0.431767,0.83764,0.811482
240,0.2429,0.432236,0.831789,0.814142
300,0.1642,0.587135,0.825939,0.803579
360,0.1116,0.65915,0.800585,0.782859
420,0.0712,0.852131,0.827889,0.80549
480,0.0524,0.864715,0.78352,0.770131
540,0.0432,0.925716,0.818137,0.79523
600,0.0266,0.966699,0.825451,0.804249


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4819,0.435585,0.810824,0.764073
120,0.42,0.398665,0.828376,0.806687
180,0.2825,0.400458,0.829352,0.811635
240,0.2485,0.416887,0.829352,0.809034
300,0.1426,0.74151,0.793759,0.782735
360,0.0933,0.666622,0.81765,0.797081
420,0.0619,0.864834,0.823988,0.802251
480,0.0496,0.86601,0.823988,0.80123
540,0.0362,0.997435,0.8196,0.796802
600,0.0308,1.046841,0.827401,0.797954


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4819,0.435585,0.810824,0.764073
120,0.42,0.398665,0.828376,0.806687
180,0.2825,0.400458,0.829352,0.811635
240,0.2485,0.416887,0.829352,0.809034
300,0.1426,0.74151,0.793759,0.782735
360,0.0933,0.666622,0.81765,0.797081
420,0.0619,0.864834,0.823988,0.802251
480,0.0496,0.86601,0.823988,0.80123
540,0.0362,0.997435,0.8196,0.796802
600,0.0308,1.046841,0.827401,0.797954


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4819,0.435585,0.810824,0.764073
120,0.42,0.398665,0.828376,0.806687
180,0.2825,0.400458,0.829352,0.811635
240,0.2485,0.416887,0.829352,0.809034
300,0.1426,0.74151,0.793759,0.782735
360,0.0933,0.666622,0.81765,0.797081
420,0.0619,0.864834,0.823988,0.802251
480,0.0496,0.86601,0.823988,0.80123
540,0.0362,0.997435,0.8196,0.796802
600,0.0308,1.046841,0.827401,0.797954


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4532,0.4226,0.814237,0.794474
120,0.3945,0.435166,0.818137,0.783336
180,0.2818,0.466164,0.823013,0.797178
240,0.2499,0.468977,0.823501,0.801631
300,0.1491,0.646361,0.82155,0.799293
360,0.1002,0.636942,0.807899,0.78378
420,0.0809,0.97518,0.823013,0.794671
480,0.0482,0.908475,0.81765,0.803576
540,0.0318,1.035337,0.815212,0.793137
600,0.0413,1.054035,0.81765,0.79694


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4532,0.4226,0.814237,0.794474
120,0.3945,0.435166,0.818137,0.783336
180,0.2818,0.466164,0.823013,0.797178
240,0.2499,0.468977,0.823501,0.801631
300,0.1491,0.646361,0.82155,0.799293
360,0.1002,0.636942,0.807899,0.78378
420,0.0809,0.97518,0.823013,0.794671
480,0.0482,0.908475,0.81765,0.803576
540,0.0318,1.035337,0.815212,0.793137
600,0.0413,1.054035,0.81765,0.79694


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8203 [00:00<?, ? examples/s]

Map:   0%|          | 0/2051 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4532,0.4226,0.814237,0.794474
120,0.3945,0.435166,0.818137,0.783336
180,0.2818,0.466164,0.823013,0.797178
240,0.2499,0.468977,0.823501,0.801631
300,0.1491,0.646361,0.82155,0.799293
360,0.1002,0.636942,0.807899,0.78378
420,0.0809,0.97518,0.823013,0.794671
480,0.0482,0.908475,0.81765,0.803576
540,0.0318,1.035337,0.815212,0.793137
600,0.0413,1.054035,0.81765,0.79694


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.85373,0.837343
3,alger-ia/dziribert,0.855193,0.838914
6,faisalq/EgyBERT,0.864944,0.849761
9,faisalq/SaudiBERT,0.87177,0.85869
12,otmangi/MorRoBERTa,0.838128,0.815734
15,otmangi/MorrBERT,0.842516,0.817755
18,tunis-ai/TunBERT,0.728425,0.685464
