In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'OpenTC_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df1 = pd.read_csv('datasets/OpenAccessArDialect/Topic.csv', encoding='utf-8', engine='python', sep='\t') #, quotechar="'"  , quoting=3
df2 = pd.read_csv('datasets/OpenAccessArDialect/dialect.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3

df2 = df2[df2['dialect'] == 'Morocco']
      
display(df1.columns)
display(df1[:4])

display(df2.columns)
display(df2[:4])
display(len(df2))




df = df1[df1['Text'].isin(df2['Twits'])]

display(df[:5])
display(len(df))

c = df['Tag'].value_counts()
display(c)

classes = set(df['Tag'].values)
display(classes)

df['Tag'] = df['Tag'].astype('category')
df['label'] = df['Tag'].cat.codes

df = df[['Text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))



max_sequence_length = 128



models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]


seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
            
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['Text'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True)
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 15
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 50, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 50
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-18 17:02:51.259552: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-18 17:02:51.282453: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['Text', 'Tag'], dtype='object')

Unnamed: 0,Text,Tag
0,: حالة الطوارئ الصحية: وثيقة التنقل لن تسلم للقاصرين، عند وجود حالة ضرورية لتنقل القاصرين لا بد ان يكونوا مرفوقين باشخاص ب…,social
1,: السلطات تعمل على الإغلاق الشامل لبويزاكارن البؤرة الجديدة لڤيروس كورونا بالمغرب والأقاليم الجنوبية.,sante
2,"للأسف هادي أنانية ولكن لبعلوك عجبني الرد ديالو ههههه , أميييين شكرا خويا",autre
3,انا لله وانا اليه راجعون. الله يرحمها و يغفر لها,autre


Index(['Twits', 'dialect'], dtype='object')

Unnamed: 0,Twits,dialect
13393,slawiya msikina tb9a f darha 😐😕😕,Morocco
13394,ناس د الرباط أش واقع عندكم تما ... ياكما شي شبه عائلية 😐,Morocco
13395,لي دوا مشاا 😂😂,Morocco
13396,: وزراء تقلقوا,Morocco


9965

Unnamed: 0,Text,Tag
0,: حالة الطوارئ الصحية: وثيقة التنقل لن تسلم للقاصرين، عند وجود حالة ضرورية لتنقل القاصرين لا بد ان يكونوا مرفوقين باشخاص ب…,social
1,: السلطات تعمل على الإغلاق الشامل لبويزاكارن البؤرة الجديدة لڤيروس كورونا بالمغرب والأقاليم الجنوبية.,sante
2,"للأسف هادي أنانية ولكن لبعلوك عجبني الرد ديالو ههههه , أميييين شكرا خويا",autre
3,انا لله وانا اليه راجعون. الله يرحمها و يغفر لها,autre
4,مالنا,autre


6091

Tag
autre         5411
social         346
sante          217
politique       93
sport           12
économique      12
Name: count, dtype: int64

{'autre', 'politique', 'sante', 'social', 'sport', 'économique'}

6

6091

DatasetDict({
    train: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 1219
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.452,1.028969,0.898277,0.189283
100,0.74,0.500593,0.898277,0.189283
150,0.4724,0.354701,0.898277,0.189283
200,0.3724,0.346108,0.90484,0.224008
250,0.3516,0.315257,0.913864,0.332044
300,0.3021,0.333672,0.908942,0.370651
350,0.2651,0.331183,0.914684,0.384682
400,0.235,0.342069,0.894176,0.287182
450,0.2355,0.358776,0.901559,0.266944
500,0.1985,0.375592,0.894176,0.275286


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4435,1.06225,0.898277,0.189283
100,0.7734,0.511089,0.898277,0.189283
150,0.5441,0.463856,0.898277,0.189283
200,0.4769,0.382015,0.898277,0.189283
250,0.4726,0.355594,0.898277,0.189283
300,0.4147,0.365478,0.898277,0.189283
350,0.4165,0.375122,0.898277,0.189283
400,0.3677,0.334945,0.898277,0.189283
450,0.3968,0.318373,0.916325,0.320642
500,0.3527,0.312062,0.898277,0.189283


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4435,1.06225,0.898277,0.189283
100,0.7734,0.511089,0.898277,0.189283
150,0.5441,0.463856,0.898277,0.189283
200,0.4769,0.382015,0.898277,0.189283
250,0.4726,0.355594,0.898277,0.189283
300,0.4147,0.365478,0.898277,0.189283
350,0.4165,0.375122,0.898277,0.189283
400,0.3677,0.334945,0.898277,0.189283
450,0.3968,0.318373,0.916325,0.320642
500,0.3527,0.312062,0.898277,0.189283


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4488,1.083109,0.884331,0.156436
100,0.7925,0.556945,0.884331,0.156436
150,0.5126,0.499725,0.884331,0.156436
200,0.4665,0.429699,0.884331,0.156436
250,0.3724,0.359431,0.882691,0.1741
300,0.3347,0.363672,0.894176,0.247893
350,0.3125,0.363089,0.906481,0.322683
400,0.2866,0.376338,0.90484,0.316705
450,0.2403,0.392517,0.883511,0.201317
500,0.2297,0.412451,0.890074,0.214463


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4488,1.083109,0.884331,0.156436
100,0.7925,0.556945,0.884331,0.156436
150,0.5126,0.499725,0.884331,0.156436
200,0.4665,0.429699,0.884331,0.156436
250,0.3724,0.359431,0.882691,0.1741
300,0.3347,0.363672,0.894176,0.247893
350,0.3125,0.363089,0.906481,0.322683
400,0.2866,0.376338,0.90484,0.316705
450,0.2403,0.392517,0.883511,0.201317
500,0.2297,0.412451,0.890074,0.214463


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4488,1.083109,0.884331,0.156436
100,0.7925,0.556945,0.884331,0.156436
150,0.5126,0.499725,0.884331,0.156436
200,0.4665,0.429699,0.884331,0.156436
250,0.3724,0.359431,0.882691,0.1741
300,0.3347,0.363672,0.894176,0.247893
350,0.3125,0.363089,0.906481,0.322683
400,0.2866,0.376338,0.90484,0.316705
450,0.2403,0.392517,0.883511,0.201317
500,0.2297,0.412451,0.890074,0.214463


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4689,1.180937,0.886792,0.156667
100,0.8819,0.599113,0.886792,0.156667
150,0.5398,0.503301,0.886792,0.156667
200,0.4886,0.496204,0.886792,0.156667
250,0.4826,0.44814,0.886792,0.156667
300,0.3977,0.366558,0.900738,0.256127
350,0.341,0.333267,0.90402,0.260512
400,0.3073,0.31896,0.917966,0.337257
450,0.2753,0.308355,0.902379,0.29431
500,0.2358,0.340173,0.912223,0.321625


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4689,1.180937,0.886792,0.156667
100,0.8819,0.599113,0.886792,0.156667
150,0.5398,0.503301,0.886792,0.156667
200,0.4886,0.496204,0.886792,0.156667
250,0.4826,0.44814,0.886792,0.156667
300,0.3977,0.366558,0.900738,0.256127
350,0.341,0.333267,0.90402,0.260512
400,0.3073,0.31896,0.917966,0.337257
450,0.2753,0.308355,0.902379,0.29431
500,0.2358,0.340173,0.912223,0.321625


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,1.4689,1.180937,0.886792,0.156667
100,0.8819,0.599113,0.886792,0.156667
150,0.5398,0.503301,0.886792,0.156667
200,0.4886,0.496204,0.886792,0.156667
250,0.4826,0.44814,0.886792,0.156667
300,0.3977,0.366558,0.900738,0.256127
350,0.341,0.333267,0.90402,0.260512
400,0.3073,0.31896,0.917966,0.337257
450,0.2753,0.308355,0.902379,0.29431
500,0.2358,0.340173,0.912223,0.321625


DatasetDict({
    train: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 1219
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3929,0.208489,0.925349,0.409819
100,0.22,0.22195,0.920427,0.433376
150,0.1926,0.221866,0.926169,0.418675
200,0.1135,0.313264,0.921247,0.532405
250,0.078,0.312892,0.92781,0.500254
300,0.0506,0.36093,0.92863,0.539744
350,0.0264,0.369709,0.92781,0.401491
400,0.0145,0.436763,0.918786,0.521496
450,0.01,0.440025,0.924528,0.520653
500,0.0038,0.477509,0.917145,0.471431


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3929,0.208489,0.925349,0.409819
100,0.22,0.22195,0.920427,0.433376
150,0.1926,0.221866,0.926169,0.418675
200,0.1135,0.313264,0.921247,0.532405
250,0.078,0.312892,0.92781,0.500254
300,0.0506,0.36093,0.92863,0.539744
350,0.0264,0.369709,0.92781,0.401491
400,0.0145,0.436763,0.918786,0.521496
450,0.01,0.440025,0.924528,0.520653
500,0.0038,0.477509,0.917145,0.471431


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3929,0.208489,0.925349,0.409819
100,0.22,0.22195,0.920427,0.433376
150,0.1926,0.221866,0.926169,0.418675
200,0.1135,0.313264,0.921247,0.532405
250,0.078,0.312892,0.92781,0.500254
300,0.0506,0.36093,0.92863,0.539744
350,0.0264,0.369709,0.92781,0.401491
400,0.0145,0.436763,0.918786,0.521496
450,0.01,0.440025,0.924528,0.520653
500,0.0038,0.477509,0.917145,0.471431


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3801,0.269527,0.908942,0.292528
100,0.2209,0.249,0.922888,0.42616
150,0.1863,0.261915,0.926989,0.415579
200,0.0974,0.295371,0.926989,0.432941
250,0.0768,0.302658,0.931091,0.443688
300,0.0452,0.372874,0.92781,0.540112
350,0.0105,0.392948,0.924528,0.503745
400,0.0199,0.404478,0.926989,0.53876
450,0.0084,0.422026,0.925349,0.512056
500,0.0051,0.407275,0.921247,0.521964


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3801,0.269527,0.908942,0.292528
100,0.2209,0.249,0.922888,0.42616
150,0.1863,0.261915,0.926989,0.415579
200,0.0974,0.295371,0.926989,0.432941
250,0.0768,0.302658,0.931091,0.443688
300,0.0452,0.372874,0.92781,0.540112
350,0.0105,0.392948,0.924528,0.503745
400,0.0199,0.404478,0.926989,0.53876
450,0.0084,0.422026,0.925349,0.512056
500,0.0051,0.407275,0.921247,0.521964


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3801,0.269527,0.908942,0.292528
100,0.2209,0.249,0.922888,0.42616
150,0.1863,0.261915,0.926989,0.415579
200,0.0974,0.295371,0.926989,0.432941
250,0.0768,0.302658,0.931091,0.443688
300,0.0452,0.372874,0.92781,0.540112
350,0.0105,0.392948,0.924528,0.503745
400,0.0199,0.404478,0.926989,0.53876
450,0.0084,0.422026,0.925349,0.512056
500,0.0051,0.407275,0.921247,0.521964


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3593,0.314084,0.90402,0.272265
100,0.2498,0.320104,0.913864,0.338079
150,0.2048,0.238615,0.917966,0.394359
200,0.1162,0.298814,0.913864,0.508373
250,0.0797,0.343433,0.916325,0.495403
300,0.0482,0.33461,0.923708,0.53646
350,0.0259,0.379459,0.908121,0.410841
400,0.0133,0.452809,0.90402,0.497853
450,0.0054,0.411591,0.930271,0.530725
500,0.0017,0.440414,0.918786,0.48741


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3593,0.314084,0.90402,0.272265
100,0.2498,0.320104,0.913864,0.338079
150,0.2048,0.238615,0.917966,0.394359
200,0.1162,0.298814,0.913864,0.508373
250,0.0797,0.343433,0.916325,0.495403
300,0.0482,0.33461,0.923708,0.53646
350,0.0259,0.379459,0.908121,0.410841
400,0.0133,0.452809,0.90402,0.497853
450,0.0054,0.411591,0.930271,0.530725
500,0.0017,0.440414,0.918786,0.48741


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3593,0.314084,0.90402,0.272265
100,0.2498,0.320104,0.913864,0.338079
150,0.2048,0.238615,0.917966,0.394359
200,0.1162,0.298814,0.913864,0.508373
250,0.0797,0.343433,0.916325,0.495403
300,0.0482,0.33461,0.923708,0.53646
350,0.0259,0.379459,0.908121,0.410841
400,0.0133,0.452809,0.90402,0.497853
450,0.0054,0.411591,0.930271,0.530725
500,0.0017,0.440414,0.918786,0.48741


DatasetDict({
    train: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 1219
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4237,0.318051,0.898277,0.189283
100,0.3401,0.28154,0.894996,0.278557
150,0.3367,0.268409,0.90402,0.290998
200,0.3085,0.288913,0.906481,0.274018
250,0.3104,0.264992,0.90402,0.379989
300,0.2913,0.254567,0.908942,0.393911
350,0.3032,0.264433,0.908121,0.345351
400,0.2725,0.261467,0.910582,0.388239
450,0.2953,0.264179,0.913043,0.324718
500,0.2508,0.249008,0.910582,0.398813


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4237,0.318051,0.898277,0.189283
100,0.3401,0.28154,0.894996,0.278557
150,0.3367,0.268409,0.90402,0.290998
200,0.3085,0.288913,0.906481,0.274018
250,0.3104,0.264992,0.90402,0.379989
300,0.2913,0.254567,0.908942,0.393911
350,0.3032,0.264433,0.908121,0.345351
400,0.2725,0.261467,0.910582,0.388239
450,0.2953,0.264179,0.913043,0.324718
500,0.2508,0.249008,0.910582,0.398813


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4237,0.318051,0.898277,0.189283
100,0.3401,0.28154,0.894996,0.278557
150,0.3367,0.268409,0.90402,0.290998
200,0.3085,0.288913,0.906481,0.274018
250,0.3104,0.264992,0.90402,0.379989
300,0.2913,0.254567,0.908942,0.393911
350,0.3032,0.264433,0.908121,0.345351
400,0.2725,0.261467,0.910582,0.388239
450,0.2953,0.264179,0.913043,0.324718
500,0.2508,0.249008,0.910582,0.398813


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4372,0.396763,0.884331,0.156436
100,0.3444,0.341917,0.884331,0.16537
150,0.3263,0.330977,0.884331,0.156436
200,0.3372,0.332945,0.885152,0.213303
250,0.3404,0.317804,0.90402,0.324954
300,0.2988,0.308792,0.901559,0.260504
350,0.2822,0.308788,0.902379,0.287517
400,0.2768,0.307075,0.906481,0.336307
450,0.2705,0.312303,0.90402,0.312458
500,0.2713,0.303797,0.899098,0.330578


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4372,0.396763,0.884331,0.156436
100,0.3444,0.341917,0.884331,0.16537
150,0.3263,0.330977,0.884331,0.156436
200,0.3372,0.332945,0.885152,0.213303
250,0.3404,0.317804,0.90402,0.324954
300,0.2988,0.308792,0.901559,0.260504
350,0.2822,0.308788,0.902379,0.287517
400,0.2768,0.307075,0.906481,0.336307
450,0.2705,0.312303,0.90402,0.312458
500,0.2713,0.303797,0.899098,0.330578


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4372,0.396763,0.884331,0.156436
100,0.3444,0.341917,0.884331,0.16537
150,0.3263,0.330977,0.884331,0.156436
200,0.3372,0.332945,0.885152,0.213303
250,0.3404,0.317804,0.90402,0.324954
300,0.2988,0.308792,0.901559,0.260504
350,0.2822,0.308788,0.902379,0.287517
400,0.2768,0.307075,0.906481,0.336307
450,0.2705,0.312303,0.90402,0.312458
500,0.2713,0.303797,0.899098,0.330578


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4432,0.338249,0.886792,0.156667
100,0.345,0.325414,0.895816,0.221319
150,0.3246,0.30657,0.906481,0.299968
200,0.3092,0.308564,0.903199,0.276163
250,0.2988,0.302842,0.902379,0.270491
300,0.2863,0.3306,0.889253,0.316838
350,0.2704,0.336739,0.903199,0.316336
400,0.2509,0.324324,0.893355,0.315393
450,0.2529,0.325087,0.908942,0.317305
500,0.2396,0.353184,0.907301,0.304737


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4432,0.338249,0.886792,0.156667
100,0.345,0.325414,0.895816,0.221319
150,0.3246,0.30657,0.906481,0.299968
200,0.3092,0.308564,0.903199,0.276163
250,0.2988,0.302842,0.902379,0.270491
300,0.2863,0.3306,0.889253,0.316838
350,0.2704,0.336739,0.903199,0.316336
400,0.2509,0.324324,0.893355,0.315393
450,0.2529,0.325087,0.908942,0.317305
500,0.2396,0.353184,0.907301,0.304737


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4432,0.338249,0.886792,0.156667
100,0.345,0.325414,0.895816,0.221319
150,0.3246,0.30657,0.906481,0.299968
200,0.3092,0.308564,0.903199,0.276163
250,0.2988,0.302842,0.902379,0.270491
300,0.2863,0.3306,0.889253,0.316838
350,0.2704,0.336739,0.903199,0.316336
400,0.2509,0.324324,0.893355,0.315393
450,0.2529,0.325087,0.908942,0.317305
500,0.2396,0.353184,0.907301,0.304737


DatasetDict({
    train: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 1219
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3851,0.200919,0.933552,0.461619
100,0.2137,0.22729,0.931091,0.47836
150,0.1695,0.222884,0.926169,0.419873
200,0.0767,0.300231,0.92863,0.429074
250,0.0552,0.298003,0.919606,0.420402
300,0.0215,0.398624,0.926989,0.42479
350,0.0139,0.406827,0.92945,0.436016
400,0.0061,0.429357,0.931091,0.440465
450,0.0028,0.435716,0.92781,0.434896
500,0.0018,0.434552,0.930271,0.42691


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3851,0.200919,0.933552,0.461619
100,0.2137,0.22729,0.931091,0.47836
150,0.1695,0.222884,0.926169,0.419873
200,0.0767,0.300231,0.92863,0.429074
250,0.0552,0.298003,0.919606,0.420402
300,0.0215,0.398624,0.926989,0.42479
350,0.0139,0.406827,0.92945,0.436016
400,0.0061,0.429357,0.931091,0.440465
450,0.0028,0.435716,0.92781,0.434896
500,0.0018,0.434552,0.930271,0.42691


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3851,0.200919,0.933552,0.461619
100,0.2137,0.22729,0.931091,0.47836
150,0.1695,0.222884,0.926169,0.419873
200,0.0767,0.300231,0.92863,0.429074
250,0.0552,0.298003,0.919606,0.420402
300,0.0215,0.398624,0.926989,0.42479
350,0.0139,0.406827,0.92945,0.436016
400,0.0061,0.429357,0.931091,0.440465
450,0.0028,0.435716,0.92781,0.434896
500,0.0018,0.434552,0.930271,0.42691


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3659,0.2856,0.90402,0.290628
100,0.2104,0.254726,0.917145,0.383686
150,0.1669,0.26008,0.92863,0.412256
200,0.0854,0.345979,0.914684,0.536757
250,0.0801,0.434695,0.916325,0.401179
300,0.0427,0.460675,0.912223,0.379165
350,0.0187,0.465439,0.917145,0.415181
400,0.0176,0.437645,0.926169,0.599642
450,0.0097,0.480408,0.917145,0.41091
500,0.0036,0.489378,0.921247,0.425914


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3659,0.2856,0.90402,0.290628
100,0.2104,0.254726,0.917145,0.383686
150,0.1669,0.26008,0.92863,0.412256
200,0.0854,0.345979,0.914684,0.536757
250,0.0801,0.434695,0.916325,0.401179
300,0.0427,0.460675,0.912223,0.379165
350,0.0187,0.465439,0.917145,0.415181
400,0.0176,0.437645,0.926169,0.599642
450,0.0097,0.480408,0.917145,0.41091
500,0.0036,0.489378,0.921247,0.425914


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3659,0.2856,0.90402,0.290628
100,0.2104,0.254726,0.917145,0.383686
150,0.1669,0.26008,0.92863,0.412256
200,0.0854,0.345979,0.914684,0.536757
250,0.0801,0.434695,0.916325,0.401179
300,0.0427,0.460675,0.912223,0.379165
350,0.0187,0.465439,0.917145,0.415181
400,0.0176,0.437645,0.926169,0.599642
450,0.0097,0.480408,0.917145,0.41091
500,0.0036,0.489378,0.921247,0.425914


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3524,0.351506,0.90402,0.260122
100,0.2501,0.293682,0.924528,0.382542
150,0.1877,0.252182,0.926169,0.389856
200,0.0975,0.296222,0.922888,0.467797
250,0.0649,0.34997,0.926169,0.498877
300,0.0315,0.332955,0.922888,0.496902
350,0.0108,0.37898,0.92863,0.535904
400,0.0103,0.395692,0.926169,0.564689
450,0.0029,0.453059,0.925349,0.541022
500,0.0015,0.428592,0.926169,0.565605


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3524,0.351506,0.90402,0.260122
100,0.2501,0.293682,0.924528,0.382542
150,0.1877,0.252182,0.926169,0.389856
200,0.0975,0.296222,0.922888,0.467797
250,0.0649,0.34997,0.926169,0.498877
300,0.0315,0.332955,0.922888,0.496902
350,0.0108,0.37898,0.92863,0.535904
400,0.0103,0.395692,0.926169,0.564689
450,0.0029,0.453059,0.925349,0.541022
500,0.0015,0.428592,0.926169,0.565605


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3524,0.351506,0.90402,0.260122
100,0.2501,0.293682,0.924528,0.382542
150,0.1877,0.252182,0.926169,0.389856
200,0.0975,0.296222,0.922888,0.467797
250,0.0649,0.34997,0.926169,0.498877
300,0.0315,0.332955,0.922888,0.496902
350,0.0108,0.37898,0.92863,0.535904
400,0.0103,0.395692,0.926169,0.564689
450,0.0029,0.453059,0.925349,0.541022
500,0.0015,0.428592,0.926169,0.565605


DatasetDict({
    train: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 1219
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3882,0.224275,0.920427,0.388315
100,0.217,0.223593,0.931911,0.459194
150,0.1939,0.278205,0.92945,0.449519
200,0.0951,0.330859,0.932732,0.489276
250,0.0551,0.342464,0.918786,0.430867
300,0.0378,0.417089,0.931911,0.416578
350,0.0225,0.403189,0.925349,0.4246
400,0.0145,0.396952,0.92945,0.422603
450,0.0127,0.420418,0.931911,0.444796
500,0.0055,0.41027,0.930271,0.424187


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3882,0.224275,0.920427,0.388315
100,0.217,0.223593,0.931911,0.459194
150,0.1939,0.278205,0.92945,0.449519
200,0.0951,0.330859,0.932732,0.489276
250,0.0551,0.342464,0.918786,0.430867
300,0.0378,0.417089,0.931911,0.416578
350,0.0225,0.403189,0.925349,0.4246
400,0.0145,0.396952,0.92945,0.422603
450,0.0127,0.420418,0.931911,0.444796
500,0.0055,0.41027,0.930271,0.424187


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3882,0.224275,0.920427,0.388315
100,0.217,0.223593,0.931911,0.459194
150,0.1939,0.278205,0.92945,0.449519
200,0.0951,0.330859,0.932732,0.489276
250,0.0551,0.342464,0.918786,0.430867
300,0.0378,0.417089,0.931911,0.416578
350,0.0225,0.403189,0.925349,0.4246
400,0.0145,0.396952,0.92945,0.422603
450,0.0127,0.420418,0.931911,0.444796
500,0.0055,0.41027,0.930271,0.424187


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3714,0.29797,0.912223,0.309759
100,0.2304,0.263252,0.922067,0.37268
150,0.1851,0.283346,0.922888,0.379124
200,0.1083,0.296465,0.916325,0.435736
250,0.0743,0.32789,0.920427,0.451936
300,0.0405,0.481948,0.917966,0.390981
350,0.0235,0.391353,0.926169,0.43949
400,0.0134,0.387128,0.92863,0.468118
450,0.0084,0.424875,0.925349,0.446147
500,0.0034,0.457509,0.92781,0.418368


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3714,0.29797,0.912223,0.309759
100,0.2304,0.263252,0.922067,0.37268
150,0.1851,0.283346,0.922888,0.379124
200,0.1083,0.296465,0.916325,0.435736
250,0.0743,0.32789,0.920427,0.451936
300,0.0405,0.481948,0.917966,0.390981
350,0.0235,0.391353,0.926169,0.43949
400,0.0134,0.387128,0.92863,0.468118
450,0.0084,0.424875,0.925349,0.446147
500,0.0034,0.457509,0.92781,0.418368


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3714,0.29797,0.912223,0.309759
100,0.2304,0.263252,0.922067,0.37268
150,0.1851,0.283346,0.922888,0.379124
200,0.1083,0.296465,0.916325,0.435736
250,0.0743,0.32789,0.920427,0.451936
300,0.0405,0.481948,0.917966,0.390981
350,0.0235,0.391353,0.926169,0.43949
400,0.0134,0.387128,0.92863,0.468118
450,0.0084,0.424875,0.925349,0.446147
500,0.0034,0.457509,0.92781,0.418368


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3586,0.366475,0.899918,0.246861
100,0.2428,0.322837,0.922888,0.347337
150,0.1925,0.258813,0.918786,0.398076
200,0.0989,0.381548,0.922888,0.423934
250,0.0854,0.349352,0.913864,0.403086
300,0.041,0.34898,0.924528,0.427678
350,0.0252,0.424925,0.924528,0.409086
400,0.0195,0.42096,0.92781,0.42656
450,0.0104,0.479778,0.923708,0.412039
500,0.0106,0.485664,0.922888,0.446543


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3586,0.366475,0.899918,0.246861
100,0.2428,0.322837,0.922888,0.347337
150,0.1925,0.258813,0.918786,0.398076
200,0.0989,0.381548,0.922888,0.423934
250,0.0854,0.349352,0.913864,0.403086
300,0.041,0.34898,0.924528,0.427678
350,0.0252,0.424925,0.924528,0.409086
400,0.0195,0.42096,0.92781,0.42656
450,0.0104,0.479778,0.923708,0.412039
500,0.0106,0.485664,0.922888,0.446543


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3586,0.366475,0.899918,0.246861
100,0.2428,0.322837,0.922888,0.347337
150,0.1925,0.258813,0.918786,0.398076
200,0.0989,0.381548,0.922888,0.423934
250,0.0854,0.349352,0.913864,0.403086
300,0.041,0.34898,0.924528,0.427678
350,0.0252,0.424925,0.924528,0.409086
400,0.0195,0.42096,0.92781,0.42656
450,0.0104,0.479778,0.923708,0.412039
500,0.0106,0.485664,0.922888,0.446543


DatasetDict({
    train: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 1219
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4648,0.26986,0.90402,0.24818
100,0.2505,0.225219,0.919606,0.422375
150,0.2296,0.223425,0.925349,0.426298
200,0.1345,0.313974,0.922888,0.407866
250,0.1219,0.290104,0.926169,0.488694
300,0.0598,0.360112,0.926989,0.487135
350,0.0466,0.397208,0.922067,0.381375
400,0.023,0.440896,0.922067,0.397351
450,0.0137,0.44651,0.924528,0.421212
500,0.0043,0.437996,0.923708,0.410518


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4648,0.26986,0.90402,0.24818
100,0.2505,0.225219,0.919606,0.422375
150,0.2296,0.223425,0.925349,0.426298
200,0.1345,0.313974,0.922888,0.407866
250,0.1219,0.290104,0.926169,0.488694
300,0.0598,0.360112,0.926989,0.487135
350,0.0466,0.397208,0.922067,0.381375
400,0.023,0.440896,0.922067,0.397351
450,0.0137,0.44651,0.924528,0.421212
500,0.0043,0.437996,0.923708,0.410518


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4648,0.26986,0.90402,0.24818
100,0.2505,0.225219,0.919606,0.422375
150,0.2296,0.223425,0.925349,0.426298
200,0.1345,0.313974,0.922888,0.407866
250,0.1219,0.290104,0.926169,0.488694
300,0.0598,0.360112,0.926989,0.487135
350,0.0466,0.397208,0.922067,0.381375
400,0.023,0.440896,0.922067,0.397351
450,0.0137,0.44651,0.924528,0.421212
500,0.0043,0.437996,0.923708,0.410518


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4228,0.300765,0.908121,0.288336
100,0.248,0.283611,0.909762,0.360737
150,0.2121,0.278108,0.919606,0.359297
200,0.1393,0.422324,0.876128,0.366336
250,0.1194,0.358313,0.913864,0.398871
300,0.0754,0.390263,0.915505,0.498508
350,0.0409,0.399375,0.922888,0.552395
400,0.0348,0.432917,0.914684,0.49808
450,0.0174,0.450463,0.917145,0.555557
500,0.0089,0.450301,0.921247,0.51113


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4228,0.300765,0.908121,0.288336
100,0.248,0.283611,0.909762,0.360737
150,0.2121,0.278108,0.919606,0.359297
200,0.1393,0.422324,0.876128,0.366336
250,0.1194,0.358313,0.913864,0.398871
300,0.0754,0.390263,0.915505,0.498508
350,0.0409,0.399375,0.922888,0.552395
400,0.0348,0.432917,0.914684,0.49808
450,0.0174,0.450463,0.917145,0.555557
500,0.0089,0.450301,0.921247,0.51113


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4228,0.300765,0.908121,0.288336
100,0.248,0.283611,0.909762,0.360737
150,0.2121,0.278108,0.919606,0.359297
200,0.1393,0.422324,0.876128,0.366336
250,0.1194,0.358313,0.913864,0.398871
300,0.0754,0.390263,0.915505,0.498508
350,0.0409,0.399375,0.922888,0.552395
400,0.0348,0.432917,0.914684,0.49808
450,0.0174,0.450463,0.917145,0.555557
500,0.0089,0.450301,0.921247,0.51113


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3976,0.329307,0.901559,0.255608
100,0.2432,0.312319,0.921247,0.352948
150,0.2101,0.304582,0.922888,0.350478
200,0.1321,0.352185,0.913864,0.356594
250,0.0926,0.372322,0.910582,0.344839
300,0.0653,0.434745,0.920427,0.395227
350,0.0298,0.439273,0.914684,0.390277
400,0.0259,0.45641,0.911403,0.378158
450,0.0088,0.496368,0.915505,0.407204
500,0.0083,0.497617,0.912223,0.422266


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3976,0.329307,0.901559,0.255608
100,0.2432,0.312319,0.921247,0.352948
150,0.2101,0.304582,0.922888,0.350478
200,0.1321,0.352185,0.913864,0.356594
250,0.0926,0.372322,0.910582,0.344839
300,0.0653,0.434745,0.920427,0.395227
350,0.0298,0.439273,0.914684,0.390277
400,0.0259,0.45641,0.911403,0.378158
450,0.0088,0.496368,0.915505,0.407204
500,0.0083,0.497617,0.912223,0.422266


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3976,0.329307,0.901559,0.255608
100,0.2432,0.312319,0.921247,0.352948
150,0.2101,0.304582,0.922888,0.350478
200,0.1321,0.352185,0.913864,0.356594
250,0.0926,0.372322,0.910582,0.344839
300,0.0653,0.434745,0.920427,0.395227
350,0.0298,0.439273,0.914684,0.390277
400,0.0259,0.45641,0.911403,0.378158
450,0.0088,0.496368,0.915505,0.407204
500,0.0083,0.497617,0.912223,0.422266


DatasetDict({
    train: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['Text', 'label', '__index_level_0__'],
        num_rows: 1219
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4163,0.251025,0.913043,0.314631
100,0.2489,0.226937,0.918786,0.449525
150,0.2121,0.258299,0.922067,0.465399
200,0.1244,0.277201,0.926989,0.481524
250,0.0746,0.342715,0.919606,0.415255
300,0.0435,0.381168,0.920427,0.361955
350,0.0269,0.390646,0.921247,0.416047
400,0.0154,0.426804,0.914684,0.415627
450,0.0072,0.488062,0.922888,0.476668
500,0.0021,0.452337,0.920427,0.397153


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4163,0.251025,0.913043,0.314631
100,0.2489,0.226937,0.918786,0.449525
150,0.2121,0.258299,0.922067,0.465399
200,0.1244,0.277201,0.926989,0.481524
250,0.0746,0.342715,0.919606,0.415255
300,0.0435,0.381168,0.920427,0.361955
350,0.0269,0.390646,0.921247,0.416047
400,0.0154,0.426804,0.914684,0.415627
450,0.0072,0.488062,0.922888,0.476668
500,0.0021,0.452337,0.920427,0.397153


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.4163,0.251025,0.913043,0.314631
100,0.2489,0.226937,0.918786,0.449525
150,0.2121,0.258299,0.922067,0.465399
200,0.1244,0.277201,0.926989,0.481524
250,0.0746,0.342715,0.919606,0.415255
300,0.0435,0.381168,0.920427,0.361955
350,0.0269,0.390646,0.921247,0.416047
400,0.0154,0.426804,0.914684,0.415627
450,0.0072,0.488062,0.922888,0.476668
500,0.0021,0.452337,0.920427,0.397153


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3884,0.27416,0.912223,0.319811
100,0.2295,0.261748,0.922067,0.389062
150,0.1872,0.324861,0.914684,0.348795
200,0.1066,0.304713,0.917966,0.577857
250,0.0867,0.355132,0.90484,0.506596
300,0.0443,0.462311,0.912223,0.398172
350,0.0229,0.447051,0.917966,0.418683
400,0.0183,0.390288,0.922067,0.517871
450,0.004,0.441436,0.916325,0.522039
500,0.0024,0.474859,0.920427,0.530838


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3884,0.27416,0.912223,0.319811
100,0.2295,0.261748,0.922067,0.389062
150,0.1872,0.324861,0.914684,0.348795
200,0.1066,0.304713,0.917966,0.577857
250,0.0867,0.355132,0.90484,0.506596
300,0.0443,0.462311,0.912223,0.398172
350,0.0229,0.447051,0.917966,0.418683
400,0.0183,0.390288,0.922067,0.517871
450,0.004,0.441436,0.916325,0.522039
500,0.0024,0.474859,0.920427,0.530838


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3884,0.27416,0.912223,0.319811
100,0.2295,0.261748,0.922067,0.389062
150,0.1872,0.324861,0.914684,0.348795
200,0.1066,0.304713,0.917966,0.577857
250,0.0867,0.355132,0.90484,0.506596
300,0.0443,0.462311,0.912223,0.398172
350,0.0229,0.447051,0.917966,0.418683
400,0.0183,0.390288,0.922067,0.517871
450,0.004,0.441436,0.916325,0.522039
500,0.0024,0.474859,0.920427,0.530838


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3706,0.308925,0.903199,0.26194
100,0.2421,0.329249,0.917966,0.347258
150,0.2019,0.282367,0.917966,0.383871
200,0.1017,0.342012,0.916325,0.388643
250,0.0857,0.375341,0.916325,0.391318
300,0.037,0.414324,0.917145,0.501705
350,0.0129,0.488669,0.90484,0.353331
400,0.0111,0.469964,0.915505,0.406796
450,0.0081,0.476869,0.922888,0.424318
500,0.0026,0.499405,0.922067,0.412261


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3706,0.308925,0.903199,0.26194
100,0.2421,0.329249,0.917966,0.347258
150,0.2019,0.282367,0.917966,0.383871
200,0.1017,0.342012,0.916325,0.388643
250,0.0857,0.375341,0.916325,0.391318
300,0.037,0.414324,0.917145,0.501705
350,0.0129,0.488669,0.90484,0.353331
400,0.0111,0.469964,0.915505,0.406796
450,0.0081,0.476869,0.922888,0.424318
500,0.0026,0.499405,0.922067,0.412261


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3706,0.308925,0.903199,0.26194
100,0.2421,0.329249,0.917966,0.347258
150,0.2019,0.282367,0.917966,0.383871
200,0.1017,0.342012,0.916325,0.388643
250,0.0857,0.375341,0.916325,0.391318
300,0.037,0.414324,0.917145,0.501705
350,0.0129,0.488669,0.90484,0.353331
400,0.0111,0.469964,0.915505,0.406796
450,0.0081,0.476869,0.922888,0.424318
500,0.0026,0.499405,0.922067,0.412261


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.932732,0.489276
3,alger-ia/dziribert,0.92863,0.652564
6,faisalq/EgyBERT,0.916325,0.424782
7,faisalq/SaudiBERT,0.922888,0.573169
10,otmangi/MorRoBERTa,0.917145,0.55896
13,otmangi/MorrBERT,0.917966,0.577857
16,tunis-ai/TunBERT,0.923708,0.403577
