In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'MSA_MDA_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/MSA_MDA_comments.csv', encoding='utf-8', engine='python', sep='\t') #, quotechar="'"  , quoting=3

display(len(df))
df = df[df['language'] == 'mda']
      
display(df.columns)
display(df[:4])


display(len(df))

c = df['sentiment'].value_counts()
display(c)

classes = set(df['sentiment'].values)
display(classes)

df['sentiment'] = df['sentiment'].astype('category')
df['label'] = df['sentiment'].cat.codes

df = df[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


max_sequence_length = 128



models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]


seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
            
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['text'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True)
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 15
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 25, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 25
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-18 19:05:34.741245: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-18 19:05:34.767682: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


9901

Index(['text', 'language', 'sentiment'], dtype='object')

Unnamed: 0,text,language,sentiment
6359,اجي هاك الطاجين راك فزت به سربي ما حدو سخون. بالصحة والعافية.,mda,positive
6360,اخصنى انديروها ابنات ميدالت فالصيف فالمو صمورة متافقين ويشجعون ولداتنا تخيلوها هههههههه,mda,positive
6361,الفلاح هوا العمود الفقري ديال المغرب تبرك الله عليك الله يعونك,mda,positive
6362,كاين الخير والأرض خضارت والفلاح نشط مع هاد الأمطار غير كاين سمر والبرد مع الصباح حيت ليالي قريبة والحمد القوي العزيز,mda,positive


3542

sentiment
negative    2858
positive     684
Name: count, dtype: int64

{'negative', 'positive'}

2

3542

DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 2833
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 709
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6052,0.538191,0.791255,0.441732
50,0.471,0.432331,0.791255,0.441732
75,0.3618,0.325559,0.895628,0.833543
100,0.2814,0.274424,0.913963,0.863161
125,0.2132,0.282756,0.911142,0.857104
150,0.1674,0.295621,0.888575,0.842322
175,0.1906,0.280131,0.911142,0.863103
200,0.1484,0.339056,0.87024,0.822034
225,0.1421,0.306306,0.888575,0.842322
250,0.1046,0.294955,0.908322,0.854979


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6075,0.543277,0.791255,0.441732
50,0.4744,0.419942,0.791255,0.441732
75,0.3678,0.339897,0.857546,0.704804
100,0.2991,0.322764,0.863188,0.816369
125,0.219,0.274453,0.901269,0.855508
150,0.1671,0.28848,0.897038,0.848964
175,0.1895,0.296384,0.899859,0.849546
200,0.1229,0.340458,0.887165,0.83998
225,0.1235,0.290119,0.912553,0.858979
250,0.098,0.313028,0.892807,0.847316


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6075,0.543277,0.791255,0.441732
50,0.4744,0.419942,0.791255,0.441732
75,0.3678,0.339897,0.857546,0.704804
100,0.2991,0.322764,0.863188,0.816369
125,0.219,0.274453,0.901269,0.855508
150,0.1671,0.28848,0.897038,0.848964
175,0.1895,0.296384,0.899859,0.849546
200,0.1229,0.340458,0.887165,0.83998
225,0.1235,0.290119,0.912553,0.858979
250,0.098,0.313028,0.892807,0.847316


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6165,0.533799,0.803949,0.445661
50,0.4813,0.469645,0.803949,0.445661
75,0.4641,0.442207,0.803949,0.445661
100,0.4218,0.388369,0.803949,0.445661
125,0.3796,0.391619,0.803949,0.452668
150,0.3674,0.352525,0.885755,0.777962
175,0.302,0.315889,0.90409,0.831695
200,0.2506,0.3186,0.882934,0.813809
225,0.253,0.340805,0.87024,0.811837
250,0.2287,0.364316,0.858956,0.80498


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6165,0.533799,0.803949,0.445661
50,0.4813,0.469645,0.803949,0.445661
75,0.4641,0.442207,0.803949,0.445661
100,0.4218,0.388369,0.803949,0.445661
125,0.3796,0.391619,0.803949,0.452668
150,0.3674,0.352525,0.885755,0.777962
175,0.302,0.315889,0.90409,0.831695
200,0.2506,0.3186,0.882934,0.813809
225,0.253,0.340805,0.87024,0.811837
250,0.2287,0.364316,0.858956,0.80498


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6165,0.533799,0.803949,0.445661
50,0.4813,0.469645,0.803949,0.445661
75,0.4641,0.442207,0.803949,0.445661
100,0.4218,0.388369,0.803949,0.445661
125,0.3796,0.391619,0.803949,0.452668
150,0.3674,0.352525,0.885755,0.777962
175,0.302,0.315889,0.90409,0.831695
200,0.2506,0.3186,0.882934,0.813809
225,0.253,0.340805,0.87024,0.811837
250,0.2287,0.364316,0.858956,0.80498


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6118,0.527392,0.812412,0.448249
50,0.4748,0.411294,0.812412,0.448249
75,0.3695,0.328091,0.877292,0.734447
100,0.2877,0.299763,0.913963,0.835293
125,0.2292,0.278529,0.901269,0.838968
150,0.1959,0.255109,0.923836,0.865471
175,0.1503,0.249641,0.925247,0.874819
200,0.1286,0.273066,0.916784,0.849531
225,0.1099,0.261265,0.923836,0.872073
250,0.0982,0.284544,0.913963,0.860863


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6118,0.527392,0.812412,0.448249
50,0.4748,0.411294,0.812412,0.448249
75,0.3695,0.328091,0.877292,0.734447
100,0.2877,0.299763,0.913963,0.835293
125,0.2292,0.278529,0.901269,0.838968
150,0.1959,0.255109,0.923836,0.865471
175,0.1503,0.249641,0.925247,0.874819
200,0.1286,0.273066,0.916784,0.849531
225,0.1099,0.261265,0.923836,0.872073
250,0.0982,0.284544,0.913963,0.860863


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.6118,0.527392,0.812412,0.448249
50,0.4748,0.411294,0.812412,0.448249
75,0.3695,0.328091,0.877292,0.734447
100,0.2877,0.299763,0.913963,0.835293
125,0.2292,0.278529,0.901269,0.838968
150,0.1959,0.255109,0.923836,0.865471
175,0.1503,0.249641,0.925247,0.874819
200,0.1286,0.273066,0.916784,0.849531
225,0.1099,0.261265,0.923836,0.872073
250,0.0982,0.284544,0.913963,0.860863


DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 2833
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 709
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3243,0.264752,0.891396,0.842873
50,0.2239,0.268159,0.911142,0.857895
75,0.1326,0.247118,0.905501,0.85661
100,0.0832,0.419,0.901269,0.824455
125,0.0613,0.492869,0.868829,0.823941
150,0.0454,0.389023,0.912553,0.861296
175,0.0265,0.444023,0.912553,0.867641
200,0.0186,0.426611,0.909732,0.865366
225,0.0086,0.55766,0.895628,0.85262
250,0.0044,0.531647,0.912553,0.864223


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3243,0.264752,0.891396,0.842873
50,0.2239,0.268159,0.911142,0.857895
75,0.1326,0.247118,0.905501,0.85661
100,0.0832,0.419,0.901269,0.824455
125,0.0613,0.492869,0.868829,0.823941
150,0.0454,0.389023,0.912553,0.861296
175,0.0265,0.444023,0.912553,0.867641
200,0.0186,0.426611,0.909732,0.865366
225,0.0086,0.55766,0.895628,0.85262
250,0.0044,0.531647,0.912553,0.864223


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3243,0.264752,0.891396,0.842873
50,0.2239,0.268159,0.911142,0.857895
75,0.1326,0.247118,0.905501,0.85661
100,0.0832,0.419,0.901269,0.824455
125,0.0613,0.492869,0.868829,0.823941
150,0.0454,0.389023,0.912553,0.861296
175,0.0265,0.444023,0.912553,0.867641
200,0.0186,0.426611,0.909732,0.865366
225,0.0086,0.55766,0.895628,0.85262
250,0.0044,0.531647,0.912553,0.864223


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3412,0.289561,0.892807,0.839344
50,0.2496,0.242334,0.925247,0.87251
75,0.1659,0.244528,0.916784,0.856291
100,0.1327,0.268275,0.912553,0.861296
125,0.0509,0.468764,0.913963,0.846516
150,0.0645,0.366726,0.90409,0.854105
175,0.033,0.470076,0.909732,0.847461
200,0.0254,0.451247,0.913963,0.863161
225,0.0088,0.504997,0.915374,0.856117
250,0.0095,0.52781,0.909732,0.853607


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3412,0.289561,0.892807,0.839344
50,0.2496,0.242334,0.925247,0.87251
75,0.1659,0.244528,0.916784,0.856291
100,0.1327,0.268275,0.912553,0.861296
125,0.0509,0.468764,0.913963,0.846516
150,0.0645,0.366726,0.90409,0.854105
175,0.033,0.470076,0.909732,0.847461
200,0.0254,0.451247,0.913963,0.863161
225,0.0088,0.504997,0.915374,0.856117
250,0.0095,0.52781,0.909732,0.853607


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3412,0.289561,0.892807,0.839344
50,0.2496,0.242334,0.925247,0.87251
75,0.1659,0.244528,0.916784,0.856291
100,0.1327,0.268275,0.912553,0.861296
125,0.0509,0.468764,0.913963,0.846516
150,0.0645,0.366726,0.90409,0.854105
175,0.033,0.470076,0.909732,0.847461
200,0.0254,0.451247,0.913963,0.863161
225,0.0088,0.504997,0.915374,0.856117
250,0.0095,0.52781,0.909732,0.853607


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3554,0.23518,0.919605,0.853632
50,0.2165,0.245835,0.913963,0.859269
75,0.1486,0.240499,0.93512,0.889005
100,0.1137,0.243021,0.916784,0.867647
125,0.0528,0.265302,0.937941,0.893164
150,0.0642,0.348235,0.921016,0.865709
175,0.0335,0.387733,0.918195,0.843975
200,0.0122,0.390925,0.928068,0.884323
225,0.0147,0.430976,0.922426,0.857785
250,0.0042,0.442077,0.925247,0.874819


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3554,0.23518,0.919605,0.853632
50,0.2165,0.245835,0.913963,0.859269
75,0.1486,0.240499,0.93512,0.889005
100,0.1137,0.243021,0.916784,0.867647
125,0.0528,0.265302,0.937941,0.893164
150,0.0642,0.348235,0.921016,0.865709
175,0.0335,0.387733,0.918195,0.843975
200,0.0122,0.390925,0.928068,0.884323
225,0.0147,0.430976,0.922426,0.857785
250,0.0042,0.442077,0.925247,0.874819


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3554,0.23518,0.919605,0.853632
50,0.2165,0.245835,0.913963,0.859269
75,0.1486,0.240499,0.93512,0.889005
100,0.1137,0.243021,0.916784,0.867647
125,0.0528,0.265302,0.937941,0.893164
150,0.0642,0.348235,0.921016,0.865709
175,0.0335,0.387733,0.918195,0.843975
200,0.0122,0.390925,0.928068,0.884323
225,0.0147,0.430976,0.922426,0.857785
250,0.0042,0.442077,0.925247,0.874819


DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 2833
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 709
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5267,0.538749,0.791255,0.441732
50,0.4925,0.51243,0.791255,0.441732
75,0.4917,0.518177,0.791255,0.441732
100,0.4929,0.516876,0.791255,0.441732
125,0.4923,0.517181,0.791255,0.441732
150,0.4904,0.515634,0.791255,0.441732
175,0.5178,0.522408,0.791255,0.441732
200,0.4802,0.51056,0.791255,0.441732
225,0.4759,0.50294,0.791255,0.441732
250,0.4847,0.59145,0.791255,0.441732


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5267,0.538749,0.791255,0.441732
50,0.4925,0.51243,0.791255,0.441732
75,0.4917,0.518177,0.791255,0.441732
100,0.4929,0.516876,0.791255,0.441732
125,0.4923,0.517181,0.791255,0.441732
150,0.4904,0.515634,0.791255,0.441732
175,0.5178,0.522408,0.791255,0.441732
200,0.4802,0.51056,0.791255,0.441732
225,0.4759,0.50294,0.791255,0.441732
250,0.4847,0.59145,0.791255,0.441732


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5267,0.538749,0.791255,0.441732
50,0.4925,0.51243,0.791255,0.441732
75,0.4917,0.518177,0.791255,0.441732
100,0.4929,0.516876,0.791255,0.441732
125,0.4923,0.517181,0.791255,0.441732
150,0.4904,0.515634,0.791255,0.441732
175,0.5178,0.522408,0.791255,0.441732
200,0.4802,0.51056,0.791255,0.441732
225,0.4759,0.50294,0.791255,0.441732
250,0.4847,0.59145,0.791255,0.441732


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5613,0.502402,0.803949,0.445661
50,0.4897,0.506331,0.803949,0.445661
75,0.4853,0.500205,0.803949,0.445661
100,0.5168,0.495936,0.803949,0.445661
125,0.5006,0.499233,0.803949,0.445661
150,0.5061,0.497444,0.803949,0.445661
175,0.4969,0.503894,0.803949,0.445661
200,0.504,0.502282,0.803949,0.445661
225,0.4839,0.500248,0.803949,0.445661
250,0.5008,0.505779,0.803949,0.445661


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5613,0.502402,0.803949,0.445661
50,0.4897,0.506331,0.803949,0.445661
75,0.4853,0.500205,0.803949,0.445661
100,0.5168,0.495936,0.803949,0.445661
125,0.5006,0.499233,0.803949,0.445661
150,0.5061,0.497444,0.803949,0.445661
175,0.4969,0.503894,0.803949,0.445661
200,0.504,0.502282,0.803949,0.445661
225,0.4839,0.500248,0.803949,0.445661
250,0.5008,0.505779,0.803949,0.445661


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5613,0.502402,0.803949,0.445661
50,0.4897,0.506331,0.803949,0.445661
75,0.4853,0.500205,0.803949,0.445661
100,0.5168,0.495936,0.803949,0.445661
125,0.5006,0.499233,0.803949,0.445661
150,0.5061,0.497444,0.803949,0.445661
175,0.4969,0.503894,0.803949,0.445661
200,0.504,0.502282,0.803949,0.445661
225,0.4839,0.500248,0.803949,0.445661
250,0.5008,0.505779,0.803949,0.445661


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5572,0.491146,0.812412,0.448249
50,0.5042,0.494648,0.812412,0.448249
75,0.5003,0.484169,0.812412,0.448249
100,0.4888,0.482617,0.812412,0.448249
125,0.5063,0.495196,0.812412,0.448249
150,0.512,0.499273,0.812412,0.448249
175,0.51,0.478257,0.812412,0.448249
200,0.4739,0.45926,0.815233,0.568777
225,0.4594,0.455531,0.816643,0.590268
250,0.465,0.443573,0.826516,0.595111


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5572,0.491146,0.812412,0.448249
50,0.5042,0.494648,0.812412,0.448249
75,0.5003,0.484169,0.812412,0.448249
100,0.4888,0.482617,0.812412,0.448249
125,0.5063,0.495196,0.812412,0.448249
150,0.512,0.499273,0.812412,0.448249
175,0.51,0.478257,0.812412,0.448249
200,0.4739,0.45926,0.815233,0.568777
225,0.4594,0.455531,0.816643,0.590268
250,0.465,0.443573,0.826516,0.595111


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.5572,0.491146,0.812412,0.448249
50,0.5042,0.494648,0.812412,0.448249
75,0.5003,0.484169,0.812412,0.448249
100,0.4888,0.482617,0.812412,0.448249
125,0.5063,0.495196,0.812412,0.448249
150,0.512,0.499273,0.812412,0.448249
175,0.51,0.478257,0.812412,0.448249
200,0.4739,0.45926,0.815233,0.568777
225,0.4594,0.455531,0.816643,0.590268
250,0.465,0.443573,0.826516,0.595111


DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 2833
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 709
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3143,0.225533,0.921016,0.868128
50,0.2233,0.252547,0.912553,0.849459
75,0.105,0.279978,0.90409,0.860284
100,0.0586,0.402025,0.911142,0.84557
125,0.0491,0.447342,0.894217,0.848323
150,0.0287,0.431107,0.909732,0.848383
175,0.0257,0.380339,0.923836,0.878531
200,0.003,0.479716,0.915374,0.870617
225,0.0078,0.492151,0.923836,0.880488
250,0.0064,0.522617,0.909732,0.862687


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3143,0.225533,0.921016,0.868128
50,0.2233,0.252547,0.912553,0.849459
75,0.105,0.279978,0.90409,0.860284
100,0.0586,0.402025,0.911142,0.84557
125,0.0491,0.447342,0.894217,0.848323
150,0.0287,0.431107,0.909732,0.848383
175,0.0257,0.380339,0.923836,0.878531
200,0.003,0.479716,0.915374,0.870617
225,0.0078,0.492151,0.923836,0.880488
250,0.0064,0.522617,0.909732,0.862687


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3143,0.225533,0.921016,0.868128
50,0.2233,0.252547,0.912553,0.849459
75,0.105,0.279978,0.90409,0.860284
100,0.0586,0.402025,0.911142,0.84557
125,0.0491,0.447342,0.894217,0.848323
150,0.0287,0.431107,0.909732,0.848383
175,0.0257,0.380339,0.923836,0.878531
200,0.003,0.479716,0.915374,0.870617
225,0.0078,0.492151,0.923836,0.880488
250,0.0064,0.522617,0.909732,0.862687


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3416,0.262525,0.916784,0.845275
50,0.2027,0.234019,0.921016,0.868128
75,0.1124,0.270122,0.929478,0.877826
100,0.0833,0.288234,0.922426,0.874548
125,0.0294,0.368011,0.919605,0.872134
150,0.0139,0.432166,0.916784,0.867647
175,0.0046,0.476278,0.922426,0.873111
200,0.0059,0.522012,0.918195,0.868811
225,0.0129,0.534893,0.909732,0.856822
250,0.0014,0.611537,0.912553,0.842417


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3416,0.262525,0.916784,0.845275
50,0.2027,0.234019,0.921016,0.868128
75,0.1124,0.270122,0.929478,0.877826
100,0.0833,0.288234,0.922426,0.874548
125,0.0294,0.368011,0.919605,0.872134
150,0.0139,0.432166,0.916784,0.867647
175,0.0046,0.476278,0.922426,0.873111
200,0.0059,0.522012,0.918195,0.868811
225,0.0129,0.534893,0.909732,0.856822
250,0.0014,0.611537,0.912553,0.842417


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3416,0.262525,0.916784,0.845275
50,0.2027,0.234019,0.921016,0.868128
75,0.1124,0.270122,0.929478,0.877826
100,0.0833,0.288234,0.922426,0.874548
125,0.0294,0.368011,0.919605,0.872134
150,0.0139,0.432166,0.916784,0.867647
175,0.0046,0.476278,0.922426,0.873111
200,0.0059,0.522012,0.918195,0.868811
225,0.0129,0.534893,0.909732,0.856822
250,0.0014,0.611537,0.912553,0.842417


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3058,0.255761,0.915374,0.840946
50,0.2244,0.244889,0.918195,0.848437
75,0.0965,0.371658,0.912553,0.833214
100,0.13,0.256416,0.928068,0.871677
125,0.0389,0.309382,0.922426,0.86518
150,0.0193,0.40403,0.915374,0.861176
175,0.0186,0.456736,0.912553,0.850398
200,0.0071,0.452028,0.918195,0.864226
225,0.0026,0.475223,0.922426,0.873836
250,0.0115,0.55289,0.894217,0.83786


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3058,0.255761,0.915374,0.840946
50,0.2244,0.244889,0.918195,0.848437
75,0.0965,0.371658,0.912553,0.833214
100,0.13,0.256416,0.928068,0.871677
125,0.0389,0.309382,0.922426,0.86518
150,0.0193,0.40403,0.915374,0.861176
175,0.0186,0.456736,0.912553,0.850398
200,0.0071,0.452028,0.918195,0.864226
225,0.0026,0.475223,0.922426,0.873836
250,0.0115,0.55289,0.894217,0.83786


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3058,0.255761,0.915374,0.840946
50,0.2244,0.244889,0.918195,0.848437
75,0.0965,0.371658,0.912553,0.833214
100,0.13,0.256416,0.928068,0.871677
125,0.0389,0.309382,0.922426,0.86518
150,0.0193,0.40403,0.915374,0.861176
175,0.0186,0.456736,0.912553,0.850398
200,0.0071,0.452028,0.918195,0.864226
225,0.0026,0.475223,0.922426,0.873836
250,0.0115,0.55289,0.894217,0.83786


DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 2833
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 709
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3224,0.352323,0.892807,0.799979
50,0.2282,0.298733,0.912553,0.84554
75,0.1421,0.224632,0.923836,0.878531
100,0.0855,0.431966,0.90409,0.830593
125,0.0629,0.306177,0.915374,0.861973
150,0.0377,0.366399,0.916784,0.862292
175,0.0254,0.392768,0.922426,0.873111
200,0.0218,0.407141,0.911142,0.870854
225,0.0107,0.422975,0.912553,0.852228
250,0.0041,0.439628,0.922426,0.874548


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3224,0.352323,0.892807,0.799979
50,0.2282,0.298733,0.912553,0.84554
75,0.1421,0.224632,0.923836,0.878531
100,0.0855,0.431966,0.90409,0.830593
125,0.0629,0.306177,0.915374,0.861973
150,0.0377,0.366399,0.916784,0.862292
175,0.0254,0.392768,0.922426,0.873111
200,0.0218,0.407141,0.911142,0.870854
225,0.0107,0.422975,0.912553,0.852228
250,0.0041,0.439628,0.922426,0.874548


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3224,0.352323,0.892807,0.799979
50,0.2282,0.298733,0.912553,0.84554
75,0.1421,0.224632,0.923836,0.878531
100,0.0855,0.431966,0.90409,0.830593
125,0.0629,0.306177,0.915374,0.861973
150,0.0377,0.366399,0.916784,0.862292
175,0.0254,0.392768,0.922426,0.873111
200,0.0218,0.407141,0.911142,0.870854
225,0.0107,0.422975,0.912553,0.852228
250,0.0041,0.439628,0.922426,0.874548


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3332,0.254649,0.901269,0.839883
50,0.2438,0.243322,0.921016,0.857667
75,0.1383,0.236606,0.928068,0.87336
100,0.0978,0.322583,0.911142,0.861671
125,0.0451,0.368465,0.922426,0.873836
150,0.0424,0.333888,0.929478,0.881549
175,0.0264,0.403322,0.929478,0.881549
200,0.0118,0.39161,0.926657,0.872939
225,0.0035,0.441458,0.926657,0.880376
250,0.0026,0.47792,0.932299,0.884893


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3332,0.254649,0.901269,0.839883
50,0.2438,0.243322,0.921016,0.857667
75,0.1383,0.236606,0.928068,0.87336
100,0.0978,0.322583,0.911142,0.861671
125,0.0451,0.368465,0.922426,0.873836
150,0.0424,0.333888,0.929478,0.881549
175,0.0264,0.403322,0.929478,0.881549
200,0.0118,0.39161,0.926657,0.872939
225,0.0035,0.441458,0.926657,0.880376
250,0.0026,0.47792,0.932299,0.884893


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3332,0.254649,0.901269,0.839883
50,0.2438,0.243322,0.921016,0.857667
75,0.1383,0.236606,0.928068,0.87336
100,0.0978,0.322583,0.911142,0.861671
125,0.0451,0.368465,0.922426,0.873836
150,0.0424,0.333888,0.929478,0.881549
175,0.0264,0.403322,0.929478,0.881549
200,0.0118,0.39161,0.926657,0.872939
225,0.0035,0.441458,0.926657,0.880376
250,0.0026,0.47792,0.932299,0.884893


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3435,0.242282,0.918195,0.849502
50,0.2172,0.213972,0.909732,0.845572
75,0.1422,0.282806,0.925247,0.861992
100,0.1005,0.307963,0.906911,0.858396
125,0.0654,0.332393,0.926657,0.866929
150,0.039,0.369561,0.926657,0.875301
175,0.0295,0.40973,0.922426,0.861612
200,0.0041,0.452196,0.926657,0.867834
225,0.0083,0.44681,0.926657,0.878985
250,0.0016,0.474647,0.925247,0.873293


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3435,0.242282,0.918195,0.849502
50,0.2172,0.213972,0.909732,0.845572
75,0.1422,0.282806,0.925247,0.861992
100,0.1005,0.307963,0.906911,0.858396
125,0.0654,0.332393,0.926657,0.866929
150,0.039,0.369561,0.926657,0.875301
175,0.0295,0.40973,0.922426,0.861612
200,0.0041,0.452196,0.926657,0.867834
225,0.0083,0.44681,0.926657,0.878985
250,0.0016,0.474647,0.925247,0.873293


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3435,0.242282,0.918195,0.849502
50,0.2172,0.213972,0.909732,0.845572
75,0.1422,0.282806,0.925247,0.861992
100,0.1005,0.307963,0.906911,0.858396
125,0.0654,0.332393,0.926657,0.866929
150,0.039,0.369561,0.926657,0.875301
175,0.0295,0.40973,0.922426,0.861612
200,0.0041,0.452196,0.926657,0.867834
225,0.0083,0.44681,0.926657,0.878985
250,0.0016,0.474647,0.925247,0.873293


DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 2833
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 709
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3382,0.272599,0.906911,0.839747
50,0.2361,0.271816,0.894217,0.813764
75,0.162,0.268304,0.911142,0.865839
100,0.0877,0.287356,0.911142,0.856301
125,0.0705,0.318711,0.916784,0.876183
150,0.0304,0.347178,0.918195,0.873642
175,0.0193,0.55301,0.901269,0.830034
200,0.0064,0.519865,0.898449,0.846293
225,0.0065,0.566631,0.911142,0.858674
250,0.0016,0.569869,0.912553,0.868939


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3382,0.272599,0.906911,0.839747
50,0.2361,0.271816,0.894217,0.813764
75,0.162,0.268304,0.911142,0.865839
100,0.0877,0.287356,0.911142,0.856301
125,0.0705,0.318711,0.916784,0.876183
150,0.0304,0.347178,0.918195,0.873642
175,0.0193,0.55301,0.901269,0.830034
200,0.0064,0.519865,0.898449,0.846293
225,0.0065,0.566631,0.911142,0.858674
250,0.0016,0.569869,0.912553,0.868939


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3382,0.272599,0.906911,0.839747
50,0.2361,0.271816,0.894217,0.813764
75,0.162,0.268304,0.911142,0.865839
100,0.0877,0.287356,0.911142,0.856301
125,0.0705,0.318711,0.916784,0.876183
150,0.0304,0.347178,0.918195,0.873642
175,0.0193,0.55301,0.901269,0.830034
200,0.0064,0.519865,0.898449,0.846293
225,0.0065,0.566631,0.911142,0.858674
250,0.0016,0.569869,0.912553,0.868939


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3574,0.297391,0.90268,0.816421
50,0.24,0.264718,0.897038,0.835339
75,0.1314,0.301111,0.905501,0.842693
100,0.106,0.325663,0.906911,0.853145
125,0.0414,0.494806,0.90268,0.841721
150,0.0289,0.509112,0.899859,0.837133
175,0.0091,0.582205,0.897038,0.832546
200,0.014,0.576925,0.905501,0.833629
225,0.0093,0.597435,0.90409,0.836932
250,0.0003,0.675435,0.90268,0.843495


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3574,0.297391,0.90268,0.816421
50,0.24,0.264718,0.897038,0.835339
75,0.1314,0.301111,0.905501,0.842693
100,0.106,0.325663,0.906911,0.853145
125,0.0414,0.494806,0.90268,0.841721
150,0.0289,0.509112,0.899859,0.837133
175,0.0091,0.582205,0.897038,0.832546
200,0.014,0.576925,0.905501,0.833629
225,0.0093,0.597435,0.90409,0.836932
250,0.0003,0.675435,0.90268,0.843495


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3574,0.297391,0.90268,0.816421
50,0.24,0.264718,0.897038,0.835339
75,0.1314,0.301111,0.905501,0.842693
100,0.106,0.325663,0.906911,0.853145
125,0.0414,0.494806,0.90268,0.841721
150,0.0289,0.509112,0.899859,0.837133
175,0.0091,0.582205,0.897038,0.832546
200,0.014,0.576925,0.905501,0.833629
225,0.0093,0.597435,0.90409,0.836932
250,0.0003,0.675435,0.90268,0.843495


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3275,0.230063,0.913963,0.846516
50,0.2494,0.278708,0.906911,0.816976
75,0.1358,0.257639,0.916784,0.85253
100,0.1418,0.368633,0.905501,0.812004
125,0.0686,0.332804,0.913963,0.848528
150,0.0396,0.390139,0.918195,0.854549
175,0.0249,0.450862,0.908322,0.83964
200,0.0102,0.502043,0.906911,0.835575
225,0.0053,0.528319,0.908322,0.841676
250,0.0014,0.553349,0.912553,0.84554


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3275,0.230063,0.913963,0.846516
50,0.2494,0.278708,0.906911,0.816976
75,0.1358,0.257639,0.916784,0.85253
100,0.1418,0.368633,0.905501,0.812004
125,0.0686,0.332804,0.913963,0.848528
150,0.0396,0.390139,0.918195,0.854549
175,0.0249,0.450862,0.908322,0.83964
200,0.0102,0.502043,0.906911,0.835575
225,0.0053,0.528319,0.908322,0.841676
250,0.0014,0.553349,0.912553,0.84554


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3275,0.230063,0.913963,0.846516
50,0.2494,0.278708,0.906911,0.816976
75,0.1358,0.257639,0.916784,0.85253
100,0.1418,0.368633,0.905501,0.812004
125,0.0686,0.332804,0.913963,0.848528
150,0.0396,0.390139,0.918195,0.854549
175,0.0249,0.450862,0.908322,0.83964
200,0.0102,0.502043,0.906911,0.835575
225,0.0053,0.528319,0.908322,0.841676
250,0.0014,0.553349,0.912553,0.84554


DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 2833
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 709
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.2969,0.238469,0.905501,0.841752
50,0.213,0.235473,0.911142,0.858674
75,0.1441,0.285909,0.901269,0.831094
100,0.0807,0.302846,0.915374,0.861176
125,0.0669,0.485734,0.908322,0.844604
150,0.0281,0.408612,0.906911,0.852348
175,0.0196,0.46308,0.906911,0.846404
200,0.0129,0.492354,0.908322,0.858024
225,0.0067,0.521806,0.905501,0.848873
250,0.0046,0.591161,0.901269,0.845077


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.2969,0.238469,0.905501,0.841752
50,0.213,0.235473,0.911142,0.858674
75,0.1441,0.285909,0.901269,0.831094
100,0.0807,0.302846,0.915374,0.861176
125,0.0669,0.485734,0.908322,0.844604
150,0.0281,0.408612,0.906911,0.852348
175,0.0196,0.46308,0.906911,0.846404
200,0.0129,0.492354,0.908322,0.858024
225,0.0067,0.521806,0.905501,0.848873
250,0.0046,0.591161,0.901269,0.845077


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.2969,0.238469,0.905501,0.841752
50,0.213,0.235473,0.911142,0.858674
75,0.1441,0.285909,0.901269,0.831094
100,0.0807,0.302846,0.915374,0.861176
125,0.0669,0.485734,0.908322,0.844604
150,0.0281,0.408612,0.906911,0.852348
175,0.0196,0.46308,0.906911,0.846404
200,0.0129,0.492354,0.908322,0.858024
225,0.0067,0.521806,0.905501,0.848873
250,0.0046,0.591161,0.901269,0.845077


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3021,0.250796,0.899859,0.837133
50,0.2306,0.234003,0.919605,0.867735
75,0.1167,0.344002,0.885755,0.831625
100,0.0868,0.336537,0.898449,0.838042
125,0.0275,0.56152,0.884344,0.829946
150,0.0298,0.57223,0.895628,0.835344
175,0.0116,0.567202,0.905501,0.842693
200,0.005,0.550184,0.912553,0.856549
225,0.0058,0.665844,0.897038,0.84055
250,0.0056,0.628467,0.911142,0.8503


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3021,0.250796,0.899859,0.837133
50,0.2306,0.234003,0.919605,0.867735
75,0.1167,0.344002,0.885755,0.831625
100,0.0868,0.336537,0.898449,0.838042
125,0.0275,0.56152,0.884344,0.829946
150,0.0298,0.57223,0.895628,0.835344
175,0.0116,0.567202,0.905501,0.842693
200,0.005,0.550184,0.912553,0.856549
225,0.0058,0.665844,0.897038,0.84055
250,0.0056,0.628467,0.911142,0.8503


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3021,0.250796,0.899859,0.837133
50,0.2306,0.234003,0.919605,0.867735
75,0.1167,0.344002,0.885755,0.831625
100,0.0868,0.336537,0.898449,0.838042
125,0.0275,0.56152,0.884344,0.829946
150,0.0298,0.57223,0.895628,0.835344
175,0.0116,0.567202,0.905501,0.842693
200,0.005,0.550184,0.912553,0.856549
225,0.0058,0.665844,0.897038,0.84055
250,0.0056,0.628467,0.911142,0.8503


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3005,0.236199,0.908322,0.849171
50,0.219,0.224871,0.922426,0.866874
75,0.1254,0.274929,0.921016,0.856693
100,0.1029,0.283533,0.908322,0.844604
125,0.0616,0.336511,0.915374,0.854315
150,0.0419,0.349805,0.909732,0.853607
175,0.0213,0.371555,0.919605,0.857529
200,0.0083,0.539153,0.921016,0.855702
225,0.0038,0.590436,0.918195,0.848437
250,0.0017,0.562921,0.906911,0.845499


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3005,0.236199,0.908322,0.849171
50,0.219,0.224871,0.922426,0.866874
75,0.1254,0.274929,0.921016,0.856693
100,0.1029,0.283533,0.908322,0.844604
125,0.0616,0.336511,0.915374,0.854315
150,0.0419,0.349805,0.909732,0.853607
175,0.0213,0.371555,0.919605,0.857529
200,0.0083,0.539153,0.921016,0.855702
225,0.0038,0.590436,0.918195,0.848437
250,0.0017,0.562921,0.906911,0.845499


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2833 [00:00<?, ? examples/s]

Map:   0%|          | 0/709 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3005,0.236199,0.908322,0.849171
50,0.219,0.224871,0.922426,0.866874
75,0.1254,0.274929,0.921016,0.856693
100,0.1029,0.283533,0.908322,0.844604
125,0.0616,0.336511,0.915374,0.854315
150,0.0419,0.349805,0.909732,0.853607
175,0.0213,0.371555,0.919605,0.857529
200,0.0083,0.539153,0.921016,0.855702
225,0.0038,0.590436,0.918195,0.848437
250,0.0017,0.562921,0.906911,0.845499


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.93653,0.892418
3,alger-ia/dziribert,0.923836,0.880488
6,faisalq/EgyBERT,0.925247,0.874819
9,faisalq/SaudiBERT,0.937941,0.893164
12,otmangi/MorRoBERTa,0.916784,0.876183
15,otmangi/MorrBERT,0.919605,0.867735
18,tunis-ai/TunBERT,0.832158,0.673796
