In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'MDOD_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/Moroccan_Darija_Offensive_Language_Detection_Dataset.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(len(df))
display(df[:4])



classes = set(df['label'].values)
display(classes)

c = df['label'].value_counts()
display(c)

df['label'] = df['label'].astype('category')
df['label'] = df['label'].cat.codes

df = df[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


ds = Dataset.from_pandas(df)
ds = ds.train_test_split(test_size=0.2)

display(ds)

max_sequence_length = 128


models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]

seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['text'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True) # , batched=True
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)  # , batched=True
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 12
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 100, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 100
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-16 17:27:29.555766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-16 17:27:29.579015: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['text', 'label'], dtype='object')

20402

Unnamed: 0,text,label
0,eh mais enfaite ‘zebi’ ça veut dire bise’ en verlan nan,1
1,je vais me réveiller à 6h au lieu de 7h histoire de réviser un peu zebi oslm tu m’a plus jamais revu,1
2,ma matrixe la video il est trop chou zebi,1
3,mmh tu manquer l’odeur de ta culotte nahchtoulimek y’a zebi,1


{0, 1}

label
0    12685
1     7717
Name: count, dtype: int64

2

20402

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5544,0.459178,0.80642,0.767203
200,0.3987,0.345347,0.849792,0.839143
300,0.3378,0.315817,0.871845,0.859132
400,0.2819,0.340039,0.87062,0.858794
500,0.2697,0.297994,0.875521,0.867653
600,0.2139,0.358614,0.873805,0.866183
700,0.2112,0.328354,0.879196,0.868894
800,0.1855,0.386616,0.881157,0.873694
900,0.1486,0.362102,0.883607,0.876484
1000,0.1593,0.344611,0.886792,0.879729


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5436,0.409549,0.825778,0.797802
200,0.3847,0.332016,0.857388,0.845295
300,0.3128,0.312811,0.874296,0.864601
400,0.2547,0.342192,0.875766,0.862444
500,0.2496,0.293227,0.878951,0.869262
600,0.1864,0.340344,0.875766,0.866033
700,0.178,0.3202,0.887283,0.879405
800,0.1518,0.367211,0.889243,0.881754
900,0.126,0.351527,0.885812,0.8789
1000,0.1372,0.345163,0.892918,0.885265


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5436,0.409549,0.825778,0.797802
200,0.3847,0.332016,0.857388,0.845295
300,0.3128,0.312811,0.874296,0.864601
400,0.2547,0.342192,0.875766,0.862444
500,0.2496,0.293227,0.878951,0.869262
600,0.1864,0.340344,0.875766,0.866033
700,0.178,0.3202,0.887283,0.879405
800,0.1518,0.367211,0.889243,0.881754
900,0.126,0.351527,0.885812,0.8789
1000,0.1372,0.345163,0.892918,0.885265


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5511,0.414939,0.7753,0.717968
200,0.3846,0.353279,0.852487,0.83814
300,0.3519,0.322297,0.862779,0.85005
400,0.2895,0.301971,0.87013,0.858736
500,0.2717,0.285631,0.883852,0.876385
600,0.2181,0.335656,0.873805,0.860601
700,0.2157,0.286475,0.879196,0.870978
800,0.2208,0.319154,0.890468,0.882153
900,0.1662,0.33587,0.883362,0.871645
1000,0.1622,0.333442,0.890223,0.88116


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5511,0.414939,0.7753,0.717968
200,0.3846,0.353279,0.852487,0.83814
300,0.3519,0.322297,0.862779,0.85005
400,0.2895,0.301971,0.87013,0.858736
500,0.2717,0.285631,0.883852,0.876385
600,0.2181,0.335656,0.873805,0.860601
700,0.2157,0.286475,0.879196,0.870978
800,0.2208,0.319154,0.890468,0.882153
900,0.1662,0.33587,0.883362,0.871645
1000,0.1622,0.333442,0.890223,0.88116


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5511,0.414939,0.7753,0.717968
200,0.3846,0.353279,0.852487,0.83814
300,0.3519,0.322297,0.862779,0.85005
400,0.2895,0.301971,0.87013,0.858736
500,0.2717,0.285631,0.883852,0.876385
600,0.2181,0.335656,0.873805,0.860601
700,0.2157,0.286475,0.879196,0.870978
800,0.2208,0.319154,0.890468,0.882153
900,0.1662,0.33587,0.883362,0.871645
1000,0.1622,0.333442,0.890223,0.88116


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5588,0.411026,0.829699,0.810858
200,0.3784,0.344967,0.854447,0.841045
300,0.3007,0.349942,0.863514,0.849483
400,0.2591,0.301726,0.870865,0.86171
500,0.2619,0.310194,0.876746,0.866668
600,0.1789,0.339545,0.882872,0.873052
700,0.1683,0.38099,0.880421,0.870998
800,0.171,0.418336,0.881892,0.871491
900,0.1247,0.384654,0.878461,0.871168
1000,0.1299,0.371545,0.880912,0.873858


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5588,0.411026,0.829699,0.810858
200,0.3784,0.344967,0.854447,0.841045
300,0.3007,0.349942,0.863514,0.849483
400,0.2591,0.301726,0.870865,0.86171
500,0.2619,0.310194,0.876746,0.866668
600,0.1789,0.339545,0.882872,0.873052
700,0.1683,0.38099,0.880421,0.870998
800,0.171,0.418336,0.881892,0.871491
900,0.1247,0.384654,0.878461,0.871168
1000,0.1299,0.371545,0.880912,0.873858


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5588,0.411026,0.829699,0.810858
200,0.3784,0.344967,0.854447,0.841045
300,0.3007,0.349942,0.863514,0.849483
400,0.2591,0.301726,0.870865,0.86171
500,0.2619,0.310194,0.876746,0.866668
600,0.1789,0.339545,0.882872,0.873052
700,0.1683,0.38099,0.880421,0.870998
800,0.171,0.418336,0.881892,0.871491
900,0.1247,0.384654,0.878461,0.871168
1000,0.1299,0.371545,0.880912,0.873858


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3965,0.311262,0.858858,0.847026
200,0.3277,0.292305,0.881647,0.87298
300,0.2489,0.336736,0.875031,0.869095
400,0.1797,0.298168,0.889488,0.882308
500,0.1856,0.301157,0.881157,0.875977
600,0.1064,0.373354,0.884832,0.879013
700,0.1086,0.324828,0.889488,0.883652
800,0.0947,0.489099,0.882382,0.877294
900,0.0707,0.478762,0.890223,0.883374
1000,0.0932,0.461746,0.892673,0.884767


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3965,0.311262,0.858858,0.847026
200,0.3277,0.292305,0.881647,0.87298
300,0.2489,0.336736,0.875031,0.869095
400,0.1797,0.298168,0.889488,0.882308
500,0.1856,0.301157,0.881157,0.875977
600,0.1064,0.373354,0.884832,0.879013
700,0.1086,0.324828,0.889488,0.883652
800,0.0947,0.489099,0.882382,0.877294
900,0.0707,0.478762,0.890223,0.883374
1000,0.0932,0.461746,0.892673,0.884767


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3965,0.311262,0.858858,0.847026
200,0.3277,0.292305,0.881647,0.87298
300,0.2489,0.336736,0.875031,0.869095
400,0.1797,0.298168,0.889488,0.882308
500,0.1856,0.301157,0.881157,0.875977
600,0.1064,0.373354,0.884832,0.879013
700,0.1086,0.324828,0.889488,0.883652
800,0.0947,0.489099,0.882382,0.877294
900,0.0707,0.478762,0.890223,0.883374
1000,0.0932,0.461746,0.892673,0.884767


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4038,0.314613,0.859593,0.85012
200,0.3087,0.301105,0.876501,0.867329
300,0.2661,0.299247,0.888508,0.877801
400,0.1838,0.322765,0.887283,0.878434
500,0.1948,0.253942,0.896104,0.888903
600,0.1199,0.319725,0.895369,0.887204
700,0.1136,0.303263,0.891693,0.885419
800,0.1072,0.463727,0.892918,0.886223
900,0.0781,0.387389,0.890223,0.881504
1000,0.0985,0.366529,0.893408,0.887675


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4038,0.314613,0.859593,0.85012
200,0.3087,0.301105,0.876501,0.867329
300,0.2661,0.299247,0.888508,0.877801
400,0.1838,0.322765,0.887283,0.878434
500,0.1948,0.253942,0.896104,0.888903
600,0.1199,0.319725,0.895369,0.887204
700,0.1136,0.303263,0.891693,0.885419
800,0.1072,0.463727,0.892918,0.886223
900,0.0781,0.387389,0.890223,0.881504
1000,0.0985,0.366529,0.893408,0.887675


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4038,0.314613,0.859593,0.85012
200,0.3087,0.301105,0.876501,0.867329
300,0.2661,0.299247,0.888508,0.877801
400,0.1838,0.322765,0.887283,0.878434
500,0.1948,0.253942,0.896104,0.888903
600,0.1199,0.319725,0.895369,0.887204
700,0.1136,0.303263,0.891693,0.885419
800,0.1072,0.463727,0.892918,0.886223
900,0.0781,0.387389,0.890223,0.881504
1000,0.0985,0.366529,0.893408,0.887675


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4013,0.312691,0.865474,0.851497
200,0.3234,0.301766,0.875031,0.86331
300,0.2475,0.311041,0.881647,0.871244
400,0.1862,0.312344,0.875276,0.870597
500,0.186,0.294535,0.889733,0.883624
600,0.1137,0.36868,0.887283,0.878434
700,0.1141,0.387116,0.895614,0.889073
800,0.11,0.460286,0.897574,0.891481
900,0.0755,0.416303,0.891938,0.886491
1000,0.081,0.438132,0.894144,0.887902


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4013,0.312691,0.865474,0.851497
200,0.3234,0.301766,0.875031,0.86331
300,0.2475,0.311041,0.881647,0.871244
400,0.1862,0.312344,0.875276,0.870597
500,0.186,0.294535,0.889733,0.883624
600,0.1137,0.36868,0.887283,0.878434
700,0.1141,0.387116,0.895614,0.889073
800,0.11,0.460286,0.897574,0.891481
900,0.0755,0.416303,0.891938,0.886491
1000,0.081,0.438132,0.894144,0.887902


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4013,0.312691,0.865474,0.851497
200,0.3234,0.301766,0.875031,0.86331
300,0.2475,0.311041,0.881647,0.871244
400,0.1862,0.312344,0.875276,0.870597
500,0.186,0.294535,0.889733,0.883624
600,0.1137,0.36868,0.887283,0.878434
700,0.1141,0.387116,0.895614,0.889073
800,0.11,0.460286,0.897574,0.891481
900,0.0755,0.416303,0.891938,0.886491
1000,0.081,0.438132,0.894144,0.887902


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6744,0.505514,0.764273,0.699823
200,0.5176,0.505401,0.768929,0.70481
300,0.5126,0.500012,0.769419,0.70663
400,0.5229,0.616922,0.768929,0.733718
500,0.5016,0.483955,0.768194,0.741063
600,0.4891,0.470176,0.767949,0.7421
700,0.467,0.45064,0.786817,0.751043
800,0.4573,0.464561,0.783141,0.757023
900,0.4324,0.45872,0.786082,0.752254
1000,0.4437,0.458032,0.788042,0.756932


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6744,0.505514,0.764273,0.699823
200,0.5176,0.505401,0.768929,0.70481
300,0.5126,0.500012,0.769419,0.70663
400,0.5229,0.616922,0.768929,0.733718
500,0.5016,0.483955,0.768194,0.741063
600,0.4891,0.470176,0.767949,0.7421
700,0.467,0.45064,0.786817,0.751043
800,0.4573,0.464561,0.783141,0.757023
900,0.4324,0.45872,0.786082,0.752254
1000,0.4437,0.458032,0.788042,0.756932


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6744,0.505514,0.764273,0.699823
200,0.5176,0.505401,0.768929,0.70481
300,0.5126,0.500012,0.769419,0.70663
400,0.5229,0.616922,0.768929,0.733718
500,0.5016,0.483955,0.768194,0.741063
600,0.4891,0.470176,0.767949,0.7421
700,0.467,0.45064,0.786817,0.751043
800,0.4573,0.464561,0.783141,0.757023
900,0.4324,0.45872,0.786082,0.752254
1000,0.4437,0.458032,0.788042,0.756932


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6554,0.596433,0.726783,0.688051
200,0.5589,0.513574,0.764028,0.695684
300,0.5245,0.507637,0.765989,0.697849
400,0.5238,0.512263,0.765009,0.697491
500,0.5098,0.523901,0.764764,0.697445
600,0.527,0.515182,0.765254,0.697896
700,0.5167,0.506161,0.77236,0.711202
800,0.5192,0.506119,0.765744,0.725956
900,0.4772,0.50193,0.77726,0.724931
1000,0.4817,0.479109,0.783141,0.729598


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6554,0.596433,0.726783,0.688051
200,0.5589,0.513574,0.764028,0.695684
300,0.5245,0.507637,0.765989,0.697849
400,0.5238,0.512263,0.765009,0.697491
500,0.5098,0.523901,0.764764,0.697445
600,0.527,0.515182,0.765254,0.697896
700,0.5167,0.506161,0.77236,0.711202
800,0.5192,0.506119,0.765744,0.725956
900,0.4772,0.50193,0.77726,0.724931
1000,0.4817,0.479109,0.783141,0.729598


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6554,0.596433,0.726783,0.688051
200,0.5589,0.513574,0.764028,0.695684
300,0.5245,0.507637,0.765989,0.697849
400,0.5238,0.512263,0.765009,0.697491
500,0.5098,0.523901,0.764764,0.697445
600,0.527,0.515182,0.765254,0.697896
700,0.5167,0.506161,0.77236,0.711202
800,0.5192,0.506119,0.765744,0.725956
900,0.4772,0.50193,0.77726,0.724931
1000,0.4817,0.479109,0.783141,0.729598


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5823,0.515445,0.765009,0.701705
200,0.5195,0.500694,0.765009,0.700499
300,0.5095,0.514866,0.77236,0.714593
400,0.4974,0.48316,0.77481,0.716406
500,0.4866,0.471885,0.77138,0.710948
600,0.46,0.45652,0.788042,0.741869
700,0.4564,0.461255,0.779711,0.724569
800,0.4607,0.446364,0.793923,0.759393
900,0.4336,0.450687,0.792453,0.754867
1000,0.4309,0.455145,0.788777,0.761551


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5823,0.515445,0.765009,0.701705
200,0.5195,0.500694,0.765009,0.700499
300,0.5095,0.514866,0.77236,0.714593
400,0.4974,0.48316,0.77481,0.716406
500,0.4866,0.471885,0.77138,0.710948
600,0.46,0.45652,0.788042,0.741869
700,0.4564,0.461255,0.779711,0.724569
800,0.4607,0.446364,0.793923,0.759393
900,0.4336,0.450687,0.792453,0.754867
1000,0.4309,0.455145,0.788777,0.761551


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.5823,0.515445,0.765009,0.701705
200,0.5195,0.500694,0.765009,0.700499
300,0.5095,0.514866,0.77236,0.714593
400,0.4974,0.48316,0.77481,0.716406
500,0.4866,0.471885,0.77138,0.710948
600,0.46,0.45652,0.788042,0.741869
700,0.4564,0.461255,0.779711,0.724569
800,0.4607,0.446364,0.793923,0.759393
900,0.4336,0.450687,0.792453,0.754867
1000,0.4309,0.455145,0.788777,0.761551


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3648,0.308227,0.861799,0.844447
200,0.3072,0.265358,0.887528,0.877679
300,0.2303,0.324013,0.873315,0.869008
400,0.1511,0.327159,0.893654,0.886875
500,0.1474,0.303827,0.897329,0.889961
600,0.0843,0.31407,0.893408,0.886998
700,0.0886,0.347446,0.893163,0.887164
800,0.0808,0.539144,0.891203,0.883569
900,0.0643,0.568337,0.888018,0.882712
1000,0.0696,0.473754,0.893408,0.886628


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3648,0.308227,0.861799,0.844447
200,0.3072,0.265358,0.887528,0.877679
300,0.2303,0.324013,0.873315,0.869008
400,0.1511,0.327159,0.893654,0.886875
500,0.1474,0.303827,0.897329,0.889961
600,0.0843,0.31407,0.893408,0.886998
700,0.0886,0.347446,0.893163,0.887164
800,0.0808,0.539144,0.891203,0.883569
900,0.0643,0.568337,0.888018,0.882712
1000,0.0696,0.473754,0.893408,0.886628


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3648,0.308227,0.861799,0.844447
200,0.3072,0.265358,0.887528,0.877679
300,0.2303,0.324013,0.873315,0.869008
400,0.1511,0.327159,0.893654,0.886875
500,0.1474,0.303827,0.897329,0.889961
600,0.0843,0.31407,0.893408,0.886998
700,0.0886,0.347446,0.893163,0.887164
800,0.0808,0.539144,0.891203,0.883569
900,0.0643,0.568337,0.888018,0.882712
1000,0.0696,0.473754,0.893408,0.886628


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3678,0.307333,0.868905,0.861293
200,0.2936,0.271078,0.889488,0.880063
300,0.2453,0.282969,0.889733,0.879124
400,0.1491,0.312378,0.891448,0.883878
500,0.1554,0.290761,0.890958,0.885331
600,0.0904,0.371106,0.895859,0.887985
700,0.0894,0.402433,0.895614,0.887258
800,0.1128,0.557872,0.894389,0.887926
900,0.0669,0.443131,0.895124,0.885644
1000,0.0709,0.525065,0.899779,0.892292


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3678,0.307333,0.868905,0.861293
200,0.2936,0.271078,0.889488,0.880063
300,0.2453,0.282969,0.889733,0.879124
400,0.1491,0.312378,0.891448,0.883878
500,0.1554,0.290761,0.890958,0.885331
600,0.0904,0.371106,0.895859,0.887985
700,0.0894,0.402433,0.895614,0.887258
800,0.1128,0.557872,0.894389,0.887926
900,0.0669,0.443131,0.895124,0.885644
1000,0.0709,0.525065,0.899779,0.892292


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3678,0.307333,0.868905,0.861293
200,0.2936,0.271078,0.889488,0.880063
300,0.2453,0.282969,0.889733,0.879124
400,0.1491,0.312378,0.891448,0.883878
500,0.1554,0.290761,0.890958,0.885331
600,0.0904,0.371106,0.895859,0.887985
700,0.0894,0.402433,0.895614,0.887258
800,0.1128,0.557872,0.894389,0.887926
900,0.0669,0.443131,0.895124,0.885644
1000,0.0709,0.525065,0.899779,0.892292


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3857,0.295028,0.875031,0.868232
200,0.3039,0.278812,0.882872,0.872557
300,0.2253,0.315107,0.891203,0.88338
400,0.1614,0.308894,0.891203,0.884327
500,0.1517,0.29905,0.893408,0.88486
600,0.0797,0.431283,0.893654,0.885597
700,0.0936,0.436809,0.899534,0.892542
800,0.0791,0.585511,0.894634,0.888475
900,0.0598,0.589352,0.893654,0.887815
1000,0.0649,0.4793,0.894389,0.886115


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3857,0.295028,0.875031,0.868232
200,0.3039,0.278812,0.882872,0.872557
300,0.2253,0.315107,0.891203,0.88338
400,0.1614,0.308894,0.891203,0.884327
500,0.1517,0.29905,0.893408,0.88486
600,0.0797,0.431283,0.893654,0.885597
700,0.0936,0.436809,0.899534,0.892542
800,0.0791,0.585511,0.894634,0.888475
900,0.0598,0.589352,0.893654,0.887815
1000,0.0649,0.4793,0.894389,0.886115


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3857,0.295028,0.875031,0.868232
200,0.3039,0.278812,0.882872,0.872557
300,0.2253,0.315107,0.891203,0.88338
400,0.1614,0.308894,0.891203,0.884327
500,0.1517,0.29905,0.893408,0.88486
600,0.0797,0.431283,0.893654,0.885597
700,0.0936,0.436809,0.899534,0.892542
800,0.0791,0.585511,0.894634,0.888475
900,0.0598,0.589352,0.893654,0.887815
1000,0.0649,0.4793,0.894389,0.886115


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4233,0.342401,0.847096,0.824049
200,0.3322,0.323897,0.8716,0.85847
300,0.2824,0.380295,0.847096,0.842583
400,0.2241,0.331496,0.880176,0.871613
500,0.2122,0.331454,0.873315,0.866281
600,0.1342,0.363227,0.883607,0.874272
700,0.1366,0.357502,0.881647,0.875681
800,0.1173,0.512637,0.878951,0.873367
900,0.0886,0.478263,0.879196,0.872992
1000,0.0974,0.421605,0.884832,0.877302


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4233,0.342401,0.847096,0.824049
200,0.3322,0.323897,0.8716,0.85847
300,0.2824,0.380295,0.847096,0.842583
400,0.2241,0.331496,0.880176,0.871613
500,0.2122,0.331454,0.873315,0.866281
600,0.1342,0.363227,0.883607,0.874272
700,0.1366,0.357502,0.881647,0.875681
800,0.1173,0.512637,0.878951,0.873367
900,0.0886,0.478263,0.879196,0.872992
1000,0.0974,0.421605,0.884832,0.877302


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4233,0.342401,0.847096,0.824049
200,0.3322,0.323897,0.8716,0.85847
300,0.2824,0.380295,0.847096,0.842583
400,0.2241,0.331496,0.880176,0.871613
500,0.2122,0.331454,0.873315,0.866281
600,0.1342,0.363227,0.883607,0.874272
700,0.1366,0.357502,0.881647,0.875681
800,0.1173,0.512637,0.878951,0.873367
900,0.0886,0.478263,0.879196,0.872992
1000,0.0974,0.421605,0.884832,0.877302


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4194,0.323133,0.859838,0.849678
200,0.3255,0.316781,0.871355,0.859488
300,0.2837,0.32846,0.885077,0.873799
400,0.2098,0.307214,0.884342,0.875958
500,0.213,0.286784,0.894144,0.885769
600,0.1346,0.330354,0.888753,0.881137
700,0.1302,0.303129,0.889978,0.88243
800,0.1236,0.396839,0.889488,0.881264
900,0.0923,0.384366,0.889978,0.881257
1000,0.0939,0.360093,0.891448,0.883909


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4194,0.323133,0.859838,0.849678
200,0.3255,0.316781,0.871355,0.859488
300,0.2837,0.32846,0.885077,0.873799
400,0.2098,0.307214,0.884342,0.875958
500,0.213,0.286784,0.894144,0.885769
600,0.1346,0.330354,0.888753,0.881137
700,0.1302,0.303129,0.889978,0.88243
800,0.1236,0.396839,0.889488,0.881264
900,0.0923,0.384366,0.889978,0.881257
1000,0.0939,0.360093,0.891448,0.883909


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4194,0.323133,0.859838,0.849678
200,0.3255,0.316781,0.871355,0.859488
300,0.2837,0.32846,0.885077,0.873799
400,0.2098,0.307214,0.884342,0.875958
500,0.213,0.286784,0.894144,0.885769
600,0.1346,0.330354,0.888753,0.881137
700,0.1302,0.303129,0.889978,0.88243
800,0.1236,0.396839,0.889488,0.881264
900,0.0923,0.384366,0.889978,0.881257
1000,0.0939,0.360093,0.891448,0.883909


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.422,0.361549,0.852732,0.835232
200,0.3418,0.298166,0.873805,0.86449
300,0.2739,0.319191,0.878951,0.86801
400,0.2276,0.32438,0.86866,0.863824
500,0.2173,0.309953,0.888753,0.881692
600,0.137,0.326012,0.887528,0.880127
700,0.1367,0.388993,0.889243,0.880583
800,0.1278,0.439119,0.891448,0.88369
900,0.0908,0.383065,0.890713,0.884549
1000,0.087,0.427547,0.893408,0.88657


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.422,0.361549,0.852732,0.835232
200,0.3418,0.298166,0.873805,0.86449
300,0.2739,0.319191,0.878951,0.86801
400,0.2276,0.32438,0.86866,0.863824
500,0.2173,0.309953,0.888753,0.881692
600,0.137,0.326012,0.887528,0.880127
700,0.1367,0.388993,0.889243,0.880583
800,0.1278,0.439119,0.891448,0.88369
900,0.0908,0.383065,0.890713,0.884549
1000,0.087,0.427547,0.893408,0.88657


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.422,0.361549,0.852732,0.835232
200,0.3418,0.298166,0.873805,0.86449
300,0.2739,0.319191,0.878951,0.86801
400,0.2276,0.32438,0.86866,0.863824
500,0.2173,0.309953,0.888753,0.881692
600,0.137,0.326012,0.887528,0.880127
700,0.1367,0.388993,0.889243,0.880583
800,0.1278,0.439119,0.891448,0.88369
900,0.0908,0.383065,0.890713,0.884549
1000,0.087,0.427547,0.893408,0.88657


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.392,0.316881,0.859593,0.843188
200,0.3159,0.292084,0.884097,0.873717
300,0.2416,0.287246,0.895369,0.888368
400,0.1776,0.335721,0.882382,0.871592
500,0.1804,0.265926,0.893654,0.886553
600,0.0968,0.381419,0.896594,0.889368
700,0.1012,0.298212,0.897084,0.890718
800,0.0833,0.485421,0.887037,0.881915
900,0.0689,0.413802,0.895859,0.88892
1000,0.0715,0.454176,0.895614,0.888932


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.392,0.316881,0.859593,0.843188
200,0.3159,0.292084,0.884097,0.873717
300,0.2416,0.287246,0.895369,0.888368
400,0.1776,0.335721,0.882382,0.871592
500,0.1804,0.265926,0.893654,0.886553
600,0.0968,0.381419,0.896594,0.889368
700,0.1012,0.298212,0.897084,0.890718
800,0.0833,0.485421,0.887037,0.881915
900,0.0689,0.413802,0.895859,0.88892
1000,0.0715,0.454176,0.895614,0.888932


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.392,0.316881,0.859593,0.843188
200,0.3159,0.292084,0.884097,0.873717
300,0.2416,0.287246,0.895369,0.888368
400,0.1776,0.335721,0.882382,0.871592
500,0.1804,0.265926,0.893654,0.886553
600,0.0968,0.381419,0.896594,0.889368
700,0.1012,0.298212,0.897084,0.890718
800,0.0833,0.485421,0.887037,0.881915
900,0.0689,0.413802,0.895859,0.88892
1000,0.0715,0.454176,0.895614,0.888932


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3916,0.31067,0.867434,0.857921
200,0.3036,0.27373,0.881892,0.87305
300,0.2554,0.267761,0.892183,0.883885
400,0.1728,0.267112,0.894879,0.888334
500,0.1799,0.27791,0.892673,0.887402
600,0.0991,0.327802,0.898554,0.891809
700,0.0968,0.408092,0.892918,0.884095
800,0.121,0.460803,0.888018,0.881283
900,0.0695,0.442196,0.886547,0.876273
1000,0.0823,0.411366,0.897574,0.889247


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3916,0.31067,0.867434,0.857921
200,0.3036,0.27373,0.881892,0.87305
300,0.2554,0.267761,0.892183,0.883885
400,0.1728,0.267112,0.894879,0.888334
500,0.1799,0.27791,0.892673,0.887402
600,0.0991,0.327802,0.898554,0.891809
700,0.0968,0.408092,0.892918,0.884095
800,0.121,0.460803,0.888018,0.881283
900,0.0695,0.442196,0.886547,0.876273
1000,0.0823,0.411366,0.897574,0.889247


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3916,0.31067,0.867434,0.857921
200,0.3036,0.27373,0.881892,0.87305
300,0.2554,0.267761,0.892183,0.883885
400,0.1728,0.267112,0.894879,0.888334
500,0.1799,0.27791,0.892673,0.887402
600,0.0991,0.327802,0.898554,0.891809
700,0.0968,0.408092,0.892918,0.884095
800,0.121,0.460803,0.888018,0.881283
900,0.0695,0.442196,0.886547,0.876273
1000,0.0823,0.411366,0.897574,0.889247


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4099,0.324752,0.852732,0.845955
200,0.3202,0.28949,0.882382,0.874095
300,0.2407,0.307774,0.885567,0.875281
400,0.1808,0.293333,0.887773,0.88092
500,0.1827,0.278218,0.897819,0.890954
600,0.104,0.381885,0.881892,0.87706
700,0.1011,0.451874,0.894389,0.88681
800,0.1029,0.511619,0.894879,0.88802
900,0.0652,0.452294,0.890713,0.885008
1000,0.0701,0.430841,0.895859,0.889375


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4099,0.324752,0.852732,0.845955
200,0.3202,0.28949,0.882382,0.874095
300,0.2407,0.307774,0.885567,0.875281
400,0.1808,0.293333,0.887773,0.88092
500,0.1827,0.278218,0.897819,0.890954
600,0.104,0.381885,0.881892,0.87706
700,0.1011,0.451874,0.894389,0.88681
800,0.1029,0.511619,0.894879,0.88802
900,0.0652,0.452294,0.890713,0.885008
1000,0.0701,0.430841,0.895859,0.889375


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4099,0.324752,0.852732,0.845955
200,0.3202,0.28949,0.882382,0.874095
300,0.2407,0.307774,0.885567,0.875281
400,0.1808,0.293333,0.887773,0.88092
500,0.1827,0.278218,0.897819,0.890954
600,0.104,0.381885,0.881892,0.87706
700,0.1011,0.451874,0.894389,0.88681
800,0.1029,0.511619,0.894879,0.88802
900,0.0652,0.452294,0.890713,0.885008
1000,0.0701,0.430841,0.895859,0.889375


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16321
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 4081
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3345,0.275559,0.879686,0.867751
200,0.2853,0.257518,0.895369,0.886681
300,0.2168,0.305379,0.889488,0.883917
400,0.1489,0.316333,0.894879,0.888501
500,0.1615,0.260229,0.901985,0.896555
600,0.0822,0.337235,0.895614,0.890139
700,0.0936,0.336214,0.899289,0.893723
800,0.078,0.482541,0.897329,0.891019
900,0.0577,0.457296,0.90174,0.895356
1000,0.0721,0.49922,0.898554,0.89123


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3345,0.275559,0.879686,0.867751
200,0.2853,0.257518,0.895369,0.886681
300,0.2168,0.305379,0.889488,0.883917
400,0.1489,0.316333,0.894879,0.888501
500,0.1615,0.260229,0.901985,0.896555
600,0.0822,0.337235,0.895614,0.890139
700,0.0936,0.336214,0.899289,0.893723
800,0.078,0.482541,0.897329,0.891019
900,0.0577,0.457296,0.90174,0.895356
1000,0.0721,0.49922,0.898554,0.89123


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3345,0.275559,0.879686,0.867751
200,0.2853,0.257518,0.895369,0.886681
300,0.2168,0.305379,0.889488,0.883917
400,0.1489,0.316333,0.894879,0.888501
500,0.1615,0.260229,0.901985,0.896555
600,0.0822,0.337235,0.895614,0.890139
700,0.0936,0.336214,0.899289,0.893723
800,0.078,0.482541,0.897329,0.891019
900,0.0577,0.457296,0.90174,0.895356
1000,0.0721,0.49922,0.898554,0.89123


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.326,0.291866,0.87258,0.865034
200,0.2791,0.251395,0.891693,0.883874
300,0.2312,0.265044,0.90223,0.893409
400,0.1443,0.267153,0.904925,0.89855
500,0.152,0.255104,0.90272,0.896611
600,0.091,0.342193,0.905415,0.898447
700,0.0876,0.360282,0.90076,0.893256
800,0.1216,0.436092,0.90027,0.892819
900,0.0673,0.399853,0.899779,0.891167
1000,0.0722,0.35152,0.907621,0.900154


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.326,0.291866,0.87258,0.865034
200,0.2791,0.251395,0.891693,0.883874
300,0.2312,0.265044,0.90223,0.893409
400,0.1443,0.267153,0.904925,0.89855
500,0.152,0.255104,0.90272,0.896611
600,0.091,0.342193,0.905415,0.898447
700,0.0876,0.360282,0.90076,0.893256
800,0.1216,0.436092,0.90027,0.892819
900,0.0673,0.399853,0.899779,0.891167
1000,0.0722,0.35152,0.907621,0.900154


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.326,0.291866,0.87258,0.865034
200,0.2791,0.251395,0.891693,0.883874
300,0.2312,0.265044,0.90223,0.893409
400,0.1443,0.267153,0.904925,0.89855
500,0.152,0.255104,0.90272,0.896611
600,0.091,0.342193,0.905415,0.898447
700,0.0876,0.360282,0.90076,0.893256
800,0.1216,0.436092,0.90027,0.892819
900,0.0673,0.399853,0.899779,0.891167
1000,0.0722,0.35152,0.907621,0.900154


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3515,0.289318,0.866699,0.861923
200,0.2817,0.271015,0.893654,0.885207
300,0.2146,0.302248,0.899779,0.892733
400,0.1534,0.294887,0.90223,0.895715
500,0.1487,0.278163,0.90566,0.898945
600,0.079,0.345093,0.90076,0.894713
700,0.0914,0.374306,0.901985,0.895974
800,0.0801,0.462016,0.902965,0.895902
900,0.0582,0.398301,0.90272,0.896637
1000,0.0593,0.497955,0.90272,0.895335


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3515,0.289318,0.866699,0.861923
200,0.2817,0.271015,0.893654,0.885207
300,0.2146,0.302248,0.899779,0.892733
400,0.1534,0.294887,0.90223,0.895715
500,0.1487,0.278163,0.90566,0.898945
600,0.079,0.345093,0.90076,0.894713
700,0.0914,0.374306,0.901985,0.895974
800,0.0801,0.462016,0.902965,0.895902
900,0.0582,0.398301,0.90272,0.896637
1000,0.0593,0.497955,0.90272,0.895335


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16321 [00:00<?, ? examples/s]

Map:   0%|          | 0/4081 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3515,0.289318,0.866699,0.861923
200,0.2817,0.271015,0.893654,0.885207
300,0.2146,0.302248,0.899779,0.892733
400,0.1534,0.294887,0.90223,0.895715
500,0.1487,0.278163,0.90566,0.898945
600,0.079,0.345093,0.90076,0.894713
700,0.0914,0.374306,0.901985,0.895974
800,0.0801,0.462016,0.902965,0.895902
900,0.0582,0.398301,0.90272,0.896637
1000,0.0593,0.497955,0.90272,0.895335


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.896104,0.889196
3,alger-ia/dziribert,0.90027,0.892608
6,faisalq/EgyBERT,0.895369,0.88783
9,faisalq/SaudiBERT,0.900515,0.894413
12,otmangi/MorRoBERTa,0.899779,0.89324
15,otmangi/MorrBERT,0.907621,0.900154
18,tunis-ai/TunBERT,0.802989,0.781232
