In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'MYC_cleaned_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/MYC/DATA_CLEANED.csv', encoding='utf-16', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(df[:4])

c = df['polarity'].value_counts()
display(c)

classes = set(df['polarity'].values)
display(classes)

df['polarity'] = df['polarity'].astype('category')
df['label'] = df['polarity'].cat.codes

df = df[['sentence', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


# ds = Dataset.from_pandas(df)
# ds = ds.train_test_split(test_size=0.2)

# display(ds)

max_sequence_length = 128



models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]

seeds = [0, 1, 42]



for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['sentence'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True)
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 15
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 100, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 100
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()
    

results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-17 11:56:16.591839: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-17 11:56:16.617630: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['sentence', 'polarity'], dtype='object')

Unnamed: 0,sentence,polarity
0,انسان عبارة عن دواء للإكتئاب❤🎤,1
1,نحبك يا فنان وااااااحسااااااااان❤,1
2,Stream zuin😂 Thank you ilyas ❤️🫶,1
3,وحق الرب الى دوا د الاكتئاب الأسطورة❤️❤️❤️❤️😂,1


polarity
 1    9999
-1    9992
Name: count, dtype: int64

{-1, 1}

2

19991

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 15992
    })
    test: Dataset({
        features: ['sentence', 'label'],
        num_rows: 3999
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3534,0.13331,0.96149,0.96149
200,0.0755,0.05095,0.989247,0.989247
300,0.0723,0.071345,0.982746,0.982743
400,0.051,0.042663,0.989997,0.989997
500,0.0464,0.033161,0.992498,0.992498
600,0.0386,0.039822,0.992248,0.992248
700,0.0355,0.04742,0.991248,0.991247
800,0.0294,0.038865,0.991248,0.991247
900,0.0274,0.038949,0.991498,0.991497
1000,0.0296,0.036975,0.988997,0.988997


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3455,0.093713,0.984496,0.984496
200,0.0742,0.054022,0.988997,0.988997
300,0.0478,0.044783,0.989997,0.989997
400,0.0432,0.035872,0.992248,0.992248
500,0.0428,0.038434,0.992248,0.992248
600,0.0342,0.035464,0.991498,0.991497
700,0.0325,0.038823,0.991998,0.991998
800,0.0269,0.041559,0.990498,0.990497
900,0.0257,0.03912,0.990998,0.990997
1000,0.0259,0.038782,0.989747,0.989747


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3455,0.093713,0.984496,0.984496
200,0.0742,0.054022,0.988997,0.988997
300,0.0478,0.044783,0.989997,0.989997
400,0.0432,0.035872,0.992248,0.992248
500,0.0428,0.038434,0.992248,0.992248
600,0.0342,0.035464,0.991498,0.991497
700,0.0325,0.038823,0.991998,0.991998
800,0.0269,0.041559,0.990498,0.990497
900,0.0257,0.03912,0.990998,0.990997
1000,0.0259,0.038782,0.989747,0.989747


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3413,0.092761,0.985996,0.985996
200,0.0746,0.062998,0.985996,0.985996
300,0.0461,0.048842,0.989497,0.989497
400,0.0437,0.043153,0.990498,0.990497
500,0.0427,0.042988,0.990248,0.990248
600,0.0348,0.049671,0.989497,0.989496
700,0.0231,0.052674,0.987997,0.987996
800,0.0294,0.04962,0.990248,0.990247
900,0.0186,0.0568,0.989997,0.989997
1000,0.0264,0.048038,0.990248,0.990248


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3413,0.092761,0.985996,0.985996
200,0.0746,0.062998,0.985996,0.985996
300,0.0461,0.048842,0.989497,0.989497
400,0.0437,0.043153,0.990498,0.990497
500,0.0427,0.042988,0.990248,0.990248
600,0.0348,0.049671,0.989497,0.989496
700,0.0231,0.052674,0.987997,0.987996
800,0.0294,0.04962,0.990248,0.990247
900,0.0186,0.0568,0.989997,0.989997
1000,0.0264,0.048038,0.990248,0.990248


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3413,0.092761,0.985996,0.985996
200,0.0746,0.062998,0.985996,0.985996
300,0.0461,0.048842,0.989497,0.989497
400,0.0437,0.043153,0.990498,0.990497
500,0.0427,0.042988,0.990248,0.990248
600,0.0348,0.049671,0.989497,0.989496
700,0.0231,0.052674,0.987997,0.987996
800,0.0294,0.04962,0.990248,0.990247
900,0.0186,0.0568,0.989997,0.989997
1000,0.0264,0.048038,0.990248,0.990248


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3432,0.091762,0.985746,0.985743
200,0.0702,0.056795,0.986497,0.986494
300,0.0378,0.055383,0.988247,0.988245
400,0.0372,0.062942,0.986747,0.986741
500,0.0448,0.042757,0.990748,0.990746
600,0.0275,0.04549,0.990748,0.990746
700,0.0232,0.055548,0.988497,0.988495
800,0.032,0.072515,0.983496,0.983486
900,0.024,0.051811,0.988247,0.988243
1000,0.0184,0.055738,0.990748,0.990746


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3432,0.091762,0.985746,0.985743
200,0.0702,0.056795,0.986497,0.986494
300,0.0378,0.055383,0.988247,0.988245
400,0.0372,0.062942,0.986747,0.986741
500,0.0448,0.042757,0.990748,0.990746
600,0.0275,0.04549,0.990748,0.990746
700,0.0232,0.055548,0.988497,0.988495
800,0.032,0.072515,0.983496,0.983486
900,0.024,0.051811,0.988247,0.988243
1000,0.0184,0.055738,0.990748,0.990746


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3432,0.091762,0.985746,0.985743
200,0.0702,0.056795,0.986497,0.986494
300,0.0378,0.055383,0.988247,0.988245
400,0.0372,0.062942,0.986747,0.986741
500,0.0448,0.042757,0.990748,0.990746
600,0.0275,0.04549,0.990748,0.990746
700,0.0232,0.055548,0.988497,0.988495
800,0.032,0.072515,0.983496,0.983486
900,0.024,0.051811,0.988247,0.988243
1000,0.0184,0.055738,0.990748,0.990746


DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 15992
    })
    test: Dataset({
        features: ['sentence', 'label'],
        num_rows: 3999
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1106,0.056716,0.981745,0.981745
200,0.0496,0.047482,0.987247,0.987247
300,0.0355,0.037007,0.991748,0.991748
400,0.0317,0.035646,0.991998,0.991997
500,0.0345,0.039191,0.990498,0.990497
600,0.0213,0.031925,0.993248,0.993248
700,0.0222,0.041866,0.991748,0.991748
800,0.0168,0.044481,0.990498,0.990497
900,0.0116,0.050549,0.991248,0.991247
1000,0.0102,0.062491,0.988247,0.988246


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1106,0.056716,0.981745,0.981745
200,0.0496,0.047482,0.987247,0.987247
300,0.0355,0.037007,0.991748,0.991748
400,0.0317,0.035646,0.991998,0.991997
500,0.0345,0.039191,0.990498,0.990497
600,0.0213,0.031925,0.993248,0.993248
700,0.0222,0.041866,0.991748,0.991748
800,0.0168,0.044481,0.990498,0.990497
900,0.0116,0.050549,0.991248,0.991247
1000,0.0102,0.062491,0.988247,0.988246


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1106,0.056716,0.981745,0.981745
200,0.0496,0.047482,0.987247,0.987247
300,0.0355,0.037007,0.991748,0.991748
400,0.0317,0.035646,0.991998,0.991997
500,0.0345,0.039191,0.990498,0.990497
600,0.0213,0.031925,0.993248,0.993248
700,0.0222,0.041866,0.991748,0.991748
800,0.0168,0.044481,0.990498,0.990497
900,0.0116,0.050549,0.991248,0.991247
1000,0.0102,0.062491,0.988247,0.988246


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1002,0.050596,0.988997,0.988997
200,0.0618,0.042733,0.990498,0.990498
300,0.0392,0.039811,0.991998,0.991998
400,0.0419,0.044384,0.989497,0.989497
500,0.0319,0.035644,0.992498,0.992498
600,0.0279,0.037816,0.991748,0.991748
700,0.0146,0.047688,0.990498,0.990497
800,0.0211,0.057801,0.990498,0.990498
900,0.0186,0.040404,0.992998,0.992998
1000,0.0157,0.050855,0.992498,0.992498


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1002,0.050596,0.988997,0.988997
200,0.0618,0.042733,0.990498,0.990498
300,0.0392,0.039811,0.991998,0.991998
400,0.0419,0.044384,0.989497,0.989497
500,0.0319,0.035644,0.992498,0.992498
600,0.0279,0.037816,0.991748,0.991748
700,0.0146,0.047688,0.990498,0.990497
800,0.0211,0.057801,0.990498,0.990498
900,0.0186,0.040404,0.992998,0.992998
1000,0.0157,0.050855,0.992498,0.992498


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1002,0.050596,0.988997,0.988997
200,0.0618,0.042733,0.990498,0.990498
300,0.0392,0.039811,0.991998,0.991998
400,0.0419,0.044384,0.989497,0.989497
500,0.0319,0.035644,0.992498,0.992498
600,0.0279,0.037816,0.991748,0.991748
700,0.0146,0.047688,0.990498,0.990497
800,0.0211,0.057801,0.990498,0.990498
900,0.0186,0.040404,0.992998,0.992998
1000,0.0157,0.050855,0.992498,0.992498


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1035,0.056442,0.988497,0.988495
200,0.0521,0.051471,0.988747,0.988744
300,0.036,0.052772,0.990498,0.990495
400,0.0283,0.050286,0.987247,0.987243
500,0.0376,0.043151,0.989497,0.989495
600,0.022,0.062601,0.987997,0.987994
700,0.0232,0.045568,0.990748,0.990746
800,0.0214,0.060482,0.988247,0.988242
900,0.0122,0.055298,0.988997,0.988993
1000,0.015,0.055181,0.989747,0.989745


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1035,0.056442,0.988497,0.988495
200,0.0521,0.051471,0.988747,0.988744
300,0.036,0.052772,0.990498,0.990495
400,0.0283,0.050286,0.987247,0.987243
500,0.0376,0.043151,0.989497,0.989495
600,0.022,0.062601,0.987997,0.987994
700,0.0232,0.045568,0.990748,0.990746
800,0.0214,0.060482,0.988247,0.988242
900,0.0122,0.055298,0.988997,0.988993
1000,0.015,0.055181,0.989747,0.989745


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1035,0.056442,0.988497,0.988495
200,0.0521,0.051471,0.988747,0.988744
300,0.036,0.052772,0.990498,0.990495
400,0.0283,0.050286,0.987247,0.987243
500,0.0376,0.043151,0.989497,0.989495
600,0.022,0.062601,0.987997,0.987994
700,0.0232,0.045568,0.990748,0.990746
800,0.0214,0.060482,0.988247,0.988242
900,0.0122,0.055298,0.988997,0.988993
1000,0.015,0.055181,0.989747,0.989745


DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 15992
    })
    test: Dataset({
        features: ['sentence', 'label'],
        num_rows: 3999
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6439,0.557287,0.714179,0.70092
200,0.5412,0.513803,0.747937,0.747776
300,0.5176,0.514575,0.748187,0.741525
400,0.4939,0.508914,0.765441,0.7625
500,0.5085,0.530135,0.735934,0.733591
600,0.4671,0.492214,0.749687,0.749306
700,0.4635,0.459119,0.777694,0.775858
800,0.4506,0.473693,0.764191,0.757629
900,0.4317,0.476035,0.778195,0.776729
1000,0.4339,0.493139,0.768192,0.768191


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6439,0.557287,0.714179,0.70092
200,0.5412,0.513803,0.747937,0.747776
300,0.5176,0.514575,0.748187,0.741525
400,0.4939,0.508914,0.765441,0.7625
500,0.5085,0.530135,0.735934,0.733591
600,0.4671,0.492214,0.749687,0.749306
700,0.4635,0.459119,0.777694,0.775858
800,0.4506,0.473693,0.764191,0.757629
900,0.4317,0.476035,0.778195,0.776729
1000,0.4339,0.493139,0.768192,0.768191


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6439,0.557287,0.714179,0.70092
200,0.5412,0.513803,0.747937,0.747776
300,0.5176,0.514575,0.748187,0.741525
400,0.4939,0.508914,0.765441,0.7625
500,0.5085,0.530135,0.735934,0.733591
600,0.4671,0.492214,0.749687,0.749306
700,0.4635,0.459119,0.777694,0.775858
800,0.4506,0.473693,0.764191,0.757629
900,0.4317,0.476035,0.778195,0.776729
1000,0.4339,0.493139,0.768192,0.768191


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.7074,0.62296,0.707177,0.704035
200,0.5707,0.53907,0.735684,0.733093
300,0.5267,0.59093,0.669417,0.631748
400,0.5236,0.515548,0.743936,0.733122
500,0.4913,0.502277,0.755189,0.755059
600,0.478,0.52042,0.745936,0.745856
700,0.4696,0.49393,0.765691,0.760221
800,0.4749,0.480655,0.770443,0.768362
900,0.4501,0.500718,0.751938,0.740345
1000,0.4376,0.470337,0.771193,0.769546


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.7074,0.62296,0.707177,0.704035
200,0.5707,0.53907,0.735684,0.733093
300,0.5267,0.59093,0.669417,0.631748
400,0.5236,0.515548,0.743936,0.733122
500,0.4913,0.502277,0.755189,0.755059
600,0.478,0.52042,0.745936,0.745856
700,0.4696,0.49393,0.765691,0.760221
800,0.4749,0.480655,0.770443,0.768362
900,0.4501,0.500718,0.751938,0.740345
1000,0.4376,0.470337,0.771193,0.769546


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.7074,0.62296,0.707177,0.704035
200,0.5707,0.53907,0.735684,0.733093
300,0.5267,0.59093,0.669417,0.631748
400,0.5236,0.515548,0.743936,0.733122
500,0.4913,0.502277,0.755189,0.755059
600,0.478,0.52042,0.745936,0.745856
700,0.4696,0.49393,0.765691,0.760221
800,0.4749,0.480655,0.770443,0.768362
900,0.4501,0.500718,0.751938,0.740345
1000,0.4376,0.470337,0.771193,0.769546


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6844,0.532257,0.722431,0.721835
200,0.5516,0.521531,0.744186,0.73759
300,0.5132,0.475068,0.770943,0.769324
400,0.502,0.494128,0.769692,0.763324
500,0.4872,0.478423,0.771193,0.765062
600,0.4591,0.480891,0.763941,0.755454
700,0.4583,0.478293,0.776944,0.772032
800,0.4516,0.479665,0.782696,0.780295
900,0.4446,0.469253,0.781945,0.781488
1000,0.4455,0.490395,0.786697,0.785886


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6844,0.532257,0.722431,0.721835
200,0.5516,0.521531,0.744186,0.73759
300,0.5132,0.475068,0.770943,0.769324
400,0.502,0.494128,0.769692,0.763324
500,0.4872,0.478423,0.771193,0.765062
600,0.4591,0.480891,0.763941,0.755454
700,0.4583,0.478293,0.776944,0.772032
800,0.4516,0.479665,0.782696,0.780295
900,0.4446,0.469253,0.781945,0.781488
1000,0.4455,0.490395,0.786697,0.785886


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.6844,0.532257,0.722431,0.721835
200,0.5516,0.521531,0.744186,0.73759
300,0.5132,0.475068,0.770943,0.769324
400,0.502,0.494128,0.769692,0.763324
500,0.4872,0.478423,0.771193,0.765062
600,0.4591,0.480891,0.763941,0.755454
700,0.4583,0.478293,0.776944,0.772032
800,0.4516,0.479665,0.782696,0.780295
900,0.4446,0.469253,0.781945,0.781488
1000,0.4455,0.490395,0.786697,0.785886


DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 15992
    })
    test: Dataset({
        features: ['sentence', 'label'],
        num_rows: 3999
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1101,0.062189,0.980745,0.980745
200,0.0607,0.051226,0.986747,0.986747
300,0.0373,0.054705,0.985496,0.985496
400,0.0292,0.060293,0.985996,0.985995
500,0.0254,0.058478,0.985746,0.985746
600,0.0071,0.068893,0.987497,0.987496
700,0.0085,0.083124,0.987247,0.987247
800,0.0135,0.080574,0.987497,0.987497
900,0.0052,0.096703,0.986497,0.986497
1000,0.0058,0.08487,0.987747,0.987747


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1101,0.062189,0.980745,0.980745
200,0.0607,0.051226,0.986747,0.986747
300,0.0373,0.054705,0.985496,0.985496
400,0.0292,0.060293,0.985996,0.985995
500,0.0254,0.058478,0.985746,0.985746
600,0.0071,0.068893,0.987497,0.987496
700,0.0085,0.083124,0.987247,0.987247
800,0.0135,0.080574,0.987497,0.987497
900,0.0052,0.096703,0.986497,0.986497
1000,0.0058,0.08487,0.987747,0.987747


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1101,0.062189,0.980745,0.980745
200,0.0607,0.051226,0.986747,0.986747
300,0.0373,0.054705,0.985496,0.985496
400,0.0292,0.060293,0.985996,0.985995
500,0.0254,0.058478,0.985746,0.985746
600,0.0071,0.068893,0.987497,0.987496
700,0.0085,0.083124,0.987247,0.987247
800,0.0135,0.080574,0.987497,0.987497
900,0.0052,0.096703,0.986497,0.986497
1000,0.0058,0.08487,0.987747,0.987747


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.11,0.060269,0.983496,0.983496
200,0.0655,0.045239,0.987497,0.987496
300,0.044,0.053475,0.988247,0.988246
400,0.0284,0.05407,0.986747,0.986746
500,0.0269,0.042226,0.988997,0.988997
600,0.0121,0.059931,0.985246,0.985246
700,0.0085,0.075409,0.985746,0.985746
800,0.0062,0.06254,0.988997,0.988997
900,0.0064,0.069198,0.987997,0.987997
1000,0.005,0.07711,0.988997,0.988997


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.11,0.060269,0.983496,0.983496
200,0.0655,0.045239,0.987497,0.987496
300,0.044,0.053475,0.988247,0.988246
400,0.0284,0.05407,0.986747,0.986746
500,0.0269,0.042226,0.988997,0.988997
600,0.0121,0.059931,0.985246,0.985246
700,0.0085,0.075409,0.985746,0.985746
800,0.0062,0.06254,0.988997,0.988997
900,0.0064,0.069198,0.987997,0.987997
1000,0.005,0.07711,0.988997,0.988997


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.11,0.060269,0.983496,0.983496
200,0.0655,0.045239,0.987497,0.987496
300,0.044,0.053475,0.988247,0.988246
400,0.0284,0.05407,0.986747,0.986746
500,0.0269,0.042226,0.988997,0.988997
600,0.0121,0.059931,0.985246,0.985246
700,0.0085,0.075409,0.985746,0.985746
800,0.0062,0.06254,0.988997,0.988997
900,0.0064,0.069198,0.987997,0.987997
1000,0.005,0.07711,0.988997,0.988997


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1103,0.06218,0.981495,0.981489
200,0.0663,0.056309,0.983246,0.983238
300,0.0355,0.06426,0.983996,0.983995
400,0.0242,0.075328,0.984246,0.98424
500,0.0281,0.066252,0.981495,0.981494
600,0.0124,0.075097,0.985246,0.985243
700,0.0087,0.071546,0.986497,0.986492
800,0.0094,0.077641,0.984246,0.984239
900,0.0028,0.102637,0.981745,0.981733
1000,0.0064,0.100901,0.984246,0.984239


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1103,0.06218,0.981495,0.981489
200,0.0663,0.056309,0.983246,0.983238
300,0.0355,0.06426,0.983996,0.983995
400,0.0242,0.075328,0.984246,0.98424
500,0.0281,0.066252,0.981495,0.981494
600,0.0124,0.075097,0.985246,0.985243
700,0.0087,0.071546,0.986497,0.986492
800,0.0094,0.077641,0.984246,0.984239
900,0.0028,0.102637,0.981745,0.981733
1000,0.0064,0.100901,0.984246,0.984239


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1103,0.06218,0.981495,0.981489
200,0.0663,0.056309,0.983246,0.983238
300,0.0355,0.06426,0.983996,0.983995
400,0.0242,0.075328,0.984246,0.98424
500,0.0281,0.066252,0.981495,0.981494
600,0.0124,0.075097,0.985246,0.985243
700,0.0087,0.071546,0.986497,0.986492
800,0.0094,0.077641,0.984246,0.984239
900,0.0028,0.102637,0.981745,0.981733
1000,0.0064,0.100901,0.984246,0.984239


DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 15992
    })
    test: Dataset({
        features: ['sentence', 'label'],
        num_rows: 3999
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1086,0.061989,0.982496,0.982496
200,0.064,0.049419,0.986997,0.986996
300,0.0451,0.043262,0.986997,0.986996
400,0.0424,0.04115,0.989247,0.989247
500,0.0316,0.067833,0.985996,0.985996
600,0.0266,0.053497,0.987997,0.987997
700,0.0214,0.063412,0.987247,0.987246
800,0.0239,0.064205,0.987747,0.987747
900,0.0159,0.064247,0.989497,0.989497
1000,0.0194,0.074464,0.982246,0.982243


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1086,0.061989,0.982496,0.982496
200,0.064,0.049419,0.986997,0.986996
300,0.0451,0.043262,0.986997,0.986996
400,0.0424,0.04115,0.989247,0.989247
500,0.0316,0.067833,0.985996,0.985996
600,0.0266,0.053497,0.987997,0.987997
700,0.0214,0.063412,0.987247,0.987246
800,0.0239,0.064205,0.987747,0.987747
900,0.0159,0.064247,0.989497,0.989497
1000,0.0194,0.074464,0.982246,0.982243


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1086,0.061989,0.982496,0.982496
200,0.064,0.049419,0.986997,0.986996
300,0.0451,0.043262,0.986997,0.986996
400,0.0424,0.04115,0.989247,0.989247
500,0.0316,0.067833,0.985996,0.985996
600,0.0266,0.053497,0.987997,0.987997
700,0.0214,0.063412,0.987247,0.987246
800,0.0239,0.064205,0.987747,0.987747
900,0.0159,0.064247,0.989497,0.989497
1000,0.0194,0.074464,0.982246,0.982243


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1128,0.055652,0.985246,0.985246
200,0.066,0.054693,0.985746,0.985746
300,0.0515,0.044798,0.988997,0.988997
400,0.0416,0.046045,0.986997,0.986996
500,0.0364,0.040983,0.989247,0.989247
600,0.0306,0.053347,0.987247,0.987245
700,0.02,0.050705,0.989747,0.989747
800,0.0211,0.060937,0.988997,0.988997
900,0.0179,0.052165,0.989747,0.989747
1000,0.0142,0.074763,0.986747,0.986746


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1128,0.055652,0.985246,0.985246
200,0.066,0.054693,0.985746,0.985746
300,0.0515,0.044798,0.988997,0.988997
400,0.0416,0.046045,0.986997,0.986996
500,0.0364,0.040983,0.989247,0.989247
600,0.0306,0.053347,0.987247,0.987245
700,0.02,0.050705,0.989747,0.989747
800,0.0211,0.060937,0.988997,0.988997
900,0.0179,0.052165,0.989747,0.989747
1000,0.0142,0.074763,0.986747,0.986746


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1128,0.055652,0.985246,0.985246
200,0.066,0.054693,0.985746,0.985746
300,0.0515,0.044798,0.988997,0.988997
400,0.0416,0.046045,0.986997,0.986996
500,0.0364,0.040983,0.989247,0.989247
600,0.0306,0.053347,0.987247,0.987245
700,0.02,0.050705,0.989747,0.989747
800,0.0211,0.060937,0.988997,0.988997
900,0.0179,0.052165,0.989747,0.989747
1000,0.0142,0.074763,0.986747,0.986746


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1053,0.071781,0.982496,0.982492
200,0.0654,0.102828,0.976494,0.976494
300,0.0457,0.07948,0.983996,0.983988
400,0.0356,0.062415,0.982496,0.982492
500,0.0391,0.070771,0.982996,0.982995
600,0.0264,0.07031,0.984996,0.984993
700,0.0255,0.064445,0.986747,0.986743
800,0.0207,0.108764,0.978745,0.978723
900,0.0184,0.07293,0.983996,0.983987
1000,0.017,0.074583,0.984996,0.984989


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1053,0.071781,0.982496,0.982492
200,0.0654,0.102828,0.976494,0.976494
300,0.0457,0.07948,0.983996,0.983988
400,0.0356,0.062415,0.982496,0.982492
500,0.0391,0.070771,0.982996,0.982995
600,0.0264,0.07031,0.984996,0.984993
700,0.0255,0.064445,0.986747,0.986743
800,0.0207,0.108764,0.978745,0.978723
900,0.0184,0.07293,0.983996,0.983987
1000,0.017,0.074583,0.984996,0.984989


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1053,0.071781,0.982496,0.982492
200,0.0654,0.102828,0.976494,0.976494
300,0.0457,0.07948,0.983996,0.983988
400,0.0356,0.062415,0.982496,0.982492
500,0.0391,0.070771,0.982996,0.982995
600,0.0264,0.07031,0.984996,0.984993
700,0.0255,0.064445,0.986747,0.986743
800,0.0207,0.108764,0.978745,0.978723
900,0.0184,0.07293,0.983996,0.983987
1000,0.017,0.074583,0.984996,0.984989


DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 15992
    })
    test: Dataset({
        features: ['sentence', 'label'],
        num_rows: 3999
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1492,0.038755,0.990748,0.990747
200,0.049,0.039134,0.990998,0.990997
300,0.0373,0.02968,0.992248,0.992248
400,0.0293,0.036332,0.991248,0.991247
500,0.0337,0.039246,0.989997,0.989996
600,0.0201,0.030881,0.992248,0.992248
700,0.0148,0.049025,0.991498,0.991498
800,0.0144,0.039977,0.992248,0.992248
900,0.0069,0.045242,0.991748,0.991748
1000,0.0059,0.046736,0.989497,0.989497


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1492,0.038755,0.990748,0.990747
200,0.049,0.039134,0.990998,0.990997
300,0.0373,0.02968,0.992248,0.992248
400,0.0293,0.036332,0.991248,0.991247
500,0.0337,0.039246,0.989997,0.989996
600,0.0201,0.030881,0.992248,0.992248
700,0.0148,0.049025,0.991498,0.991498
800,0.0144,0.039977,0.992248,0.992248
900,0.0069,0.045242,0.991748,0.991748
1000,0.0059,0.046736,0.989497,0.989497


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1492,0.038755,0.990748,0.990747
200,0.049,0.039134,0.990998,0.990997
300,0.0373,0.02968,0.992248,0.992248
400,0.0293,0.036332,0.991248,0.991247
500,0.0337,0.039246,0.989997,0.989996
600,0.0201,0.030881,0.992248,0.992248
700,0.0148,0.049025,0.991498,0.991498
800,0.0144,0.039977,0.992248,0.992248
900,0.0069,0.045242,0.991748,0.991748
1000,0.0059,0.046736,0.989497,0.989497


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1336,0.052336,0.988497,0.988497
200,0.0583,0.039484,0.990248,0.990247
300,0.0399,0.04023,0.990998,0.990998
400,0.0377,0.042829,0.991998,0.991998
500,0.0296,0.045262,0.990748,0.990747
600,0.0271,0.046275,0.989497,0.989497
700,0.017,0.067399,0.985496,0.985496
800,0.0176,0.051085,0.989747,0.989747
900,0.0109,0.051626,0.990748,0.990748
1000,0.0143,0.060427,0.988997,0.988997


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1336,0.052336,0.988497,0.988497
200,0.0583,0.039484,0.990248,0.990247
300,0.0399,0.04023,0.990998,0.990998
400,0.0377,0.042829,0.991998,0.991998
500,0.0296,0.045262,0.990748,0.990747
600,0.0271,0.046275,0.989497,0.989497
700,0.017,0.067399,0.985496,0.985496
800,0.0176,0.051085,0.989747,0.989747
900,0.0109,0.051626,0.990748,0.990748
1000,0.0143,0.060427,0.988997,0.988997


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1336,0.052336,0.988497,0.988497
200,0.0583,0.039484,0.990248,0.990247
300,0.0399,0.04023,0.990998,0.990998
400,0.0377,0.042829,0.991998,0.991998
500,0.0296,0.045262,0.990748,0.990747
600,0.0271,0.046275,0.989497,0.989497
700,0.017,0.067399,0.985496,0.985496
800,0.0176,0.051085,0.989747,0.989747
900,0.0109,0.051626,0.990748,0.990748
1000,0.0143,0.060427,0.988997,0.988997


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1283,0.062239,0.984996,0.984989
200,0.0597,0.051773,0.985996,0.98599
300,0.0346,0.04352,0.990998,0.990996
400,0.0278,0.051806,0.990498,0.990496
500,0.0347,0.04852,0.989997,0.989996
600,0.0222,0.057067,0.989997,0.989996
700,0.0165,0.053241,0.988997,0.988994
800,0.0167,0.091018,0.980995,0.980981
900,0.0129,0.052545,0.991498,0.991496
1000,0.0104,0.066899,0.986497,0.986491


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1283,0.062239,0.984996,0.984989
200,0.0597,0.051773,0.985996,0.98599
300,0.0346,0.04352,0.990998,0.990996
400,0.0278,0.051806,0.990498,0.990496
500,0.0347,0.04852,0.989997,0.989996
600,0.0222,0.057067,0.989997,0.989996
700,0.0165,0.053241,0.988997,0.988994
800,0.0167,0.091018,0.980995,0.980981
900,0.0129,0.052545,0.991498,0.991496
1000,0.0104,0.066899,0.986497,0.986491


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.1283,0.062239,0.984996,0.984989
200,0.0597,0.051773,0.985996,0.98599
300,0.0346,0.04352,0.990998,0.990996
400,0.0278,0.051806,0.990498,0.990496
500,0.0347,0.04852,0.989997,0.989996
600,0.0222,0.057067,0.989997,0.989996
700,0.0165,0.053241,0.988997,0.988994
800,0.0167,0.091018,0.980995,0.980981
900,0.0129,0.052545,0.991498,0.991496
1000,0.0104,0.066899,0.986497,0.986491


DatasetDict({
    train: Dataset({
        features: ['sentence', 'label'],
        num_rows: 15992
    })
    test: Dataset({
        features: ['sentence', 'label'],
        num_rows: 3999
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4308,0.386736,0.817704,0.817704
200,0.3765,0.370518,0.84146,0.84093
300,0.3256,0.401469,0.843461,0.843454
400,0.2439,0.386555,0.843461,0.84265
500,0.2512,0.418995,0.836209,0.836172
600,0.1333,0.502631,0.834459,0.834253
700,0.1396,0.598013,0.828707,0.828663
800,0.1216,0.69441,0.830208,0.830187
900,0.0918,0.70684,0.835959,0.835846
1000,0.0967,0.769178,0.827207,0.826008


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4308,0.386736,0.817704,0.817704
200,0.3765,0.370518,0.84146,0.84093
300,0.3256,0.401469,0.843461,0.843454
400,0.2439,0.386555,0.843461,0.84265
500,0.2512,0.418995,0.836209,0.836172
600,0.1333,0.502631,0.834459,0.834253
700,0.1396,0.598013,0.828707,0.828663
800,0.1216,0.69441,0.830208,0.830187
900,0.0918,0.70684,0.835959,0.835846
1000,0.0967,0.769178,0.827207,0.826008


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4308,0.386736,0.817704,0.817704
200,0.3765,0.370518,0.84146,0.84093
300,0.3256,0.401469,0.843461,0.843454
400,0.2439,0.386555,0.843461,0.84265
500,0.2512,0.418995,0.836209,0.836172
600,0.1333,0.502631,0.834459,0.834253
700,0.1396,0.598013,0.828707,0.828663
800,0.1216,0.69441,0.830208,0.830187
900,0.0918,0.70684,0.835959,0.835846
1000,0.0967,0.769178,0.827207,0.826008


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4286,0.391824,0.823956,0.822186
200,0.3777,0.392781,0.821705,0.821359
300,0.3001,0.409247,0.829207,0.828621
400,0.2551,0.407711,0.824456,0.823388
500,0.2454,0.438606,0.821955,0.821652
600,0.1409,0.618501,0.823206,0.822823
700,0.1374,0.609169,0.824456,0.823739
800,0.1132,0.746261,0.827957,0.827685
900,0.0965,0.739006,0.817454,0.817338
1000,0.1006,0.785655,0.820955,0.820474


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4286,0.391824,0.823956,0.822186
200,0.3777,0.392781,0.821705,0.821359
300,0.3001,0.409247,0.829207,0.828621
400,0.2551,0.407711,0.824456,0.823388
500,0.2454,0.438606,0.821955,0.821652
600,0.1409,0.618501,0.823206,0.822823
700,0.1374,0.609169,0.824456,0.823739
800,0.1132,0.746261,0.827957,0.827685
900,0.0965,0.739006,0.817454,0.817338
1000,0.1006,0.785655,0.820955,0.820474


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4286,0.391824,0.823956,0.822186
200,0.3777,0.392781,0.821705,0.821359
300,0.3001,0.409247,0.829207,0.828621
400,0.2551,0.407711,0.824456,0.823388
500,0.2454,0.438606,0.821955,0.821652
600,0.1409,0.618501,0.823206,0.822823
700,0.1374,0.609169,0.824456,0.823739
800,0.1132,0.746261,0.827957,0.827685
900,0.0965,0.739006,0.817454,0.817338
1000,0.1006,0.785655,0.820955,0.820474


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4107,0.38376,0.830708,0.830341
200,0.3948,0.371048,0.837709,0.837217
300,0.3122,0.437873,0.833208,0.832847
400,0.2538,0.421026,0.830708,0.828819
500,0.238,0.429124,0.832208,0.831233
600,0.1354,0.639273,0.805451,0.805285
700,0.137,0.626983,0.822206,0.822201
800,0.1153,0.762388,0.787447,0.787401
900,0.0901,0.788049,0.820955,0.820041
1000,0.1005,0.784352,0.824456,0.823739


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4107,0.38376,0.830708,0.830341
200,0.3948,0.371048,0.837709,0.837217
300,0.3122,0.437873,0.833208,0.832847
400,0.2538,0.421026,0.830708,0.828819
500,0.238,0.429124,0.832208,0.831233
600,0.1354,0.639273,0.805451,0.805285
700,0.137,0.626983,0.822206,0.822201
800,0.1153,0.762388,0.787447,0.787401
900,0.0901,0.788049,0.820955,0.820041
1000,0.1005,0.784352,0.824456,0.823739


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15992 [00:00<?, ? examples/s]

Map:   0%|          | 0/3999 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.4107,0.38376,0.830708,0.830341
200,0.3948,0.371048,0.837709,0.837217
300,0.3122,0.437873,0.833208,0.832847
400,0.2538,0.421026,0.830708,0.828819
500,0.238,0.429124,0.832208,0.831233
600,0.1354,0.639273,0.805451,0.805285
700,0.137,0.626983,0.822206,0.822201
800,0.1153,0.762388,0.787447,0.787401
900,0.0901,0.788049,0.820955,0.820041
1000,0.1005,0.784352,0.824456,0.823739


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.990998,0.990997
3,alger-ia/dziribert,0.988997,0.988997
6,faisalq/EgyBERT,0.993498,0.993498
8,faisalq/SaudiBERT,0.993248,0.993248
11,otmangi/MorRoBERTa,0.993498,0.993498
14,otmangi/MorrBERT,0.843461,0.843454
17,tunis-ai/TunBERT,0.786697,0.785886
