In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'MSAC_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



dataset = load_dataset('AbderrahmanSkiredj1/MSAC_darija_sentiment_analysis')
df = pd.DataFrame(dataset['train']) 

      
display(df.columns)
display(len(df))
display(df[:4])

df['label'] = df['label'].replace(',ne', 'neg')


classes = set(df['label'].values)
display(classes)

c = df['label'].value_counts()
display(c)

df['label'] = df['label'].astype('category')
df['label'] = df['label'].cat.codes

df = df[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


# ds = Dataset.from_pandas(df)
# ds = ds.train_test_split(test_size=0.2)

# display(ds)

max_sequence_length = 128


models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]

seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
    
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['text'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True) # , batched=True
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)  # , batched=True
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 25
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 20, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 20
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-17 04:53:32.046391: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-17 04:53:32.072230: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Found cached dataset parquet (/home/ffq/.cache/huggingface/datasets/AbderrahmanSkiredj1___parquet/AbderrahmanSkiredj1--MSAC_darija_sentiment_analysis-78ec287cfc3da3ad/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/1 [00:00<?, ?it/s]

Index(['text', 'label'], dtype='object')

2000

Unnamed: 0,text,label
0,'طوال حياتي لم المس اي تغير حتى قدمت هذه الحكومة فل نقف بجانها بصوتنا',pos
1,'منتوج رائع وثمن مناسب ....جميل',pos
2,'كلنا ابن كيران لمتافق معايا يدير جيم',pos
3,'وفقك الله لولاية اخرى حقاش مكينش محسن منك',pos


{'neg', 'pos'}

label
pos    1000
neg    1000
Name: count, dtype: int64

2

2000

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6711,0.623361,0.8875,0.887189
40,0.5151,0.42565,0.9,0.898001
60,0.3526,0.32046,0.9175,0.916345
80,0.2484,0.285425,0.9175,0.916026
100,0.1998,0.260188,0.915,0.913651
120,0.1449,0.276651,0.905,0.904762
140,0.112,0.257271,0.925,0.924517
160,0.0788,0.271034,0.9175,0.916788
180,0.0743,0.40838,0.8775,0.877438
200,0.0684,0.283268,0.9125,0.911275


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6734,0.629427,0.8825,0.882287
40,0.5276,0.446657,0.8925,0.889203
60,0.3658,0.335914,0.9125,0.911054
80,0.2384,0.261721,0.9275,0.927003
100,0.1806,0.258179,0.9225,0.921594
120,0.1405,0.25709,0.925,0.924081
140,0.1114,0.279988,0.9125,0.9121
160,0.0822,0.287968,0.92,0.919547
180,0.0672,0.301588,0.92,0.919547
200,0.052,0.354486,0.905,0.904712


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6734,0.629427,0.8825,0.882287
40,0.5276,0.446657,0.8925,0.889203
60,0.3658,0.335914,0.9125,0.911054
80,0.2384,0.261721,0.9275,0.927003
100,0.1806,0.258179,0.9225,0.921594
120,0.1405,0.25709,0.925,0.924081
140,0.1114,0.279988,0.9125,0.9121
160,0.0822,0.287968,0.92,0.919547
180,0.0672,0.301588,0.92,0.919547
200,0.052,0.354486,0.905,0.904712


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6718,0.618632,0.8675,0.86726
40,0.5216,0.419557,0.9125,0.912038
60,0.3556,0.341722,0.905,0.903941
80,0.2603,0.284746,0.92,0.919547
100,0.1927,0.263112,0.9225,0.922196
120,0.1328,0.321021,0.9,0.89991
140,0.1305,0.282505,0.915,0.914519
160,0.0958,0.300322,0.9125,0.9121
180,0.0809,0.322264,0.9075,0.907291
200,0.0555,0.384721,0.8975,0.897392


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6718,0.618632,0.8675,0.86726
40,0.5216,0.419557,0.9125,0.912038
60,0.3556,0.341722,0.905,0.903941
80,0.2603,0.284746,0.92,0.919547
100,0.1927,0.263112,0.9225,0.922196
120,0.1328,0.321021,0.9,0.89991
140,0.1305,0.282505,0.915,0.914519
160,0.0958,0.300322,0.9125,0.9121
180,0.0809,0.322264,0.9075,0.907291
200,0.0555,0.384721,0.8975,0.897392


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6718,0.618632,0.8675,0.86726
40,0.5216,0.419557,0.9125,0.912038
60,0.3556,0.341722,0.905,0.903941
80,0.2603,0.284746,0.92,0.919547
100,0.1927,0.263112,0.9225,0.922196
120,0.1328,0.321021,0.9,0.89991
140,0.1305,0.282505,0.915,0.914519
160,0.0958,0.300322,0.9125,0.9121
180,0.0809,0.322264,0.9075,0.907291
200,0.0555,0.384721,0.8975,0.897392


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6759,0.626273,0.8775,0.876853
40,0.5359,0.420074,0.9225,0.922196
60,0.3643,0.311947,0.9275,0.927062
80,0.2648,0.260896,0.93,0.929788
100,0.1799,0.272641,0.92,0.91995
120,0.1305,0.247837,0.9275,0.9273
140,0.1225,0.291791,0.91,0.909998
160,0.1012,0.30008,0.915,0.914991
180,0.089,0.278332,0.92,0.919757
200,0.0763,0.286613,0.92,0.919711


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6759,0.626273,0.8775,0.876853
40,0.5359,0.420074,0.9225,0.922196
60,0.3643,0.311947,0.9275,0.927062
80,0.2648,0.260896,0.93,0.929788
100,0.1799,0.272641,0.92,0.91995
120,0.1305,0.247837,0.9275,0.9273
140,0.1225,0.291791,0.91,0.909998
160,0.1012,0.30008,0.915,0.914991
180,0.089,0.278332,0.92,0.919757
200,0.0763,0.286613,0.92,0.919711


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6759,0.626273,0.8775,0.876853
40,0.5359,0.420074,0.9225,0.922196
60,0.3643,0.311947,0.9275,0.927062
80,0.2648,0.260896,0.93,0.929788
100,0.1799,0.272641,0.92,0.91995
120,0.1305,0.247837,0.9275,0.9273
140,0.1225,0.291791,0.91,0.909998
160,0.1012,0.30008,0.915,0.914991
180,0.089,0.278332,0.92,0.919757
200,0.0763,0.286613,0.92,0.919711


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3357,0.282074,0.91,0.909345
40,0.1502,0.29263,0.9125,0.911825
60,0.0722,0.353163,0.9125,0.911971
80,0.0458,0.374537,0.9125,0.911275
100,0.0179,0.439981,0.91,0.909919
120,0.0095,0.492295,0.9175,0.917001
140,0.0342,0.619916,0.9075,0.905971
160,0.0135,0.519317,0.9125,0.911825
180,0.0015,0.533843,0.91,0.90942
200,0.0002,0.580735,0.9125,0.912038


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3357,0.282074,0.91,0.909345
40,0.1502,0.29263,0.9125,0.911825
60,0.0722,0.353163,0.9125,0.911971
80,0.0458,0.374537,0.9125,0.911275
100,0.0179,0.439981,0.91,0.909919
120,0.0095,0.492295,0.9175,0.917001
140,0.0342,0.619916,0.9075,0.905971
160,0.0135,0.519317,0.9125,0.911825
180,0.0015,0.533843,0.91,0.90942
200,0.0002,0.580735,0.9125,0.912038


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3357,0.282074,0.91,0.909345
40,0.1502,0.29263,0.9125,0.911825
60,0.0722,0.353163,0.9125,0.911971
80,0.0458,0.374537,0.9125,0.911275
100,0.0179,0.439981,0.91,0.909919
120,0.0095,0.492295,0.9175,0.917001
140,0.0342,0.619916,0.9075,0.905971
160,0.0135,0.519317,0.9125,0.911825
180,0.0015,0.533843,0.91,0.90942
200,0.0002,0.580735,0.9125,0.912038


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3759,0.280129,0.8975,0.895806
40,0.1668,0.360078,0.8775,0.877481
60,0.0951,0.276195,0.9275,0.927168
80,0.0481,0.254283,0.93,0.929604
100,0.0166,0.390551,0.91,0.90989
120,0.006,0.430523,0.9175,0.917226
140,0.0079,0.434293,0.92,0.919711
160,0.0058,0.482077,0.91,0.909919
180,0.0042,0.424801,0.9275,0.927003
200,0.001,0.480772,0.92,0.919547


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3759,0.280129,0.8975,0.895806
40,0.1668,0.360078,0.8775,0.877481
60,0.0951,0.276195,0.9275,0.927168
80,0.0481,0.254283,0.93,0.929604
100,0.0166,0.390551,0.91,0.90989
120,0.006,0.430523,0.9175,0.917226
140,0.0079,0.434293,0.92,0.919711
160,0.0058,0.482077,0.91,0.909919
180,0.0042,0.424801,0.9275,0.927003
200,0.001,0.480772,0.92,0.919547


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3759,0.280129,0.8975,0.895806
40,0.1668,0.360078,0.8775,0.877481
60,0.0951,0.276195,0.9275,0.927168
80,0.0481,0.254283,0.93,0.929604
100,0.0166,0.390551,0.91,0.90989
120,0.006,0.430523,0.9175,0.917226
140,0.0079,0.434293,0.92,0.919711
160,0.0058,0.482077,0.91,0.909919
180,0.0042,0.424801,0.9275,0.927003
200,0.001,0.480772,0.92,0.919547


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3777,0.172619,0.945,0.944646
40,0.1623,0.207973,0.93,0.929549
60,0.0801,0.219208,0.94,0.939946
80,0.0735,0.40802,0.915,0.914947
100,0.0341,0.236104,0.945,0.944767
120,0.0156,0.336589,0.9325,0.932449
140,0.0021,0.383747,0.9325,0.932479
160,0.0098,0.326952,0.935,0.934959
180,0.0044,0.326683,0.9475,0.947426
200,0.0017,0.346037,0.9425,0.942439


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3777,0.172619,0.945,0.944646
40,0.1623,0.207973,0.93,0.929549
60,0.0801,0.219208,0.94,0.939946
80,0.0735,0.40802,0.915,0.914947
100,0.0341,0.236104,0.945,0.944767
120,0.0156,0.336589,0.9325,0.932449
140,0.0021,0.383747,0.9325,0.932479
160,0.0098,0.326952,0.935,0.934959
180,0.0044,0.326683,0.9475,0.947426
200,0.0017,0.346037,0.9425,0.942439


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3777,0.172619,0.945,0.944646
40,0.1623,0.207973,0.93,0.929549
60,0.0801,0.219208,0.94,0.939946
80,0.0735,0.40802,0.915,0.914947
100,0.0341,0.236104,0.945,0.944767
120,0.0156,0.336589,0.9325,0.932449
140,0.0021,0.383747,0.9325,0.932479
160,0.0098,0.326952,0.935,0.934959
180,0.0044,0.326683,0.9475,0.947426
200,0.0017,0.346037,0.9425,0.942439


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.8127,0.846562,0.46,0.315068
40,0.7156,0.689436,0.54,0.350649
60,0.7011,0.705453,0.46,0.315068
80,0.6995,0.721399,0.6,0.554367
100,0.6506,0.7115,0.57,0.548129
120,0.6292,0.702778,0.62,0.612946
140,0.6128,0.651643,0.655,0.653536
160,0.5938,0.667141,0.63,0.620279
180,0.5567,0.653534,0.6625,0.66138
200,0.5328,0.703871,0.575,0.54023


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.8127,0.846562,0.46,0.315068
40,0.7156,0.689436,0.54,0.350649
60,0.7011,0.705453,0.46,0.315068
80,0.6995,0.721399,0.6,0.554367
100,0.6506,0.7115,0.57,0.548129
120,0.6292,0.702778,0.62,0.612946
140,0.6128,0.651643,0.655,0.653536
160,0.5938,0.667141,0.63,0.620279
180,0.5567,0.653534,0.6625,0.66138
200,0.5328,0.703871,0.575,0.54023


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.8127,0.846562,0.46,0.315068
40,0.7156,0.689436,0.54,0.350649
60,0.7011,0.705453,0.46,0.315068
80,0.6995,0.721399,0.6,0.554367
100,0.6506,0.7115,0.57,0.548129
120,0.6292,0.702778,0.62,0.612946
140,0.6128,0.651643,0.655,0.653536
160,0.5938,0.667141,0.63,0.620279
180,0.5567,0.653534,0.6625,0.66138
200,0.5328,0.703871,0.575,0.54023


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.8179,0.691453,0.6175,0.566667
40,0.6991,0.725612,0.4675,0.318569
60,0.7069,0.627602,0.6525,0.646758
80,0.6617,0.616834,0.6725,0.672252
100,0.6605,0.63721,0.6575,0.648211
120,0.6564,0.585336,0.7,0.693095
140,0.6668,0.603579,0.6975,0.692501
160,0.5712,0.588413,0.725,0.724828
180,0.5995,0.585744,0.725,0.721632
200,0.6213,0.572735,0.715,0.710836


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.8179,0.691453,0.6175,0.566667
40,0.6991,0.725612,0.4675,0.318569
60,0.7069,0.627602,0.6525,0.646758
80,0.6617,0.616834,0.6725,0.672252
100,0.6605,0.63721,0.6575,0.648211
120,0.6564,0.585336,0.7,0.693095
140,0.6668,0.603579,0.6975,0.692501
160,0.5712,0.588413,0.725,0.724828
180,0.5995,0.585744,0.725,0.721632
200,0.6213,0.572735,0.715,0.710836


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.8179,0.691453,0.6175,0.566667
40,0.6991,0.725612,0.4675,0.318569
60,0.7069,0.627602,0.6525,0.646758
80,0.6617,0.616834,0.6725,0.672252
100,0.6605,0.63721,0.6575,0.648211
120,0.6564,0.585336,0.7,0.693095
140,0.6668,0.603579,0.6975,0.692501
160,0.5712,0.588413,0.725,0.724828
180,0.5995,0.585744,0.725,0.721632
200,0.6213,0.572735,0.715,0.710836


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.9377,0.697587,0.5225,0.343186
40,0.7095,0.70604,0.4775,0.323181
60,0.6984,0.70855,0.5225,0.343186
80,0.7065,0.695698,0.4775,0.323181
100,0.6957,0.692885,0.53,0.360348
120,0.6961,0.692822,0.6125,0.589336
140,0.6953,0.691337,0.5225,0.343186
160,0.7019,0.708181,0.4775,0.323181
180,0.6741,0.657917,0.615,0.579648
200,0.6685,0.660195,0.6875,0.687404


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.9377,0.697587,0.5225,0.343186
40,0.7095,0.70604,0.4775,0.323181
60,0.6984,0.70855,0.5225,0.343186
80,0.7065,0.695698,0.4775,0.323181
100,0.6957,0.692885,0.53,0.360348
120,0.6961,0.692822,0.6125,0.589336
140,0.6953,0.691337,0.5225,0.343186
160,0.7019,0.708181,0.4775,0.323181
180,0.6741,0.657917,0.615,0.579648
200,0.6685,0.660195,0.6875,0.687404


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.9377,0.697587,0.5225,0.343186
40,0.7095,0.70604,0.4775,0.323181
60,0.6984,0.70855,0.5225,0.343186
80,0.7065,0.695698,0.4775,0.323181
100,0.6957,0.692885,0.53,0.360348
120,0.6961,0.692822,0.6125,0.589336
140,0.6953,0.691337,0.5225,0.343186
160,0.7019,0.708181,0.4775,0.323181
180,0.6741,0.657917,0.615,0.579648
200,0.6685,0.660195,0.6875,0.687404


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3514,0.310417,0.885,0.882353
40,0.1537,0.277965,0.8975,0.896302
60,0.0729,0.344863,0.895,0.894483
80,0.0299,0.46286,0.905,0.904657
100,0.0097,0.559397,0.885,0.884712
120,0.0108,0.608482,0.89,0.888653
140,0.0047,0.639634,0.8825,0.881695
160,0.0003,0.680051,0.88,0.879127
180,0.0001,0.71729,0.8825,0.881372
200,0.0051,0.693762,0.8875,0.887059


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3514,0.310417,0.885,0.882353
40,0.1537,0.277965,0.8975,0.896302
60,0.0729,0.344863,0.895,0.894483
80,0.0299,0.46286,0.905,0.904657
100,0.0097,0.559397,0.885,0.884712
120,0.0108,0.608482,0.89,0.888653
140,0.0047,0.639634,0.8825,0.881695
160,0.0003,0.680051,0.88,0.879127
180,0.0001,0.71729,0.8825,0.881372
200,0.0051,0.693762,0.8875,0.887059


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3514,0.310417,0.885,0.882353
40,0.1537,0.277965,0.8975,0.896302
60,0.0729,0.344863,0.895,0.894483
80,0.0299,0.46286,0.905,0.904657
100,0.0097,0.559397,0.885,0.884712
120,0.0108,0.608482,0.89,0.888653
140,0.0047,0.639634,0.8825,0.881695
160,0.0003,0.680051,0.88,0.879127
180,0.0001,0.71729,0.8825,0.881372
200,0.0051,0.693762,0.8875,0.887059


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3858,0.30878,0.885,0.883953
40,0.1462,0.366244,0.88,0.878907
60,0.096,0.3963,0.8875,0.886185
80,0.0361,0.50791,0.88,0.879566
100,0.0162,0.499842,0.895,0.894324
120,0.0065,0.605711,0.8925,0.892143
140,0.0026,0.650772,0.8925,0.891932
160,0.0009,0.665048,0.8975,0.896958
180,0.0002,0.68394,0.8975,0.896958
200,0.0001,0.700376,0.8975,0.896958


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3858,0.30878,0.885,0.883953
40,0.1462,0.366244,0.88,0.878907
60,0.096,0.3963,0.8875,0.886185
80,0.0361,0.50791,0.88,0.879566
100,0.0162,0.499842,0.895,0.894324
120,0.0065,0.605711,0.8925,0.892143
140,0.0026,0.650772,0.8925,0.891932
160,0.0009,0.665048,0.8975,0.896958
180,0.0002,0.68394,0.8975,0.896958
200,0.0001,0.700376,0.8975,0.896958


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3858,0.30878,0.885,0.883953
40,0.1462,0.366244,0.88,0.878907
60,0.096,0.3963,0.8875,0.886185
80,0.0361,0.50791,0.88,0.879566
100,0.0162,0.499842,0.895,0.894324
120,0.0065,0.605711,0.8925,0.892143
140,0.0026,0.650772,0.8925,0.891932
160,0.0009,0.665048,0.8975,0.896958
180,0.0002,0.68394,0.8975,0.896958
200,0.0001,0.700376,0.8975,0.896958


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3685,0.231529,0.9175,0.917122
40,0.1673,0.310858,0.9,0.899997
60,0.0554,0.303344,0.9125,0.912499
80,0.0553,0.36526,0.9025,0.902324
100,0.0147,0.394112,0.9075,0.906941
120,0.0016,0.494054,0.8975,0.897217
140,0.0021,0.560415,0.9025,0.902363
160,0.001,0.708867,0.895,0.894997
180,0.0002,0.555697,0.9075,0.906786
200,0.0002,0.701258,0.8975,0.897484


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3685,0.231529,0.9175,0.917122
40,0.1673,0.310858,0.9,0.899997
60,0.0554,0.303344,0.9125,0.912499
80,0.0553,0.36526,0.9025,0.902324
100,0.0147,0.394112,0.9075,0.906941
120,0.0016,0.494054,0.8975,0.897217
140,0.0021,0.560415,0.9025,0.902363
160,0.001,0.708867,0.895,0.894997
180,0.0002,0.555697,0.9075,0.906786
200,0.0002,0.701258,0.8975,0.897484


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3685,0.231529,0.9175,0.917122
40,0.1673,0.310858,0.9,0.899997
60,0.0554,0.303344,0.9125,0.912499
80,0.0553,0.36526,0.9025,0.902324
100,0.0147,0.394112,0.9075,0.906941
120,0.0016,0.494054,0.8975,0.897217
140,0.0021,0.560415,0.9025,0.902363
160,0.001,0.708867,0.895,0.894997
180,0.0002,0.555697,0.9075,0.906786
200,0.0002,0.701258,0.8975,0.897484


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4144,0.351708,0.88,0.879491
40,0.2044,0.392982,0.8775,0.876199
60,0.1393,0.36532,0.8875,0.886305
80,0.047,0.549021,0.8775,0.876443
100,0.0316,0.572549,0.8775,0.875311
120,0.0275,0.530476,0.875,0.873016
140,0.0173,0.679643,0.895,0.894044
160,0.007,0.66789,0.88,0.878247
180,0.0005,0.733595,0.8875,0.886058
200,0.0046,0.76496,0.8825,0.88071


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4144,0.351708,0.88,0.879491
40,0.2044,0.392982,0.8775,0.876199
60,0.1393,0.36532,0.8875,0.886305
80,0.047,0.549021,0.8775,0.876443
100,0.0316,0.572549,0.8775,0.875311
120,0.0275,0.530476,0.875,0.873016
140,0.0173,0.679643,0.895,0.894044
160,0.007,0.66789,0.88,0.878247
180,0.0005,0.733595,0.8875,0.886058
200,0.0046,0.76496,0.8825,0.88071


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4144,0.351708,0.88,0.879491
40,0.2044,0.392982,0.8775,0.876199
60,0.1393,0.36532,0.8875,0.886305
80,0.047,0.549021,0.8775,0.876443
100,0.0316,0.572549,0.8775,0.875311
120,0.0275,0.530476,0.875,0.873016
140,0.0173,0.679643,0.895,0.894044
160,0.007,0.66789,0.88,0.878247
180,0.0005,0.733595,0.8875,0.886058
200,0.0046,0.76496,0.8825,0.88071


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5186,0.573056,0.79,0.775395
40,0.2698,0.359069,0.8775,0.87737
60,0.175,0.341114,0.89,0.889901
80,0.1003,0.423064,0.89,0.889102
100,0.0564,0.580996,0.86,0.859996
120,0.0355,0.519589,0.8925,0.892078
140,0.0114,0.634894,0.8875,0.887189
160,0.0264,0.517325,0.8875,0.887296
180,0.0032,0.742547,0.875,0.873469
200,0.0076,0.648559,0.89,0.889458


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5186,0.573056,0.79,0.775395
40,0.2698,0.359069,0.8775,0.87737
60,0.175,0.341114,0.89,0.889901
80,0.1003,0.423064,0.89,0.889102
100,0.0564,0.580996,0.86,0.859996
120,0.0355,0.519589,0.8925,0.892078
140,0.0114,0.634894,0.8875,0.887189
160,0.0264,0.517325,0.8875,0.887296
180,0.0032,0.742547,0.875,0.873469
200,0.0076,0.648559,0.89,0.889458


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5186,0.573056,0.79,0.775395
40,0.2698,0.359069,0.8775,0.87737
60,0.175,0.341114,0.89,0.889901
80,0.1003,0.423064,0.89,0.889102
100,0.0564,0.580996,0.86,0.859996
120,0.0355,0.519589,0.8925,0.892078
140,0.0114,0.634894,0.8875,0.887189
160,0.0264,0.517325,0.8875,0.887296
180,0.0032,0.742547,0.875,0.873469
200,0.0076,0.648559,0.89,0.889458


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4456,0.293966,0.885,0.884997
40,0.246,0.260554,0.9075,0.907402
60,0.157,0.314758,0.8775,0.877462
80,0.0989,0.34136,0.8975,0.896302
100,0.0719,0.321921,0.915,0.914864
120,0.0178,0.424079,0.915,0.914828
140,0.0134,0.541909,0.9025,0.902118
160,0.0064,0.630132,0.895,0.894871
180,0.0039,0.674232,0.905,0.904463
200,0.0005,0.720992,0.9,0.899977


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4456,0.293966,0.885,0.884997
40,0.246,0.260554,0.9075,0.907402
60,0.157,0.314758,0.8775,0.877462
80,0.0989,0.34136,0.8975,0.896302
100,0.0719,0.321921,0.915,0.914864
120,0.0178,0.424079,0.915,0.914828
140,0.0134,0.541909,0.9025,0.902118
160,0.0064,0.630132,0.895,0.894871
180,0.0039,0.674232,0.905,0.904463
200,0.0005,0.720992,0.9,0.899977


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4456,0.293966,0.885,0.884997
40,0.246,0.260554,0.9075,0.907402
60,0.157,0.314758,0.8775,0.877462
80,0.0989,0.34136,0.8975,0.896302
100,0.0719,0.321921,0.915,0.914864
120,0.0178,0.424079,0.915,0.914828
140,0.0134,0.541909,0.9025,0.902118
160,0.0064,0.630132,0.895,0.894871
180,0.0039,0.674232,0.905,0.904463
200,0.0005,0.720992,0.9,0.899977


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4234,0.375146,0.8625,0.86104
40,0.1914,0.353061,0.8775,0.87593
60,0.1346,0.415203,0.8625,0.861439
80,0.0492,0.565761,0.8675,0.866228
100,0.0284,0.767492,0.8675,0.865132
120,0.0144,0.774728,0.865,0.863495
140,0.005,0.839078,0.8625,0.860892
160,0.0041,0.898217,0.8725,0.872077
180,0.0083,0.9088,0.8675,0.865951
200,0.0002,0.958374,0.8625,0.861773


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4234,0.375146,0.8625,0.86104
40,0.1914,0.353061,0.8775,0.87593
60,0.1346,0.415203,0.8625,0.861439
80,0.0492,0.565761,0.8675,0.866228
100,0.0284,0.767492,0.8675,0.865132
120,0.0144,0.774728,0.865,0.863495
140,0.005,0.839078,0.8625,0.860892
160,0.0041,0.898217,0.8725,0.872077
180,0.0083,0.9088,0.8675,0.865951
200,0.0002,0.958374,0.8625,0.861773


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4234,0.375146,0.8625,0.86104
40,0.1914,0.353061,0.8775,0.87593
60,0.1346,0.415203,0.8625,0.861439
80,0.0492,0.565761,0.8675,0.866228
100,0.0284,0.767492,0.8675,0.865132
120,0.0144,0.774728,0.865,0.863495
140,0.005,0.839078,0.8625,0.860892
160,0.0041,0.898217,0.8725,0.872077
180,0.0083,0.9088,0.8675,0.865951
200,0.0002,0.958374,0.8625,0.861773


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4423,0.325336,0.885,0.884163
40,0.1971,0.374105,0.87,0.869736
60,0.1173,0.382365,0.89,0.888653
80,0.0527,0.535645,0.88,0.879636
100,0.0291,0.558257,0.8925,0.892008
120,0.0117,0.647145,0.885,0.884349
140,0.007,0.726268,0.88,0.878392
160,0.0093,0.775977,0.88,0.878095
180,0.0021,0.773699,0.8825,0.88071
200,0.0124,0.757702,0.9025,0.901748


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4423,0.325336,0.885,0.884163
40,0.1971,0.374105,0.87,0.869736
60,0.1173,0.382365,0.89,0.888653
80,0.0527,0.535645,0.88,0.879636
100,0.0291,0.558257,0.8925,0.892008
120,0.0117,0.647145,0.885,0.884349
140,0.007,0.726268,0.88,0.878392
160,0.0093,0.775977,0.88,0.878095
180,0.0021,0.773699,0.8825,0.88071
200,0.0124,0.757702,0.9025,0.901748


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4423,0.325336,0.885,0.884163
40,0.1971,0.374105,0.87,0.869736
60,0.1173,0.382365,0.89,0.888653
80,0.0527,0.535645,0.88,0.879636
100,0.0291,0.558257,0.8925,0.892008
120,0.0117,0.647145,0.885,0.884349
140,0.007,0.726268,0.88,0.878392
160,0.0093,0.775977,0.88,0.878095
180,0.0021,0.773699,0.8825,0.88071
200,0.0124,0.757702,0.9025,0.901748


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4329,0.29444,0.8725,0.872077
40,0.2353,0.277948,0.8975,0.896516
60,0.0891,0.319238,0.8975,0.896958
80,0.0598,0.34868,0.9075,0.907291
100,0.0255,0.436487,0.9,0.89984
120,0.009,0.525227,0.9075,0.907011
140,0.0038,0.606076,0.895,0.894554
160,0.01,0.615652,0.9025,0.90228
180,0.0002,0.639148,0.9,0.899639
200,0.0002,0.662997,0.8975,0.897356


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4329,0.29444,0.8725,0.872077
40,0.2353,0.277948,0.8975,0.896516
60,0.0891,0.319238,0.8975,0.896958
80,0.0598,0.34868,0.9075,0.907291
100,0.0255,0.436487,0.9,0.89984
120,0.009,0.525227,0.9075,0.907011
140,0.0038,0.606076,0.895,0.894554
160,0.01,0.615652,0.9025,0.90228
180,0.0002,0.639148,0.9,0.899639
200,0.0002,0.662997,0.8975,0.897356


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4329,0.29444,0.8725,0.872077
40,0.2353,0.277948,0.8975,0.896516
60,0.0891,0.319238,0.8975,0.896958
80,0.0598,0.34868,0.9075,0.907291
100,0.0255,0.436487,0.9,0.89984
120,0.009,0.525227,0.9075,0.907011
140,0.0038,0.606076,0.895,0.894554
160,0.01,0.615652,0.9025,0.90228
180,0.0002,0.639148,0.9,0.899639
200,0.0002,0.662997,0.8975,0.897356


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3888,0.430432,0.8325,0.832415
40,0.188,0.423666,0.86,0.859716
60,0.1117,0.479348,0.8725,0.87232
80,0.061,0.51918,0.875,0.874293
100,0.0278,0.636171,0.875,0.874746
120,0.0133,0.687853,0.8675,0.8668
140,0.0065,0.735141,0.8825,0.882175
160,0.0105,0.770969,0.8825,0.88179
180,0.0051,0.883838,0.87,0.869841
200,0.0075,0.845489,0.88,0.879699


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3888,0.430432,0.8325,0.832415
40,0.188,0.423666,0.86,0.859716
60,0.1117,0.479348,0.8725,0.87232
80,0.061,0.51918,0.875,0.874293
100,0.0278,0.636171,0.875,0.874746
120,0.0133,0.687853,0.8675,0.8668
140,0.0065,0.735141,0.8825,0.882175
160,0.0105,0.770969,0.8825,0.88179
180,0.0051,0.883838,0.87,0.869841
200,0.0075,0.845489,0.88,0.879699


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3888,0.430432,0.8325,0.832415
40,0.188,0.423666,0.86,0.859716
60,0.1117,0.479348,0.8725,0.87232
80,0.061,0.51918,0.875,0.874293
100,0.0278,0.636171,0.875,0.874746
120,0.0133,0.687853,0.8675,0.8668
140,0.0065,0.735141,0.8825,0.882175
160,0.0105,0.770969,0.8825,0.88179
180,0.0051,0.883838,0.87,0.869841
200,0.0075,0.845489,0.88,0.879699


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4561,0.38658,0.8375,0.830552
40,0.2103,0.386699,0.87,0.869919
60,0.1116,0.44645,0.8625,0.86118
80,0.0613,0.536362,0.8525,0.851825
100,0.0264,0.832581,0.8425,0.842475
120,0.0097,0.93649,0.855,0.854942
140,0.0043,1.076903,0.8475,0.847499
160,0.0078,0.979117,0.8475,0.847285
180,0.0097,0.994744,0.85,0.849865
200,0.005,1.004411,0.855,0.854706


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4561,0.38658,0.8375,0.830552
40,0.2103,0.386699,0.87,0.869919
60,0.1116,0.44645,0.8625,0.86118
80,0.0613,0.536362,0.8525,0.851825
100,0.0264,0.832581,0.8425,0.842475
120,0.0097,0.93649,0.855,0.854942
140,0.0043,1.076903,0.8475,0.847499
160,0.0078,0.979117,0.8475,0.847285
180,0.0097,0.994744,0.85,0.849865
200,0.005,1.004411,0.855,0.854706


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4561,0.38658,0.8375,0.830552
40,0.2103,0.386699,0.87,0.869919
60,0.1116,0.44645,0.8625,0.86118
80,0.0613,0.536362,0.8525,0.851825
100,0.0264,0.832581,0.8425,0.842475
120,0.0097,0.93649,0.855,0.854942
140,0.0043,1.076903,0.8475,0.847499
160,0.0078,0.979117,0.8475,0.847285
180,0.0097,0.994744,0.85,0.849865
200,0.005,1.004411,0.855,0.854706


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4476,0.282623,0.8825,0.881962
40,0.2172,0.333315,0.885,0.884163
60,0.1049,0.369374,0.8875,0.887189
80,0.0448,0.458445,0.895,0.893829
100,0.0326,0.468346,0.9,0.899508
120,0.0214,0.526556,0.9,0.899997
140,0.0156,0.52045,0.8975,0.897499
160,0.0171,0.551183,0.905,0.904979
180,0.0101,0.600113,0.895,0.894406
200,0.0069,0.604532,0.9,0.899576


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4476,0.282623,0.8825,0.881962
40,0.2172,0.333315,0.885,0.884163
60,0.1049,0.369374,0.8875,0.887189
80,0.0448,0.458445,0.895,0.893829
100,0.0326,0.468346,0.9,0.899508
120,0.0214,0.526556,0.9,0.899997
140,0.0156,0.52045,0.8975,0.897499
160,0.0171,0.551183,0.905,0.904979
180,0.0101,0.600113,0.895,0.894406
200,0.0069,0.604532,0.9,0.899576


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4476,0.282623,0.8825,0.881962
40,0.2172,0.333315,0.885,0.884163
60,0.1049,0.369374,0.8875,0.887189
80,0.0448,0.458445,0.895,0.893829
100,0.0326,0.468346,0.9,0.899508
120,0.0214,0.526556,0.9,0.899997
140,0.0156,0.52045,0.8975,0.897499
160,0.0171,0.551183,0.905,0.904979
180,0.0101,0.600113,0.895,0.894406
200,0.0069,0.604532,0.9,0.899576


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.915,0.914864
3,alger-ia/dziribert,0.9175,0.917122
6,faisalq/EgyBERT,0.93,0.929788
9,faisalq/SaudiBERT,0.9475,0.947426
15,otmangi/MorRoBERTa,0.9075,0.907291
18,otmangi/MorrBERT,0.905,0.904979
21,tunis-ai/TunBERT,0.735,0.734196
