In [5]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'dreviews_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


dataset = load_dataset('ohidaoui/darija-reviews')


df = pd.DataFrame(dataset['test'])

display(len(df))
      
display(df.columns)
display(df[:4])

df['label'] = df['label'].replace('negative ', 'negative')

c = df['label'].value_counts()
display(c)

classes = set(df['label'].values)
display(classes)

df['label'] = df['label'].astype('category')
df['label'] = df['label'].cat.codes

df = df[['review', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))



max_sequence_length = 128



models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]


seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
            
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['review'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True)
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 30
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 10, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 10
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



Found cached dataset parquet (/home/ffq/.cache/huggingface/datasets/ohidaoui___parquet/ohidaoui--darija-reviews-34030453886e5230/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/1 [00:00<?, ?it/s]

851

Index(['review', 'label', 'topic', 'writing_style'], dtype='object')

Unnamed: 0,review,label,topic,writing_style
0,داسيا فقدت أهم ميزة كانت تميزها وهي السعر للأسف !,negative,automotive,Arabic
1,لاأظنها ستنجح كالنسخ الأولى,negative,automotive,Arabic
2,Khas ykon tghyir mbanch li lfar9 rir fa dizayn lkrasa mikanik tablo fih dak lblstik lmryat 3lach mdyrinch lhm daw volm mzl kif mahwa jwant kifma howa fin kyn lfr9 fdizyn ama ljiti tchof mkynch fra9 fra9 howa y9riban 40% mli kt9ol l3mrya hdi bnsba li ana nas 3morya ama l3morya kolchi daw mzl khsha whd 10 ans wnchofo ana knfdl dacia l9dima,negative,automotive,Arabizi
3,هناك اختيارات أحسن وماركات عالميه أجود من داصيا.,negative,automotive,Arabic


label
positive    456
negative    273
neutral     122
Name: count, dtype: int64

{'negative', 'neutral', 'positive'}

3

851

DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 680
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 171
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0863,1.061175,0.549708,0.236478
20,1.0495,1.018015,0.549708,0.236478
30,1.0229,0.983613,0.549708,0.236478
40,0.9732,0.942434,0.549708,0.236478
50,0.9332,0.891527,0.549708,0.236478
60,0.8734,0.836687,0.619883,0.362784
70,0.8042,0.786484,0.690058,0.463187
80,0.7158,0.739635,0.730994,0.508961
90,0.6442,0.705058,0.74269,0.515048
100,0.5517,0.669328,0.766082,0.534219


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0839,1.060004,0.549708,0.236478
20,1.0492,1.018689,0.549708,0.236478
30,1.024,0.981788,0.549708,0.236478
40,0.9758,0.94222,0.549708,0.236478
50,0.9357,0.895262,0.549708,0.236478
60,0.8693,0.841421,0.549708,0.236478
70,0.7964,0.805077,0.672515,0.455684
80,0.7127,0.757254,0.707602,0.566098
90,0.6229,0.713323,0.736842,0.606044
100,0.5425,0.733059,0.725146,0.552254


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0839,1.060004,0.549708,0.236478
20,1.0492,1.018689,0.549708,0.236478
30,1.024,0.981788,0.549708,0.236478
40,0.9758,0.94222,0.549708,0.236478
50,0.9357,0.895262,0.549708,0.236478
60,0.8693,0.841421,0.549708,0.236478
70,0.7964,0.805077,0.672515,0.455684
80,0.7127,0.757254,0.707602,0.566098
90,0.6229,0.713323,0.736842,0.606044
100,0.5425,0.733059,0.725146,0.552254


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0829,1.061969,0.526316,0.229885
20,1.0478,1.02398,0.526316,0.229885
30,1.0142,0.992162,0.526316,0.229885
40,0.9783,0.959116,0.526316,0.229885
50,0.9438,0.919374,0.526316,0.229885
60,0.8876,0.854469,0.526316,0.229885
70,0.7943,0.819656,0.71345,0.495282
80,0.7288,0.77686,0.725146,0.595314
90,0.6574,0.815678,0.701754,0.533542
100,0.5876,0.713672,0.74269,0.524862


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0829,1.061969,0.526316,0.229885
20,1.0478,1.02398,0.526316,0.229885
30,1.0142,0.992162,0.526316,0.229885
40,0.9783,0.959116,0.526316,0.229885
50,0.9438,0.919374,0.526316,0.229885
60,0.8876,0.854469,0.526316,0.229885
70,0.7943,0.819656,0.71345,0.495282
80,0.7288,0.77686,0.725146,0.595314
90,0.6574,0.815678,0.701754,0.533542
100,0.5876,0.713672,0.74269,0.524862


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0829,1.061969,0.526316,0.229885
20,1.0478,1.02398,0.526316,0.229885
30,1.0142,0.992162,0.526316,0.229885
40,0.9783,0.959116,0.526316,0.229885
50,0.9438,0.919374,0.526316,0.229885
60,0.8876,0.854469,0.526316,0.229885
70,0.7943,0.819656,0.71345,0.495282
80,0.7288,0.77686,0.725146,0.595314
90,0.6574,0.815678,0.701754,0.533542
100,0.5876,0.713672,0.74269,0.524862


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0827,1.064542,0.502924,0.223087
20,1.0489,1.030953,0.502924,0.223087
30,1.0071,1.00032,0.502924,0.223087
40,0.9688,0.9713,0.502924,0.223087
50,0.9187,0.923463,0.549708,0.325537
60,0.8461,0.866802,0.684211,0.483655
70,0.7551,0.824547,0.684211,0.488144
80,0.6683,0.799418,0.678363,0.484285
90,0.6021,0.796208,0.684211,0.482764
100,0.5508,0.796175,0.695906,0.490355


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0827,1.064542,0.502924,0.223087
20,1.0489,1.030953,0.502924,0.223087
30,1.0071,1.00032,0.502924,0.223087
40,0.9688,0.9713,0.502924,0.223087
50,0.9187,0.923463,0.549708,0.325537
60,0.8461,0.866802,0.684211,0.483655
70,0.7551,0.824547,0.684211,0.488144
80,0.6683,0.799418,0.678363,0.484285
90,0.6021,0.796208,0.684211,0.482764
100,0.5508,0.796175,0.695906,0.490355


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0827,1.064542,0.502924,0.223087
20,1.0489,1.030953,0.502924,0.223087
30,1.0071,1.00032,0.502924,0.223087
40,0.9688,0.9713,0.502924,0.223087
50,0.9187,0.923463,0.549708,0.325537
60,0.8461,0.866802,0.684211,0.483655
70,0.7551,0.824547,0.684211,0.488144
80,0.6683,0.799418,0.678363,0.484285
90,0.6021,0.796208,0.684211,0.482764
100,0.5508,0.796175,0.695906,0.490355


DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 680
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 171
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9307,0.738917,0.660819,0.44864
20,0.6652,0.576352,0.74269,0.567377
30,0.4607,0.550605,0.748538,0.672391
40,0.2913,0.59478,0.77193,0.677774
50,0.1801,0.761205,0.760234,0.628363
60,0.15,0.929222,0.77193,0.649259
70,0.0843,0.85629,0.736842,0.639832
80,0.0437,0.964261,0.719298,0.639065
90,0.0094,1.310681,0.736842,0.651979
100,0.0359,1.172758,0.760234,0.681196


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9307,0.738917,0.660819,0.44864
20,0.6652,0.576352,0.74269,0.567377
30,0.4607,0.550605,0.748538,0.672391
40,0.2913,0.59478,0.77193,0.677774
50,0.1801,0.761205,0.760234,0.628363
60,0.15,0.929222,0.77193,0.649259
70,0.0843,0.85629,0.736842,0.639832
80,0.0437,0.964261,0.719298,0.639065
90,0.0094,1.310681,0.736842,0.651979
100,0.0359,1.172758,0.760234,0.681196


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9307,0.738917,0.660819,0.44864
20,0.6652,0.576352,0.74269,0.567377
30,0.4607,0.550605,0.748538,0.672391
40,0.2913,0.59478,0.77193,0.677774
50,0.1801,0.761205,0.760234,0.628363
60,0.15,0.929222,0.77193,0.649259
70,0.0843,0.85629,0.736842,0.639832
80,0.0437,0.964261,0.719298,0.639065
90,0.0094,1.310681,0.736842,0.651979
100,0.0359,1.172758,0.760234,0.681196


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9038,0.683332,0.690058,0.473939
20,0.5855,0.597106,0.730994,0.630041
30,0.3498,0.606022,0.783626,0.653465
40,0.243,0.616928,0.77193,0.662947
50,0.1761,0.82056,0.760234,0.69944
60,0.1016,0.691121,0.795322,0.711636
70,0.076,0.773662,0.783626,0.718335
80,0.0142,0.89475,0.777778,0.6913
90,0.0196,1.029151,0.795322,0.704484
100,0.0022,1.089019,0.766082,0.678863


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9038,0.683332,0.690058,0.473939
20,0.5855,0.597106,0.730994,0.630041
30,0.3498,0.606022,0.783626,0.653465
40,0.243,0.616928,0.77193,0.662947
50,0.1761,0.82056,0.760234,0.69944
60,0.1016,0.691121,0.795322,0.711636
70,0.076,0.773662,0.783626,0.718335
80,0.0142,0.89475,0.777778,0.6913
90,0.0196,1.029151,0.795322,0.704484
100,0.0022,1.089019,0.766082,0.678863


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9038,0.683332,0.690058,0.473939
20,0.5855,0.597106,0.730994,0.630041
30,0.3498,0.606022,0.783626,0.653465
40,0.243,0.616928,0.77193,0.662947
50,0.1761,0.82056,0.760234,0.69944
60,0.1016,0.691121,0.795322,0.711636
70,0.076,0.773662,0.783626,0.718335
80,0.0142,0.89475,0.777778,0.6913
90,0.0196,1.029151,0.795322,0.704484
100,0.0022,1.089019,0.766082,0.678863


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9127,0.795531,0.649123,0.447115
20,0.6609,0.602636,0.71345,0.637984
30,0.3917,0.579485,0.766082,0.672766
40,0.2516,0.665673,0.777778,0.710119
50,0.123,0.974034,0.736842,0.672331
60,0.0987,0.884375,0.730994,0.643262
70,0.0544,1.225827,0.71345,0.616235
80,0.0403,1.136976,0.748538,0.652168
90,0.0143,1.165173,0.754386,0.696989
100,0.0279,1.434041,0.74269,0.653632


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9127,0.795531,0.649123,0.447115
20,0.6609,0.602636,0.71345,0.637984
30,0.3917,0.579485,0.766082,0.672766
40,0.2516,0.665673,0.777778,0.710119
50,0.123,0.974034,0.736842,0.672331
60,0.0987,0.884375,0.730994,0.643262
70,0.0544,1.225827,0.71345,0.616235
80,0.0403,1.136976,0.748538,0.652168
90,0.0143,1.165173,0.754386,0.696989
100,0.0279,1.434041,0.74269,0.653632


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9127,0.795531,0.649123,0.447115
20,0.6609,0.602636,0.71345,0.637984
30,0.3917,0.579485,0.766082,0.672766
40,0.2516,0.665673,0.777778,0.710119
50,0.123,0.974034,0.736842,0.672331
60,0.0987,0.884375,0.730994,0.643262
70,0.0544,1.225827,0.71345,0.616235
80,0.0403,1.136976,0.748538,0.652168
90,0.0143,1.165173,0.754386,0.696989
100,0.0279,1.434041,0.74269,0.653632


DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 680
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 171
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.1038,0.955955,0.549708,0.236478
20,1.0072,0.983952,0.549708,0.236478
30,1.0477,0.962625,0.549708,0.236478
40,0.9855,0.954538,0.549708,0.236478
50,0.9773,0.907543,0.614035,0.35912
60,0.9781,0.902185,0.555556,0.24907
70,0.9201,0.835734,0.625731,0.436129
80,0.8645,0.808866,0.637427,0.43397
90,0.8638,0.791539,0.672515,0.463168
100,0.7914,0.823541,0.654971,0.456836


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.1038,0.955955,0.549708,0.236478
20,1.0072,0.983952,0.549708,0.236478
30,1.0477,0.962625,0.549708,0.236478
40,0.9855,0.954538,0.549708,0.236478
50,0.9773,0.907543,0.614035,0.35912
60,0.9781,0.902185,0.555556,0.24907
70,0.9201,0.835734,0.625731,0.436129
80,0.8645,0.808866,0.637427,0.43397
90,0.8638,0.791539,0.672515,0.463168
100,0.7914,0.823541,0.654971,0.456836


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.1038,0.955955,0.549708,0.236478
20,1.0072,0.983952,0.549708,0.236478
30,1.0477,0.962625,0.549708,0.236478
40,0.9855,0.954538,0.549708,0.236478
50,0.9773,0.907543,0.614035,0.35912
60,0.9781,0.902185,0.555556,0.24907
70,0.9201,0.835734,0.625731,0.436129
80,0.8645,0.808866,0.637427,0.43397
90,0.8638,0.791539,0.672515,0.463168
100,0.7914,0.823541,0.654971,0.456836


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.1333,0.977088,0.526316,0.229885
20,1.0013,0.967748,0.526316,0.229885
30,0.986,0.963733,0.526316,0.229885
40,0.9917,0.955471,0.526316,0.229885
50,0.9758,0.915757,0.590643,0.421618
60,0.962,0.992535,0.438596,0.287328
70,0.9276,0.888338,0.631579,0.442307
80,0.9016,0.853217,0.619883,0.434935
90,0.8668,0.897568,0.596491,0.380371
100,0.8872,0.851781,0.631579,0.44237


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.1333,0.977088,0.526316,0.229885
20,1.0013,0.967748,0.526316,0.229885
30,0.986,0.963733,0.526316,0.229885
40,0.9917,0.955471,0.526316,0.229885
50,0.9758,0.915757,0.590643,0.421618
60,0.962,0.992535,0.438596,0.287328
70,0.9276,0.888338,0.631579,0.442307
80,0.9016,0.853217,0.619883,0.434935
90,0.8668,0.897568,0.596491,0.380371
100,0.8872,0.851781,0.631579,0.44237


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.1333,0.977088,0.526316,0.229885
20,1.0013,0.967748,0.526316,0.229885
30,0.986,0.963733,0.526316,0.229885
40,0.9917,0.955471,0.526316,0.229885
50,0.9758,0.915757,0.590643,0.421618
60,0.962,0.992535,0.438596,0.287328
70,0.9276,0.888338,0.631579,0.442307
80,0.9016,0.853217,0.619883,0.434935
90,0.8668,0.897568,0.596491,0.380371
100,0.8872,0.851781,0.631579,0.44237


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0559,1.03057,0.502924,0.223087
20,0.9831,1.015201,0.502924,0.223087
30,0.9814,0.957794,0.54386,0.351541
40,0.9351,1.042219,0.502924,0.223087
50,0.9376,0.968644,0.573099,0.361605
60,0.9129,1.00859,0.549708,0.320294
70,0.8425,0.933317,0.584795,0.400486
80,0.7856,0.925053,0.596491,0.422775
90,0.7699,1.136351,0.590643,0.394439
100,0.722,1.049506,0.608187,0.421922


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0559,1.03057,0.502924,0.223087
20,0.9831,1.015201,0.502924,0.223087
30,0.9814,0.957794,0.54386,0.351541
40,0.9351,1.042219,0.502924,0.223087
50,0.9376,0.968644,0.573099,0.361605
60,0.9129,1.00859,0.549708,0.320294
70,0.8425,0.933317,0.584795,0.400486
80,0.7856,0.925053,0.596491,0.422775
90,0.7699,1.136351,0.590643,0.394439
100,0.722,1.049506,0.608187,0.421922


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0559,1.03057,0.502924,0.223087
20,0.9831,1.015201,0.502924,0.223087
30,0.9814,0.957794,0.54386,0.351541
40,0.9351,1.042219,0.502924,0.223087
50,0.9376,0.968644,0.573099,0.361605
60,0.9129,1.00859,0.549708,0.320294
70,0.8425,0.933317,0.584795,0.400486
80,0.7856,0.925053,0.596491,0.422775
90,0.7699,1.136351,0.590643,0.394439
100,0.722,1.049506,0.608187,0.421922


DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 680
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 171
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9171,0.657833,0.730994,0.533519
20,0.6295,0.536412,0.77193,0.62846
30,0.3476,0.495374,0.812865,0.755253
40,0.1496,0.636637,0.766082,0.681199
50,0.084,0.750665,0.766082,0.643308
60,0.0257,0.80098,0.754386,0.685436
70,0.0048,0.958777,0.766082,0.623676
80,0.0027,1.215472,0.74269,0.690562
90,0.0017,1.130858,0.748538,0.676093
100,0.001,1.122306,0.754386,0.636689


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9171,0.657833,0.730994,0.533519
20,0.6295,0.536412,0.77193,0.62846
30,0.3476,0.495374,0.812865,0.755253
40,0.1496,0.636637,0.766082,0.681199
50,0.084,0.750665,0.766082,0.643308
60,0.0257,0.80098,0.754386,0.685436
70,0.0048,0.958777,0.766082,0.623676
80,0.0027,1.215472,0.74269,0.690562
90,0.0017,1.130858,0.748538,0.676093
100,0.001,1.122306,0.754386,0.636689


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9171,0.657833,0.730994,0.533519
20,0.6295,0.536412,0.77193,0.62846
30,0.3476,0.495374,0.812865,0.755253
40,0.1496,0.636637,0.766082,0.681199
50,0.084,0.750665,0.766082,0.643308
60,0.0257,0.80098,0.754386,0.685436
70,0.0048,0.958777,0.766082,0.623676
80,0.0027,1.215472,0.74269,0.690562
90,0.0017,1.130858,0.748538,0.676093
100,0.001,1.122306,0.754386,0.636689


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.894,0.679346,0.725146,0.625433
20,0.5732,0.619629,0.719298,0.608418
30,0.332,0.64173,0.760234,0.69709
40,0.1708,0.813177,0.754386,0.694485
50,0.0864,0.75833,0.766082,0.66392
60,0.0243,0.909494,0.77193,0.695552
70,0.0083,1.149382,0.760234,0.660846
80,0.0021,1.297395,0.760234,0.693431
90,0.0013,1.391625,0.754386,0.683767
100,0.001,1.392861,0.754386,0.680351


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.894,0.679346,0.725146,0.625433
20,0.5732,0.619629,0.719298,0.608418
30,0.332,0.64173,0.760234,0.69709
40,0.1708,0.813177,0.754386,0.694485
50,0.0864,0.75833,0.766082,0.66392
60,0.0243,0.909494,0.77193,0.695552
70,0.0083,1.149382,0.760234,0.660846
80,0.0021,1.297395,0.760234,0.693431
90,0.0013,1.391625,0.754386,0.683767
100,0.001,1.392861,0.754386,0.680351


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.894,0.679346,0.725146,0.625433
20,0.5732,0.619629,0.719298,0.608418
30,0.332,0.64173,0.760234,0.69709
40,0.1708,0.813177,0.754386,0.694485
50,0.0864,0.75833,0.766082,0.66392
60,0.0243,0.909494,0.77193,0.695552
70,0.0083,1.149382,0.760234,0.660846
80,0.0021,1.297395,0.760234,0.693431
90,0.0013,1.391625,0.754386,0.683767
100,0.001,1.392861,0.754386,0.680351


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8842,0.76711,0.631579,0.471041
20,0.58,0.654289,0.730994,0.657684
30,0.3045,0.654505,0.77193,0.699787
40,0.1378,0.750603,0.77193,0.720009
50,0.0435,0.992158,0.754386,0.696943
60,0.013,1.094746,0.777778,0.719327
70,0.005,1.235594,0.760234,0.698679
80,0.0021,1.25834,0.789474,0.732036
90,0.0019,1.542776,0.748538,0.676599
100,0.001,1.44081,0.777778,0.718544


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8842,0.76711,0.631579,0.471041
20,0.58,0.654289,0.730994,0.657684
30,0.3045,0.654505,0.77193,0.699787
40,0.1378,0.750603,0.77193,0.720009
50,0.0435,0.992158,0.754386,0.696943
60,0.013,1.094746,0.777778,0.719327
70,0.005,1.235594,0.760234,0.698679
80,0.0021,1.25834,0.789474,0.732036
90,0.0019,1.542776,0.748538,0.676599
100,0.001,1.44081,0.777778,0.718544


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8842,0.76711,0.631579,0.471041
20,0.58,0.654289,0.730994,0.657684
30,0.3045,0.654505,0.77193,0.699787
40,0.1378,0.750603,0.77193,0.720009
50,0.0435,0.992158,0.754386,0.696943
60,0.013,1.094746,0.777778,0.719327
70,0.005,1.235594,0.760234,0.698679
80,0.0021,1.25834,0.789474,0.732036
90,0.0019,1.542776,0.748538,0.676599
100,0.001,1.44081,0.777778,0.718544


DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 680
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 171
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9696,0.795534,0.637427,0.388846
20,0.7272,0.603909,0.719298,0.492407
30,0.5194,0.564651,0.74269,0.665547
40,0.3317,0.588129,0.760234,0.673016
50,0.2334,0.530724,0.783626,0.677104
60,0.1142,0.962078,0.766082,0.648224
70,0.1942,0.753961,0.783626,0.731498
80,0.0834,0.914723,0.760234,0.69717
90,0.084,0.923592,0.777778,0.697959
100,0.0283,0.842628,0.795322,0.740021


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9696,0.795534,0.637427,0.388846
20,0.7272,0.603909,0.719298,0.492407
30,0.5194,0.564651,0.74269,0.665547
40,0.3317,0.588129,0.760234,0.673016
50,0.2334,0.530724,0.783626,0.677104
60,0.1142,0.962078,0.766082,0.648224
70,0.1942,0.753961,0.783626,0.731498
80,0.0834,0.914723,0.760234,0.69717
90,0.084,0.923592,0.777778,0.697959
100,0.0283,0.842628,0.795322,0.740021


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9696,0.795534,0.637427,0.388846
20,0.7272,0.603909,0.719298,0.492407
30,0.5194,0.564651,0.74269,0.665547
40,0.3317,0.588129,0.760234,0.673016
50,0.2334,0.530724,0.783626,0.677104
60,0.1142,0.962078,0.766082,0.648224
70,0.1942,0.753961,0.783626,0.731498
80,0.0834,0.914723,0.760234,0.69717
90,0.084,0.923592,0.777778,0.697959
100,0.0283,0.842628,0.795322,0.740021


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9,0.653807,0.701754,0.484848
20,0.6161,0.681597,0.725146,0.524411
30,0.3664,0.606385,0.74269,0.626421
40,0.2723,0.914722,0.736842,0.637198
50,0.2184,1.022244,0.730994,0.602758
60,0.1125,0.747645,0.760234,0.642876
70,0.0721,0.860162,0.730994,0.65242
80,0.0616,0.901724,0.766082,0.695247
90,0.0466,1.096196,0.748538,0.626874
100,0.0381,1.19359,0.74269,0.636915


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9,0.653807,0.701754,0.484848
20,0.6161,0.681597,0.725146,0.524411
30,0.3664,0.606385,0.74269,0.626421
40,0.2723,0.914722,0.736842,0.637198
50,0.2184,1.022244,0.730994,0.602758
60,0.1125,0.747645,0.760234,0.642876
70,0.0721,0.860162,0.730994,0.65242
80,0.0616,0.901724,0.766082,0.695247
90,0.0466,1.096196,0.748538,0.626874
100,0.0381,1.19359,0.74269,0.636915


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9,0.653807,0.701754,0.484848
20,0.6161,0.681597,0.725146,0.524411
30,0.3664,0.606385,0.74269,0.626421
40,0.2723,0.914722,0.736842,0.637198
50,0.2184,1.022244,0.730994,0.602758
60,0.1125,0.747645,0.760234,0.642876
70,0.0721,0.860162,0.730994,0.65242
80,0.0616,0.901724,0.766082,0.695247
90,0.0466,1.096196,0.748538,0.626874
100,0.0381,1.19359,0.74269,0.636915


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9169,0.876244,0.614035,0.42096
20,0.6931,0.755305,0.660819,0.554742
30,0.4541,0.792554,0.690058,0.537415
40,0.302,0.76651,0.730994,0.658538
50,0.1796,0.916187,0.74269,0.670175
60,0.1652,0.960117,0.77193,0.691824
70,0.1204,1.012089,0.730994,0.666215
80,0.0716,1.137959,0.695906,0.64683
90,0.0471,1.363088,0.719298,0.661012
100,0.0496,1.407665,0.719298,0.656993


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9169,0.876244,0.614035,0.42096
20,0.6931,0.755305,0.660819,0.554742
30,0.4541,0.792554,0.690058,0.537415
40,0.302,0.76651,0.730994,0.658538
50,0.1796,0.916187,0.74269,0.670175
60,0.1652,0.960117,0.77193,0.691824
70,0.1204,1.012089,0.730994,0.666215
80,0.0716,1.137959,0.695906,0.64683
90,0.0471,1.363088,0.719298,0.661012
100,0.0496,1.407665,0.719298,0.656993


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9169,0.876244,0.614035,0.42096
20,0.6931,0.755305,0.660819,0.554742
30,0.4541,0.792554,0.690058,0.537415
40,0.302,0.76651,0.730994,0.658538
50,0.1796,0.916187,0.74269,0.670175
60,0.1652,0.960117,0.77193,0.691824
70,0.1204,1.012089,0.730994,0.666215
80,0.0716,1.137959,0.695906,0.64683
90,0.0471,1.363088,0.719298,0.661012
100,0.0496,1.407665,0.719298,0.656993


DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 680
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 171
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9408,0.729024,0.690058,0.473102
20,0.7219,0.652799,0.695906,0.562824
30,0.4871,0.654006,0.707602,0.609259
40,0.295,0.765373,0.736842,0.60283
50,0.1916,0.769938,0.736842,0.656474
60,0.0833,0.918485,0.725146,0.637264
70,0.0321,1.158455,0.725146,0.626081
80,0.0051,1.356818,0.71345,0.615735
90,0.0019,1.537312,0.707602,0.603973
100,0.0011,1.59632,0.707602,0.603973


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9408,0.729024,0.690058,0.473102
20,0.7219,0.652799,0.695906,0.562824
30,0.4871,0.654006,0.707602,0.609259
40,0.295,0.765373,0.736842,0.60283
50,0.1916,0.769938,0.736842,0.656474
60,0.0833,0.918485,0.725146,0.637264
70,0.0321,1.158455,0.725146,0.626081
80,0.0051,1.356818,0.71345,0.615735
90,0.0019,1.537312,0.707602,0.603973
100,0.0011,1.59632,0.707602,0.603973


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9408,0.729024,0.690058,0.473102
20,0.7219,0.652799,0.695906,0.562824
30,0.4871,0.654006,0.707602,0.609259
40,0.295,0.765373,0.736842,0.60283
50,0.1916,0.769938,0.736842,0.656474
60,0.0833,0.918485,0.725146,0.637264
70,0.0321,1.158455,0.725146,0.626081
80,0.0051,1.356818,0.71345,0.615735
90,0.0019,1.537312,0.707602,0.603973
100,0.0011,1.59632,0.707602,0.603973


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9262,0.72795,0.660819,0.473254
20,0.6365,0.653357,0.748538,0.645141
30,0.3813,0.748424,0.71345,0.64409
40,0.2379,0.915584,0.719298,0.647681
50,0.0766,1.112188,0.725146,0.663762
60,0.0311,1.194125,0.748538,0.680167
70,0.0127,1.386351,0.748538,0.669172
80,0.0048,1.478761,0.71345,0.637601
90,0.0018,1.51835,0.748538,0.662626
100,0.0012,1.539509,0.74269,0.652054


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9262,0.72795,0.660819,0.473254
20,0.6365,0.653357,0.748538,0.645141
30,0.3813,0.748424,0.71345,0.64409
40,0.2379,0.915584,0.719298,0.647681
50,0.0766,1.112188,0.725146,0.663762
60,0.0311,1.194125,0.748538,0.680167
70,0.0127,1.386351,0.748538,0.669172
80,0.0048,1.478761,0.71345,0.637601
90,0.0018,1.51835,0.748538,0.662626
100,0.0012,1.539509,0.74269,0.652054


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9262,0.72795,0.660819,0.473254
20,0.6365,0.653357,0.748538,0.645141
30,0.3813,0.748424,0.71345,0.64409
40,0.2379,0.915584,0.719298,0.647681
50,0.0766,1.112188,0.725146,0.663762
60,0.0311,1.194125,0.748538,0.680167
70,0.0127,1.386351,0.748538,0.669172
80,0.0048,1.478761,0.71345,0.637601
90,0.0018,1.51835,0.748538,0.662626
100,0.0012,1.539509,0.74269,0.652054


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9166,0.81135,0.637427,0.447965
20,0.6819,0.692657,0.701754,0.561247
30,0.4882,0.659842,0.736842,0.638056
40,0.3108,0.714185,0.754386,0.688676
50,0.1636,0.744598,0.760234,0.689499
60,0.0622,0.920391,0.74269,0.66102
70,0.0216,1.079296,0.754386,0.676982
80,0.0042,1.230929,0.760234,0.667701
90,0.0022,1.242159,0.760234,0.680173
100,0.001,1.301058,0.77193,0.702497


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9166,0.81135,0.637427,0.447965
20,0.6819,0.692657,0.701754,0.561247
30,0.4882,0.659842,0.736842,0.638056
40,0.3108,0.714185,0.754386,0.688676
50,0.1636,0.744598,0.760234,0.689499
60,0.0622,0.920391,0.74269,0.66102
70,0.0216,1.079296,0.754386,0.676982
80,0.0042,1.230929,0.760234,0.667701
90,0.0022,1.242159,0.760234,0.680173
100,0.001,1.301058,0.77193,0.702497


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9166,0.81135,0.637427,0.447965
20,0.6819,0.692657,0.701754,0.561247
30,0.4882,0.659842,0.736842,0.638056
40,0.3108,0.714185,0.754386,0.688676
50,0.1636,0.744598,0.760234,0.689499
60,0.0622,0.920391,0.74269,0.66102
70,0.0216,1.079296,0.754386,0.676982
80,0.0042,1.230929,0.760234,0.667701
90,0.0022,1.242159,0.760234,0.680173
100,0.001,1.301058,0.77193,0.702497


DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 680
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 171
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8907,0.655918,0.701754,0.557969
20,0.5832,0.539254,0.77193,0.672943
30,0.359,0.530747,0.783626,0.718762
40,0.1702,0.648623,0.77193,0.700626
50,0.0874,0.793387,0.74269,0.635815
60,0.0237,0.894862,0.760234,0.672902
70,0.025,1.036612,0.754386,0.656363
80,0.0126,1.109487,0.74269,0.643343
90,0.0031,1.095922,0.748538,0.678209
100,0.01,1.100882,0.754386,0.674938


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8907,0.655918,0.701754,0.557969
20,0.5832,0.539254,0.77193,0.672943
30,0.359,0.530747,0.783626,0.718762
40,0.1702,0.648623,0.77193,0.700626
50,0.0874,0.793387,0.74269,0.635815
60,0.0237,0.894862,0.760234,0.672902
70,0.025,1.036612,0.754386,0.656363
80,0.0126,1.109487,0.74269,0.643343
90,0.0031,1.095922,0.748538,0.678209
100,0.01,1.100882,0.754386,0.674938


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8907,0.655918,0.701754,0.557969
20,0.5832,0.539254,0.77193,0.672943
30,0.359,0.530747,0.783626,0.718762
40,0.1702,0.648623,0.77193,0.700626
50,0.0874,0.793387,0.74269,0.635815
60,0.0237,0.894862,0.760234,0.672902
70,0.025,1.036612,0.754386,0.656363
80,0.0126,1.109487,0.74269,0.643343
90,0.0031,1.095922,0.748538,0.678209
100,0.01,1.100882,0.754386,0.674938


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8827,0.689102,0.736842,0.670872
20,0.5186,0.604748,0.760234,0.69473
30,0.2507,0.655632,0.766082,0.695343
40,0.1254,0.923887,0.748538,0.686097
50,0.0513,1.151796,0.736842,0.657593
60,0.0538,1.13266,0.766082,0.704831
70,0.0328,1.257689,0.725146,0.638695
80,0.0277,1.292087,0.74269,0.688817
90,0.0024,1.419079,0.730994,0.676613
100,0.0089,1.486625,0.725146,0.668392


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8827,0.689102,0.736842,0.670872
20,0.5186,0.604748,0.760234,0.69473
30,0.2507,0.655632,0.766082,0.695343
40,0.1254,0.923887,0.748538,0.686097
50,0.0513,1.151796,0.736842,0.657593
60,0.0538,1.13266,0.766082,0.704831
70,0.0328,1.257689,0.725146,0.638695
80,0.0277,1.292087,0.74269,0.688817
90,0.0024,1.419079,0.730994,0.676613
100,0.0089,1.486625,0.725146,0.668392


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8827,0.689102,0.736842,0.670872
20,0.5186,0.604748,0.760234,0.69473
30,0.2507,0.655632,0.766082,0.695343
40,0.1254,0.923887,0.748538,0.686097
50,0.0513,1.151796,0.736842,0.657593
60,0.0538,1.13266,0.766082,0.704831
70,0.0328,1.257689,0.725146,0.638695
80,0.0277,1.292087,0.74269,0.688817
90,0.0024,1.419079,0.730994,0.676613
100,0.0089,1.486625,0.725146,0.668392


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8578,0.737201,0.672515,0.54815
20,0.5212,0.620659,0.748538,0.682428
30,0.2806,0.628046,0.766082,0.703291
40,0.1474,0.656563,0.766082,0.706579
50,0.0637,0.790032,0.766082,0.69663
60,0.0126,0.94016,0.795322,0.736419
70,0.0229,0.979153,0.795322,0.733234
80,0.0151,1.119048,0.77193,0.696211
90,0.0124,1.297755,0.74269,0.663988
100,0.0025,1.201116,0.777778,0.71773


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8578,0.737201,0.672515,0.54815
20,0.5212,0.620659,0.748538,0.682428
30,0.2806,0.628046,0.766082,0.703291
40,0.1474,0.656563,0.766082,0.706579
50,0.0637,0.790032,0.766082,0.69663
60,0.0126,0.94016,0.795322,0.736419
70,0.0229,0.979153,0.795322,0.733234
80,0.0151,1.119048,0.77193,0.696211
90,0.0124,1.297755,0.74269,0.663988
100,0.0025,1.201116,0.777778,0.71773


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/680 [00:00<?, ? examples/s]

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8578,0.737201,0.672515,0.54815
20,0.5212,0.620659,0.748538,0.682428
30,0.2806,0.628046,0.766082,0.703291
40,0.1474,0.656563,0.766082,0.706579
50,0.0637,0.790032,0.766082,0.69663
60,0.0126,0.94016,0.795322,0.736419
70,0.0229,0.979153,0.795322,0.733234
80,0.0151,1.119048,0.77193,0.696211
90,0.0124,1.297755,0.74269,0.663988
100,0.0025,1.201116,0.777778,0.71773


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.812865,0.755959
3,alger-ia/dziribert,0.812865,0.755253
6,faisalq/EgyBERT,0.766082,0.693996
9,faisalq/SaudiBERT,0.824561,0.752299
12,otmangi/MorRoBERTa,0.77193,0.702497
15,otmangi/MorrBERT,0.812865,0.762196
18,tunis-ai/TunBERT,0.654971,0.568556
