In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'TCMD_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/Tweet_Classification_Moroccan_Dataset/data.csv', encoding='utf-16', engine='python', sep='\t') #, quotechar="'"  , quoting=3

display(len(df))
      
display(df.columns)
display(df[:4])



c = df['Topic'].value_counts()
display(c)

classes = set(df['Topic'].values)
display(classes)

df['Topic'] = df['Topic'].astype('category')
df['label'] = df['Topic'].cat.codes

df = df[['Tweet', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


max_sequence_length = 128



models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]


seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
            
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['Tweet'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True)
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 20
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 25, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 25
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-18 20:11:13.035247: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-18 20:11:13.058838: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2399

Index(['Tweet', 'Topic'], dtype='object')

Unnamed: 0,Tweet,Topic
0,برا وخيط: أمل بنكيران... مصممة تبتكر الزي المغربي التقليدي بلمسة عالمية,ميديا
1,ربورتاج: قافلة توجيه مُتنقّلة تُطلع تلاميذ العيون على مختلف الآفاق الدراسية,مجتمع
2,ساعة الفطور: طريقة تحضير «وراب» بالخضر وصلصة «تزاتزيكي »,ميديا
3,ربورتاج: مائدة إفطار رمضانية تجمع الديانات الثلاث بالدار البيضاء,ثقافة


Topic
مجتمع     357
رياضة     354
ميديا     353
دولي      348
اقتصاد    344
سياسة     333
ثقافة     310
Name: count, dtype: int64

{'اقتصاد', 'ثقافة', 'دولي', 'رياضة', 'سياسة', 'مجتمع', 'ميديا'}

7

2399

DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 1919
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 480
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9258,1.881989,0.5875,0.563767
50,1.8114,1.72748,0.620833,0.599088
75,1.637,1.56499,0.689583,0.686443
100,1.4696,1.437649,0.725,0.722898
125,1.3042,1.31866,0.74375,0.745346
150,1.1703,1.212885,0.772917,0.773906
175,1.033,1.126774,0.775,0.776824
200,0.9776,1.146692,0.735417,0.734942
225,1.0065,1.135166,0.729167,0.72675
250,0.9302,1.10259,0.75,0.750059


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9272,1.884115,0.629167,0.611806
50,1.8168,1.738224,0.6625,0.648219
75,1.6432,1.567737,0.733333,0.733134
100,1.4558,1.4084,0.783333,0.783507
125,1.2608,1.261198,0.785417,0.786499
150,1.0916,1.137491,0.79375,0.795078
175,0.9333,1.029167,0.814583,0.816468
200,0.8027,0.957799,0.797917,0.798048
225,0.6853,0.888444,0.814583,0.817979
250,0.5702,0.836712,0.8,0.802127


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9272,1.884115,0.629167,0.611806
50,1.8168,1.738224,0.6625,0.648219
75,1.6432,1.567737,0.733333,0.733134
100,1.4558,1.4084,0.783333,0.783507
125,1.2608,1.261198,0.785417,0.786499
150,1.0916,1.137491,0.79375,0.795078
175,0.9333,1.029167,0.814583,0.816468
200,0.8027,0.957799,0.797917,0.798048
225,0.6853,0.888444,0.814583,0.817979
250,0.5702,0.836712,0.8,0.802127


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9246,1.875118,0.604167,0.575431
50,1.8146,1.724536,0.6875,0.666085
75,1.6345,1.548578,0.7375,0.732003
100,1.4595,1.404138,0.75,0.745263
125,1.2918,1.2744,0.7875,0.784326
150,1.1211,1.136107,0.802083,0.800196
175,0.9625,1.044891,0.79375,0.792819
200,0.8285,0.954754,0.791667,0.790923
225,0.6894,0.891586,0.802083,0.801597
250,0.5963,0.830267,0.795833,0.796548


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9246,1.875118,0.604167,0.575431
50,1.8146,1.724536,0.6875,0.666085
75,1.6345,1.548578,0.7375,0.732003
100,1.4595,1.404138,0.75,0.745263
125,1.2918,1.2744,0.7875,0.784326
150,1.1211,1.136107,0.802083,0.800196
175,0.9625,1.044891,0.79375,0.792819
200,0.8285,0.954754,0.791667,0.790923
225,0.6894,0.891586,0.802083,0.801597
250,0.5963,0.830267,0.795833,0.796548


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9246,1.875118,0.604167,0.575431
50,1.8146,1.724536,0.6875,0.666085
75,1.6345,1.548578,0.7375,0.732003
100,1.4595,1.404138,0.75,0.745263
125,1.2918,1.2744,0.7875,0.784326
150,1.1211,1.136107,0.802083,0.800196
175,0.9625,1.044891,0.79375,0.792819
200,0.8285,0.954754,0.791667,0.790923
225,0.6894,0.891586,0.802083,0.801597
250,0.5963,0.830267,0.795833,0.796548


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9275,1.881628,0.675,0.670868
50,1.8179,1.733034,0.6875,0.679777
75,1.6452,1.573261,0.714583,0.715108
100,1.4516,1.405772,0.74375,0.746564
125,1.2723,1.282484,0.764583,0.767075
150,1.0986,1.151927,0.802083,0.806143
175,0.9448,1.053997,0.7875,0.790027
200,0.8141,0.984882,0.785417,0.782946
225,0.6901,0.905348,0.8,0.801868
250,0.582,0.876862,0.789583,0.791549


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9275,1.881628,0.675,0.670868
50,1.8179,1.733034,0.6875,0.679777
75,1.6452,1.573261,0.714583,0.715108
100,1.4516,1.405772,0.74375,0.746564
125,1.2723,1.282484,0.764583,0.767075
150,1.0986,1.151927,0.802083,0.806143
175,0.9448,1.053997,0.7875,0.790027
200,0.8141,0.984882,0.785417,0.782946
225,0.6901,0.905348,0.8,0.801868
250,0.582,0.876862,0.789583,0.791549


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.9275,1.881628,0.675,0.670868
50,1.8179,1.733034,0.6875,0.679777
75,1.6452,1.573261,0.714583,0.715108
100,1.4516,1.405772,0.74375,0.746564
125,1.2723,1.282484,0.764583,0.767075
150,1.0986,1.151927,0.802083,0.806143
175,0.9448,1.053997,0.7875,0.790027
200,0.8141,0.984882,0.785417,0.782946
225,0.6901,0.905348,0.8,0.801868
250,0.582,0.876862,0.789583,0.791549


DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 1919
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 480
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.2772,0.789792,0.741667,0.740925
50,0.5203,0.652472,0.8,0.80448
75,0.3098,0.698779,0.783333,0.786507
100,0.1664,0.743144,0.78125,0.780418
125,0.1079,0.792529,0.789583,0.791088
150,0.0452,0.939097,0.785417,0.786656
175,0.038,0.953261,0.8,0.801241
200,0.0207,1.054096,0.789583,0.790077
225,0.0156,1.059727,0.79375,0.795547
250,0.0134,1.118329,0.795833,0.80046


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.2772,0.789792,0.741667,0.740925
50,0.5203,0.652472,0.8,0.80448
75,0.3098,0.698779,0.783333,0.786507
100,0.1664,0.743144,0.78125,0.780418
125,0.1079,0.792529,0.789583,0.791088
150,0.0452,0.939097,0.785417,0.786656
175,0.038,0.953261,0.8,0.801241
200,0.0207,1.054096,0.789583,0.790077
225,0.0156,1.059727,0.79375,0.795547
250,0.0134,1.118329,0.795833,0.80046


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.2772,0.789792,0.741667,0.740925
50,0.5203,0.652472,0.8,0.80448
75,0.3098,0.698779,0.783333,0.786507
100,0.1664,0.743144,0.78125,0.780418
125,0.1079,0.792529,0.789583,0.791088
150,0.0452,0.939097,0.785417,0.786656
175,0.038,0.953261,0.8,0.801241
200,0.0207,1.054096,0.789583,0.790077
225,0.0156,1.059727,0.79375,0.795547
250,0.0134,1.118329,0.795833,0.80046


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.3225,0.807567,0.739583,0.730532
50,0.6144,0.587967,0.797917,0.795685
75,0.3135,0.601423,0.81875,0.818793
100,0.2028,0.596602,0.827083,0.826289
125,0.1164,0.660253,0.816667,0.816386
150,0.0616,0.787425,0.816667,0.812607
175,0.0351,0.848969,0.808333,0.80509
200,0.0275,0.805845,0.835417,0.834487
225,0.018,0.921561,0.8125,0.811506
250,0.0163,1.016644,0.802083,0.79848


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.3225,0.807567,0.739583,0.730532
50,0.6144,0.587967,0.797917,0.795685
75,0.3135,0.601423,0.81875,0.818793
100,0.2028,0.596602,0.827083,0.826289
125,0.1164,0.660253,0.816667,0.816386
150,0.0616,0.787425,0.816667,0.812607
175,0.0351,0.848969,0.808333,0.80509
200,0.0275,0.805845,0.835417,0.834487
225,0.018,0.921561,0.8125,0.811506
250,0.0163,1.016644,0.802083,0.79848


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.3225,0.807567,0.739583,0.730532
50,0.6144,0.587967,0.797917,0.795685
75,0.3135,0.601423,0.81875,0.818793
100,0.2028,0.596602,0.827083,0.826289
125,0.1164,0.660253,0.816667,0.816386
150,0.0616,0.787425,0.816667,0.812607
175,0.0351,0.848969,0.808333,0.80509
200,0.0275,0.805845,0.835417,0.834487
225,0.018,0.921561,0.8125,0.811506
250,0.0163,1.016644,0.802083,0.79848


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.2811,0.771552,0.7625,0.762593
50,0.5391,0.668558,0.7875,0.789392
75,0.2986,0.647695,0.808333,0.810272
100,0.1722,0.70163,0.810417,0.814004
125,0.1032,0.771657,0.80625,0.81047
150,0.0724,0.817346,0.797917,0.801422
175,0.0325,0.887113,0.802083,0.80485
200,0.0418,0.978635,0.804167,0.807657
225,0.0162,0.999167,0.80625,0.810232
250,0.0165,1.04538,0.80625,0.808574


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.2811,0.771552,0.7625,0.762593
50,0.5391,0.668558,0.7875,0.789392
75,0.2986,0.647695,0.808333,0.810272
100,0.1722,0.70163,0.810417,0.814004
125,0.1032,0.771657,0.80625,0.81047
150,0.0724,0.817346,0.797917,0.801422
175,0.0325,0.887113,0.802083,0.80485
200,0.0418,0.978635,0.804167,0.807657
225,0.0162,0.999167,0.80625,0.810232
250,0.0165,1.04538,0.80625,0.808574


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.2811,0.771552,0.7625,0.762593
50,0.5391,0.668558,0.7875,0.789392
75,0.2986,0.647695,0.808333,0.810272
100,0.1722,0.70163,0.810417,0.814004
125,0.1032,0.771657,0.80625,0.81047
150,0.0724,0.817346,0.797917,0.801422
175,0.0325,0.887113,0.802083,0.80485
200,0.0418,0.978635,0.804167,0.807657
225,0.0162,0.999167,0.80625,0.810232
250,0.0165,1.04538,0.80625,0.808574


DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 1919
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 480
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.034,1.969181,0.13125,0.033149
50,1.9691,1.966663,0.145833,0.036364
75,1.967,1.951666,0.158333,0.039054
100,1.9592,1.954454,0.13125,0.033149
125,1.9658,1.966113,0.154167,0.038164
150,1.9652,1.949532,0.158333,0.039054
175,1.9494,1.952034,0.16875,0.065493
200,1.9318,1.878046,0.225,0.140684
225,1.8486,1.816481,0.24375,0.210898
250,1.8292,1.829102,0.258333,0.198476


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.034,1.969181,0.13125,0.033149
50,1.9691,1.966663,0.145833,0.036364
75,1.967,1.951666,0.158333,0.039054
100,1.9592,1.954454,0.13125,0.033149
125,1.9658,1.966113,0.154167,0.038164
150,1.9652,1.949532,0.158333,0.039054
175,1.9494,1.952034,0.16875,0.065493
200,1.9318,1.878046,0.225,0.140684
225,1.8486,1.816481,0.24375,0.210898
250,1.8292,1.829102,0.258333,0.198476


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.034,1.969181,0.13125,0.033149
50,1.9691,1.966663,0.145833,0.036364
75,1.967,1.951666,0.158333,0.039054
100,1.9592,1.954454,0.13125,0.033149
125,1.9658,1.966113,0.154167,0.038164
150,1.9652,1.949532,0.158333,0.039054
175,1.9494,1.952034,0.16875,0.065493
200,1.9318,1.878046,0.225,0.140684
225,1.8486,1.816481,0.24375,0.210898
250,1.8292,1.829102,0.258333,0.198476


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.0413,1.94797,0.15,0.037267
50,1.9724,1.960099,0.135417,0.034076
75,1.9662,1.957088,0.1625,0.039939
100,1.9846,1.969649,0.145833,0.036364
125,1.9669,1.950071,0.135417,0.034076
150,1.9692,1.956401,0.15,0.037267
175,1.9607,1.953265,0.1375,0.034537
200,1.9667,1.971525,0.1375,0.034537
225,1.958,1.954722,0.160417,0.073739
250,1.9462,1.907465,0.208333,0.090814


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.0413,1.94797,0.15,0.037267
50,1.9724,1.960099,0.135417,0.034076
75,1.9662,1.957088,0.1625,0.039939
100,1.9846,1.969649,0.145833,0.036364
125,1.9669,1.950071,0.135417,0.034076
150,1.9692,1.956401,0.15,0.037267
175,1.9607,1.953265,0.1375,0.034537
200,1.9667,1.971525,0.1375,0.034537
225,1.958,1.954722,0.160417,0.073739
250,1.9462,1.907465,0.208333,0.090814


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.0413,1.94797,0.15,0.037267
50,1.9724,1.960099,0.135417,0.034076
75,1.9662,1.957088,0.1625,0.039939
100,1.9846,1.969649,0.145833,0.036364
125,1.9669,1.950071,0.135417,0.034076
150,1.9692,1.956401,0.15,0.037267
175,1.9607,1.953265,0.1375,0.034537
200,1.9667,1.971525,0.1375,0.034537
225,1.958,1.954722,0.160417,0.073739
250,1.9462,1.907465,0.208333,0.090814


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.0368,1.954506,0.18125,0.04384
50,1.9778,2.014264,0.145833,0.036364
75,1.9668,1.989754,0.145833,0.036364
100,1.9712,1.970622,0.14375,0.035909
125,1.9669,1.947315,0.145833,0.036364
150,1.9546,1.88394,0.220833,0.104834
175,1.8727,1.879016,0.204167,0.151837
200,1.8313,1.849543,0.1875,0.125737
225,1.7838,1.82915,0.264583,0.228431
250,1.7243,1.799717,0.266667,0.206518


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.0368,1.954506,0.18125,0.04384
50,1.9778,2.014264,0.145833,0.036364
75,1.9668,1.989754,0.145833,0.036364
100,1.9712,1.970622,0.14375,0.035909
125,1.9669,1.947315,0.145833,0.036364
150,1.9546,1.88394,0.220833,0.104834
175,1.8727,1.879016,0.204167,0.151837
200,1.8313,1.849543,0.1875,0.125737
225,1.7838,1.82915,0.264583,0.228431
250,1.7243,1.799717,0.266667,0.206518


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,2.0368,1.954506,0.18125,0.04384
50,1.9778,2.014264,0.145833,0.036364
75,1.9668,1.989754,0.145833,0.036364
100,1.9712,1.970622,0.14375,0.035909
125,1.9669,1.947315,0.145833,0.036364
150,1.9546,1.88394,0.220833,0.104834
175,1.8727,1.879016,0.204167,0.151837
200,1.8313,1.849543,0.1875,0.125737
225,1.7838,1.82915,0.264583,0.228431
250,1.7243,1.799717,0.266667,0.206518


DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 1919
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 480
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.3886,0.898182,0.7,0.702479
50,0.7014,0.731329,0.766667,0.767101
75,0.3823,0.710537,0.766667,0.766025
100,0.181,0.721491,0.770833,0.769948
125,0.0824,0.773198,0.785417,0.784778
150,0.0435,0.887003,0.789583,0.78938
175,0.0169,0.961218,0.795833,0.795368
200,0.0138,1.040312,0.789583,0.789597
225,0.0107,1.019923,0.80625,0.806102
250,0.0079,1.035478,0.810417,0.810724


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.3886,0.898182,0.7,0.702479
50,0.7014,0.731329,0.766667,0.767101
75,0.3823,0.710537,0.766667,0.766025
100,0.181,0.721491,0.770833,0.769948
125,0.0824,0.773198,0.785417,0.784778
150,0.0435,0.887003,0.789583,0.78938
175,0.0169,0.961218,0.795833,0.795368
200,0.0138,1.040312,0.789583,0.789597
225,0.0107,1.019923,0.80625,0.806102
250,0.0079,1.035478,0.810417,0.810724


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.3886,0.898182,0.7,0.702479
50,0.7014,0.731329,0.766667,0.767101
75,0.3823,0.710537,0.766667,0.766025
100,0.181,0.721491,0.770833,0.769948
125,0.0824,0.773198,0.785417,0.784778
150,0.0435,0.887003,0.789583,0.78938
175,0.0169,0.961218,0.795833,0.795368
200,0.0138,1.040312,0.789583,0.789597
225,0.0107,1.019923,0.80625,0.806102
250,0.0079,1.035478,0.810417,0.810724


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4193,0.899823,0.710417,0.704427
50,0.7059,0.71882,0.75625,0.75655
75,0.3671,0.741376,0.7375,0.736683
100,0.2066,0.730282,0.791667,0.791146
125,0.0823,0.818778,0.802083,0.801412
150,0.0386,0.971719,0.775,0.772376
175,0.0242,1.18669,0.760417,0.756006
200,0.026,1.178724,0.7625,0.761724
225,0.0106,1.02519,0.797917,0.797083
250,0.0124,1.057452,0.789583,0.789569


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4193,0.899823,0.710417,0.704427
50,0.7059,0.71882,0.75625,0.75655
75,0.3671,0.741376,0.7375,0.736683
100,0.2066,0.730282,0.791667,0.791146
125,0.0823,0.818778,0.802083,0.801412
150,0.0386,0.971719,0.775,0.772376
175,0.0242,1.18669,0.760417,0.756006
200,0.026,1.178724,0.7625,0.761724
225,0.0106,1.02519,0.797917,0.797083
250,0.0124,1.057452,0.789583,0.789569


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4193,0.899823,0.710417,0.704427
50,0.7059,0.71882,0.75625,0.75655
75,0.3671,0.741376,0.7375,0.736683
100,0.2066,0.730282,0.791667,0.791146
125,0.0823,0.818778,0.802083,0.801412
150,0.0386,0.971719,0.775,0.772376
175,0.0242,1.18669,0.760417,0.756006
200,0.026,1.178724,0.7625,0.761724
225,0.0106,1.02519,0.797917,0.797083
250,0.0124,1.057452,0.789583,0.789569


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4,0.939757,0.691667,0.691085
50,0.6376,0.807879,0.735417,0.737532
75,0.3699,0.777808,0.75625,0.760976
100,0.1701,0.862941,0.75,0.752794
125,0.0722,0.892245,0.777083,0.78173
150,0.0521,1.030017,0.7625,0.759135
175,0.022,1.105217,0.766667,0.765829
200,0.0204,1.158355,0.760417,0.765424
225,0.0154,1.21793,0.764583,0.765493
250,0.0127,1.241812,0.76875,0.771924


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4,0.939757,0.691667,0.691085
50,0.6376,0.807879,0.735417,0.737532
75,0.3699,0.777808,0.75625,0.760976
100,0.1701,0.862941,0.75,0.752794
125,0.0722,0.892245,0.777083,0.78173
150,0.0521,1.030017,0.7625,0.759135
175,0.022,1.105217,0.766667,0.765829
200,0.0204,1.158355,0.760417,0.765424
225,0.0154,1.21793,0.764583,0.765493
250,0.0127,1.241812,0.76875,0.771924


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4,0.939757,0.691667,0.691085
50,0.6376,0.807879,0.735417,0.737532
75,0.3699,0.777808,0.75625,0.760976
100,0.1701,0.862941,0.75,0.752794
125,0.0722,0.892245,0.777083,0.78173
150,0.0521,1.030017,0.7625,0.759135
175,0.022,1.105217,0.766667,0.765829
200,0.0204,1.158355,0.760417,0.765424
225,0.0154,1.21793,0.764583,0.765493
250,0.0127,1.241812,0.76875,0.771924


DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 1919
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 480
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5366,1.014449,0.647917,0.648209
50,0.7695,0.81265,0.733333,0.735025
75,0.4717,0.84304,0.7375,0.73531
100,0.2887,0.772175,0.75,0.749759
125,0.1683,0.936126,0.7375,0.734853
150,0.0801,0.975672,0.747917,0.744049
175,0.0395,1.10153,0.741667,0.736467
200,0.0278,1.139319,0.78125,0.780749
225,0.0308,1.239547,0.754167,0.752543
250,0.0178,1.184533,0.76875,0.770647


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5366,1.014449,0.647917,0.648209
50,0.7695,0.81265,0.733333,0.735025
75,0.4717,0.84304,0.7375,0.73531
100,0.2887,0.772175,0.75,0.749759
125,0.1683,0.936126,0.7375,0.734853
150,0.0801,0.975672,0.747917,0.744049
175,0.0395,1.10153,0.741667,0.736467
200,0.0278,1.139319,0.78125,0.780749
225,0.0308,1.239547,0.754167,0.752543
250,0.0178,1.184533,0.76875,0.770647


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5366,1.014449,0.647917,0.648209
50,0.7695,0.81265,0.733333,0.735025
75,0.4717,0.84304,0.7375,0.73531
100,0.2887,0.772175,0.75,0.749759
125,0.1683,0.936126,0.7375,0.734853
150,0.0801,0.975672,0.747917,0.744049
175,0.0395,1.10153,0.741667,0.736467
200,0.0278,1.139319,0.78125,0.780749
225,0.0308,1.239547,0.754167,0.752543
250,0.0178,1.184533,0.76875,0.770647


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5224,1.032828,0.639583,0.626189
50,0.8174,0.798223,0.716667,0.713822
75,0.4691,0.79838,0.741667,0.73547
100,0.2786,0.903616,0.754167,0.750076
125,0.1493,0.968609,0.75,0.743324
150,0.0686,1.041673,0.75625,0.753497
175,0.0353,1.256595,0.75,0.745889
200,0.0293,1.231501,0.752083,0.751178
225,0.016,1.213194,0.775,0.773196
250,0.0162,1.379692,0.75625,0.753274


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5224,1.032828,0.639583,0.626189
50,0.8174,0.798223,0.716667,0.713822
75,0.4691,0.79838,0.741667,0.73547
100,0.2786,0.903616,0.754167,0.750076
125,0.1493,0.968609,0.75,0.743324
150,0.0686,1.041673,0.75625,0.753497
175,0.0353,1.256595,0.75,0.745889
200,0.0293,1.231501,0.752083,0.751178
225,0.016,1.213194,0.775,0.773196
250,0.0162,1.379692,0.75625,0.753274


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5224,1.032828,0.639583,0.626189
50,0.8174,0.798223,0.716667,0.713822
75,0.4691,0.79838,0.741667,0.73547
100,0.2786,0.903616,0.754167,0.750076
125,0.1493,0.968609,0.75,0.743324
150,0.0686,1.041673,0.75625,0.753497
175,0.0353,1.256595,0.75,0.745889
200,0.0293,1.231501,0.752083,0.751178
225,0.016,1.213194,0.775,0.773196
250,0.0162,1.379692,0.75625,0.753274


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5313,1.025934,0.675,0.678755
50,0.7886,0.842168,0.729167,0.73191
75,0.4429,0.80455,0.754167,0.762404
100,0.2437,0.917342,0.745833,0.741507
125,0.118,1.001361,0.745833,0.751801
150,0.0723,1.059842,0.747917,0.752757
175,0.0451,1.190081,0.733333,0.737599
200,0.0554,1.137125,0.7625,0.768614
225,0.0224,1.227654,0.752083,0.756656
250,0.0161,1.243558,0.75,0.75701


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5313,1.025934,0.675,0.678755
50,0.7886,0.842168,0.729167,0.73191
75,0.4429,0.80455,0.754167,0.762404
100,0.2437,0.917342,0.745833,0.741507
125,0.118,1.001361,0.745833,0.751801
150,0.0723,1.059842,0.747917,0.752757
175,0.0451,1.190081,0.733333,0.737599
200,0.0554,1.137125,0.7625,0.768614
225,0.0224,1.227654,0.752083,0.756656
250,0.0161,1.243558,0.75,0.75701


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5313,1.025934,0.675,0.678755
50,0.7886,0.842168,0.729167,0.73191
75,0.4429,0.80455,0.754167,0.762404
100,0.2437,0.917342,0.745833,0.741507
125,0.118,1.001361,0.745833,0.751801
150,0.0723,1.059842,0.747917,0.752757
175,0.0451,1.190081,0.733333,0.737599
200,0.0554,1.137125,0.7625,0.768614
225,0.0224,1.227654,0.752083,0.756656
250,0.0161,1.243558,0.75,0.75701


DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 1919
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 480
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6688,1.206065,0.591667,0.592985
50,0.9555,0.947075,0.685417,0.688493
75,0.5807,0.900047,0.714583,0.717614
100,0.3327,0.888587,0.747917,0.750431
125,0.1788,0.976145,0.725,0.726544
150,0.0967,1.045437,0.729167,0.729874
175,0.0536,1.119776,0.725,0.726417
200,0.0341,1.22763,0.733333,0.735745
225,0.0195,1.245796,0.74375,0.746395
250,0.015,1.325438,0.73125,0.732733


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6688,1.206065,0.591667,0.592985
50,0.9555,0.947075,0.685417,0.688493
75,0.5807,0.900047,0.714583,0.717614
100,0.3327,0.888587,0.747917,0.750431
125,0.1788,0.976145,0.725,0.726544
150,0.0967,1.045437,0.729167,0.729874
175,0.0536,1.119776,0.725,0.726417
200,0.0341,1.22763,0.733333,0.735745
225,0.0195,1.245796,0.74375,0.746395
250,0.015,1.325438,0.73125,0.732733


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6688,1.206065,0.591667,0.592985
50,0.9555,0.947075,0.685417,0.688493
75,0.5807,0.900047,0.714583,0.717614
100,0.3327,0.888587,0.747917,0.750431
125,0.1788,0.976145,0.725,0.726544
150,0.0967,1.045437,0.729167,0.729874
175,0.0536,1.119776,0.725,0.726417
200,0.0341,1.22763,0.733333,0.735745
225,0.0195,1.245796,0.74375,0.746395
250,0.015,1.325438,0.73125,0.732733


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6443,1.145029,0.595833,0.580887
50,0.9716,0.929896,0.658333,0.652369
75,0.5908,0.874545,0.7125,0.708619
100,0.3679,0.897956,0.710417,0.704605
125,0.1894,0.974132,0.725,0.722739
150,0.1063,1.053923,0.722917,0.718335
175,0.0512,1.174,0.7125,0.711161
200,0.0366,1.263888,0.735417,0.733075
225,0.0308,1.307122,0.70625,0.70027
250,0.0143,1.366643,0.710417,0.706864


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6443,1.145029,0.595833,0.580887
50,0.9716,0.929896,0.658333,0.652369
75,0.5908,0.874545,0.7125,0.708619
100,0.3679,0.897956,0.710417,0.704605
125,0.1894,0.974132,0.725,0.722739
150,0.1063,1.053923,0.722917,0.718335
175,0.0512,1.174,0.7125,0.711161
200,0.0366,1.263888,0.735417,0.733075
225,0.0308,1.307122,0.70625,0.70027
250,0.0143,1.366643,0.710417,0.706864


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6443,1.145029,0.595833,0.580887
50,0.9716,0.929896,0.658333,0.652369
75,0.5908,0.874545,0.7125,0.708619
100,0.3679,0.897956,0.710417,0.704605
125,0.1894,0.974132,0.725,0.722739
150,0.1063,1.053923,0.722917,0.718335
175,0.0512,1.174,0.7125,0.711161
200,0.0366,1.263888,0.735417,0.733075
225,0.0308,1.307122,0.70625,0.70027
250,0.0143,1.366643,0.710417,0.706864


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6511,1.294646,0.533333,0.523719
50,0.9436,1.074046,0.633333,0.635097
75,0.6085,1.008356,0.664583,0.669655
100,0.317,1.062713,0.666667,0.668912
125,0.1975,1.158361,0.666667,0.671576
150,0.106,1.233513,0.675,0.679361
175,0.0481,1.317482,0.695833,0.70073
200,0.0478,1.413756,0.670833,0.677346
225,0.0174,1.478149,0.691667,0.694751
250,0.0191,1.561362,0.697917,0.699337


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6511,1.294646,0.533333,0.523719
50,0.9436,1.074046,0.633333,0.635097
75,0.6085,1.008356,0.664583,0.669655
100,0.317,1.062713,0.666667,0.668912
125,0.1975,1.158361,0.666667,0.671576
150,0.106,1.233513,0.675,0.679361
175,0.0481,1.317482,0.695833,0.70073
200,0.0478,1.413756,0.670833,0.677346
225,0.0174,1.478149,0.691667,0.694751
250,0.0191,1.561362,0.697917,0.699337


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.6511,1.294646,0.533333,0.523719
50,0.9436,1.074046,0.633333,0.635097
75,0.6085,1.008356,0.664583,0.669655
100,0.317,1.062713,0.666667,0.668912
125,0.1975,1.158361,0.666667,0.671576
150,0.106,1.233513,0.675,0.679361
175,0.0481,1.317482,0.695833,0.70073
200,0.0478,1.413756,0.670833,0.677346
225,0.0174,1.478149,0.691667,0.694751
250,0.0191,1.561362,0.697917,0.699337


DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 1919
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 480
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5257,1.16015,0.627083,0.636201
50,0.8338,0.948233,0.689583,0.693585
75,0.5079,0.861818,0.7125,0.714073
100,0.2715,0.903307,0.733333,0.73712
125,0.1316,1.003772,0.733333,0.738332
150,0.0631,1.205986,0.725,0.725772
175,0.0326,1.283381,0.71875,0.716198
200,0.0186,1.371716,0.73125,0.732278
225,0.0161,1.485464,0.7,0.700585
250,0.0141,1.479308,0.704167,0.707396


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5257,1.16015,0.627083,0.636201
50,0.8338,0.948233,0.689583,0.693585
75,0.5079,0.861818,0.7125,0.714073
100,0.2715,0.903307,0.733333,0.73712
125,0.1316,1.003772,0.733333,0.738332
150,0.0631,1.205986,0.725,0.725772
175,0.0326,1.283381,0.71875,0.716198
200,0.0186,1.371716,0.73125,0.732278
225,0.0161,1.485464,0.7,0.700585
250,0.0141,1.479308,0.704167,0.707396


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.5257,1.16015,0.627083,0.636201
50,0.8338,0.948233,0.689583,0.693585
75,0.5079,0.861818,0.7125,0.714073
100,0.2715,0.903307,0.733333,0.73712
125,0.1316,1.003772,0.733333,0.738332
150,0.0631,1.205986,0.725,0.725772
175,0.0326,1.283381,0.71875,0.716198
200,0.0186,1.371716,0.73125,0.732278
225,0.0161,1.485464,0.7,0.700585
250,0.0141,1.479308,0.704167,0.707396


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4932,1.068575,0.654167,0.652266
50,0.8289,0.871804,0.7,0.696639
75,0.4712,0.853406,0.725,0.72027
100,0.2602,0.908631,0.729167,0.725099
125,0.1341,0.974752,0.752083,0.748821
150,0.0763,1.107046,0.733333,0.729743
175,0.0413,1.242257,0.710417,0.706979
200,0.024,1.190388,0.741667,0.738152
225,0.0223,1.328773,0.727083,0.722704
250,0.0127,1.341473,0.747917,0.745397


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4932,1.068575,0.654167,0.652266
50,0.8289,0.871804,0.7,0.696639
75,0.4712,0.853406,0.725,0.72027
100,0.2602,0.908631,0.729167,0.725099
125,0.1341,0.974752,0.752083,0.748821
150,0.0763,1.107046,0.733333,0.729743
175,0.0413,1.242257,0.710417,0.706979
200,0.024,1.190388,0.741667,0.738152
225,0.0223,1.328773,0.727083,0.722704
250,0.0127,1.341473,0.747917,0.745397


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4932,1.068575,0.654167,0.652266
50,0.8289,0.871804,0.7,0.696639
75,0.4712,0.853406,0.725,0.72027
100,0.2602,0.908631,0.729167,0.725099
125,0.1341,0.974752,0.752083,0.748821
150,0.0763,1.107046,0.733333,0.729743
175,0.0413,1.242257,0.710417,0.706979
200,0.024,1.190388,0.741667,0.738152
225,0.0223,1.328773,0.727083,0.722704
250,0.0127,1.341473,0.747917,0.745397


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4799,1.155702,0.614583,0.614182
50,0.8225,0.963618,0.675,0.678448
75,0.4653,0.983825,0.714583,0.718559
100,0.2349,1.044207,0.7,0.704417
125,0.1233,1.202165,0.697917,0.700769
150,0.0663,1.241941,0.695833,0.699047
175,0.0304,1.366338,0.7,0.703801
200,0.0509,1.440182,0.691667,0.696254
225,0.0163,1.541231,0.69375,0.699595
250,0.0106,1.551683,0.689583,0.695911


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4799,1.155702,0.614583,0.614182
50,0.8225,0.963618,0.675,0.678448
75,0.4653,0.983825,0.714583,0.718559
100,0.2349,1.044207,0.7,0.704417
125,0.1233,1.202165,0.697917,0.700769
150,0.0663,1.241941,0.695833,0.699047
175,0.0304,1.366338,0.7,0.703801
200,0.0509,1.440182,0.691667,0.696254
225,0.0163,1.541231,0.69375,0.699595
250,0.0106,1.551683,0.689583,0.695911


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,1.4799,1.155702,0.614583,0.614182
50,0.8225,0.963618,0.675,0.678448
75,0.4653,0.983825,0.714583,0.718559
100,0.2349,1.044207,0.7,0.704417
125,0.1233,1.202165,0.697917,0.700769
150,0.0663,1.241941,0.695833,0.699047
175,0.0304,1.366338,0.7,0.703801
200,0.0509,1.440182,0.691667,0.696254
225,0.0163,1.541231,0.69375,0.699595
250,0.0106,1.551683,0.689583,0.695911


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.795833,0.793818
3,alger-ia/dziribert,0.816667,0.817497
9,faisalq/EgyBERT,0.822917,0.823959
11,faisalq/SaudiBERT,0.835417,0.834487
14,otmangi/MorRoBERTa,0.75,0.75363
17,otmangi/MorrBERT,0.752083,0.748821
20,tunis-ai/TunBERT,0.414583,0.415796
