In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'MAC_2'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/MAC corpus.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(df[:4])

df['tweets'] = df['tweets'].astype('str')

classes = set(df['type'].values)
display(classes)

c = df['type'].value_counts()
display(c)

df['type'] = df['type'].astype('category')
df['label'] = df['type'].cat.codes

df = df[['tweets', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


# ds = Dataset.from_pandas(df)
# ds = ds.train_test_split(test_size=0.2)

# display(ds)

max_sequence_length = 128


models = [ 
        'faisalq/EgyBERT',            
    'faisalq/SaudiBERT',            
    'tunis-ai/TunBERT',
    'alger-ia/dziribert',
    'SI2M-Lab/DarijaBERT',
    'otmangi/MorRoBERTa',
    'otmangi/MorrBERT'
            
]


seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['tweets'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True) # , batched=True
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)  # , batched=True
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 10
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 80, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 80
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



Index(['tweets', 'type', 'class'], dtype='object')

Unnamed: 0,tweets,type,class
0,ماهي آرائكم متتبعي الكرام,neutral,standard
1,عبقرية المدرب وروعة اللاعبين,positive,standard
2,أمة عربية واحدة خالدة,positive,standard
3,عاد إليكم من جديد وأخيرا درت جيم لهاد الصفحة,neutral,dialectal


{'mixed', 'negative', 'neutral', 'positive'}

type
positive    9897
neutral     4039
negative    3508
mixed        643
Name: count, dtype: int64

4

18087

DatasetDict({
    train: Dataset({
        features: ['tweets', 'label'],
        num_rows: 14469
    })
    test: Dataset({
        features: ['tweets', 'label'],
        num_rows: 3618
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1772,0.910012,0.772526,0.54672
160,0.8086,0.66408,0.818408,0.594674
240,0.6185,0.546926,0.83665,0.609404
320,0.4837,0.476078,0.842178,0.618763
400,0.4497,0.436467,0.855721,0.627696
480,0.3876,0.414397,0.869541,0.640759
560,0.3036,0.388091,0.8712,0.648004
640,0.293,0.397655,0.878939,0.704229
720,0.256,0.4341,0.879491,0.756332
800,0.2113,0.391947,0.883637,0.768831


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1578,0.88208,0.704809,0.400416
160,0.8035,0.667633,0.806799,0.576324
240,0.6327,0.543904,0.84052,0.6124
320,0.4943,0.475724,0.848811,0.622743
400,0.4536,0.443025,0.861249,0.632022
480,0.3895,0.421924,0.865119,0.634831
560,0.311,0.384418,0.873134,0.665601
640,0.2922,0.388405,0.877833,0.695935
720,0.2596,0.387907,0.883085,0.758585
800,0.212,0.387049,0.888336,0.773624


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1578,0.88208,0.704809,0.400416
160,0.8035,0.667633,0.806799,0.576324
240,0.6327,0.543904,0.84052,0.6124
320,0.4943,0.475724,0.848811,0.622743
400,0.4536,0.443025,0.861249,0.632022
480,0.3895,0.421924,0.865119,0.634831
560,0.311,0.384418,0.873134,0.665601
640,0.2922,0.388405,0.877833,0.695935
720,0.2596,0.387907,0.883085,0.758585
800,0.212,0.387049,0.888336,0.773624


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.183,0.960736,0.710061,0.4162
160,0.8416,0.71153,0.752626,0.492742
240,0.6594,0.599496,0.812604,0.585209
320,0.5267,0.536255,0.826147,0.600904
400,0.4767,0.493341,0.834992,0.606636
480,0.3853,0.465931,0.852957,0.626375
560,0.3128,0.446332,0.857656,0.632644
640,0.3045,0.437651,0.861802,0.686145
720,0.2506,0.451197,0.865948,0.735613
800,0.1945,0.440127,0.872582,0.753454


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.183,0.960736,0.710061,0.4162
160,0.8416,0.71153,0.752626,0.492742
240,0.6594,0.599496,0.812604,0.585209
320,0.5267,0.536255,0.826147,0.600904
400,0.4767,0.493341,0.834992,0.606636
480,0.3853,0.465931,0.852957,0.626375
560,0.3128,0.446332,0.857656,0.632644
640,0.3045,0.437651,0.861802,0.686145
720,0.2506,0.451197,0.865948,0.735613
800,0.1945,0.440127,0.872582,0.753454


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.183,0.960736,0.710061,0.4162
160,0.8416,0.71153,0.752626,0.492742
240,0.6594,0.599496,0.812604,0.585209
320,0.5267,0.536255,0.826147,0.600904
400,0.4767,0.493341,0.834992,0.606636
480,0.3853,0.465931,0.852957,0.626375
560,0.3128,0.446332,0.857656,0.632644
640,0.3045,0.437651,0.861802,0.686145
720,0.2506,0.451197,0.865948,0.735613
800,0.1945,0.440127,0.872582,0.753454


faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1568,0.899167,0.718629,0.440839
160,0.8013,0.676461,0.810116,0.589772
240,0.6351,0.562309,0.831122,0.606538
320,0.4913,0.502254,0.838861,0.61875
400,0.453,0.454736,0.845218,0.620135
480,0.381,0.432814,0.857933,0.633292
560,0.3043,0.440143,0.854063,0.62913
640,0.2976,0.424244,0.866777,0.682455
720,0.2498,0.448682,0.874793,0.761626
800,0.2012,0.426428,0.876175,0.772762


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1568,0.899167,0.718629,0.440839
160,0.8013,0.676461,0.810116,0.589772
240,0.6351,0.562309,0.831122,0.606538
320,0.4913,0.502254,0.838861,0.61875
400,0.453,0.454736,0.845218,0.620135
480,0.381,0.432814,0.857933,0.633292
560,0.3043,0.440143,0.854063,0.62913
640,0.2976,0.424244,0.866777,0.682455
720,0.2498,0.448682,0.874793,0.761626
800,0.2012,0.426428,0.876175,0.772762


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1568,0.899167,0.718629,0.440839
160,0.8013,0.676461,0.810116,0.589772
240,0.6351,0.562309,0.831122,0.606538
320,0.4913,0.502254,0.838861,0.61875
400,0.453,0.454736,0.845218,0.620135
480,0.381,0.432814,0.857933,0.633292
560,0.3043,0.440143,0.854063,0.62913
640,0.2976,0.424244,0.866777,0.682455
720,0.2498,0.448682,0.874793,0.761626
800,0.2012,0.426428,0.876175,0.772762


DatasetDict({
    train: Dataset({
        features: ['tweets', 'label'],
        num_rows: 14469
    })
    test: Dataset({
        features: ['tweets', 'label'],
        num_rows: 3618
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6446,0.543701,0.795191,0.580667
160,0.4701,0.403207,0.854339,0.690439
240,0.3739,0.391685,0.877557,0.777461
320,0.2406,0.393597,0.873411,0.776623
400,0.2228,0.339826,0.896352,0.820837
480,0.1766,0.440901,0.888336,0.795433
560,0.1067,0.402051,0.896075,0.823885
640,0.1017,0.451153,0.896628,0.823193
720,0.0785,0.463689,0.899668,0.824277
800,0.0683,0.433733,0.901603,0.82999


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6446,0.543701,0.795191,0.580667
160,0.4701,0.403207,0.854339,0.690439
240,0.3739,0.391685,0.877557,0.777461
320,0.2406,0.393597,0.873411,0.776623
400,0.2228,0.339826,0.896352,0.820837
480,0.1766,0.440901,0.888336,0.795433
560,0.1067,0.402051,0.896075,0.823885
640,0.1017,0.451153,0.896628,0.823193
720,0.0785,0.463689,0.899668,0.824277
800,0.0683,0.433733,0.901603,0.82999


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6446,0.543701,0.795191,0.580667
160,0.4701,0.403207,0.854339,0.690439
240,0.3739,0.391685,0.877557,0.777461
320,0.2406,0.393597,0.873411,0.776623
400,0.2228,0.339826,0.896352,0.820837
480,0.1766,0.440901,0.888336,0.795433
560,0.1067,0.402051,0.896075,0.823885
640,0.1017,0.451153,0.896628,0.823193
720,0.0785,0.463689,0.899668,0.824277
800,0.0683,0.433733,0.901603,0.82999


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6341,0.482755,0.826976,0.632347
160,0.4499,0.418987,0.8466,0.710102
240,0.3861,0.418229,0.871476,0.765987
320,0.2279,0.425243,0.867606,0.742411
400,0.2333,0.366208,0.886678,0.791106
480,0.1488,0.440347,0.890271,0.800282
560,0.1171,0.394237,0.89497,0.816988
640,0.0968,0.475081,0.883361,0.794281
720,0.0954,0.478023,0.893311,0.804918
800,0.0661,0.486449,0.893035,0.812552


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6341,0.482755,0.826976,0.632347
160,0.4499,0.418987,0.8466,0.710102
240,0.3861,0.418229,0.871476,0.765987
320,0.2279,0.425243,0.867606,0.742411
400,0.2333,0.366208,0.886678,0.791106
480,0.1488,0.440347,0.890271,0.800282
560,0.1171,0.394237,0.89497,0.816988
640,0.0968,0.475081,0.883361,0.794281
720,0.0954,0.478023,0.893311,0.804918
800,0.0661,0.486449,0.893035,0.812552


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6341,0.482755,0.826976,0.632347
160,0.4499,0.418987,0.8466,0.710102
240,0.3861,0.418229,0.871476,0.765987
320,0.2279,0.425243,0.867606,0.742411
400,0.2333,0.366208,0.886678,0.791106
480,0.1488,0.440347,0.890271,0.800282
560,0.1171,0.394237,0.89497,0.816988
640,0.0968,0.475081,0.883361,0.794281
720,0.0954,0.478023,0.893311,0.804918
800,0.0661,0.486449,0.893035,0.812552


faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6212,0.496423,0.819237,0.637957
160,0.4958,0.398133,0.859867,0.726142
240,0.3846,0.384782,0.871476,0.766381
320,0.2248,0.388294,0.880873,0.785387
400,0.2149,0.357009,0.889165,0.807194
480,0.1672,0.387873,0.891929,0.820447
560,0.1139,0.453893,0.889165,0.807048
640,0.1032,0.462731,0.888889,0.810357
720,0.075,0.510053,0.895246,0.827243
800,0.0749,0.493667,0.893035,0.821179


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6212,0.496423,0.819237,0.637957
160,0.4958,0.398133,0.859867,0.726142
240,0.3846,0.384782,0.871476,0.766381
320,0.2248,0.388294,0.880873,0.785387
400,0.2149,0.357009,0.889165,0.807194
480,0.1672,0.387873,0.891929,0.820447
560,0.1139,0.453893,0.889165,0.807048
640,0.1032,0.462731,0.888889,0.810357
720,0.075,0.510053,0.895246,0.827243
800,0.0749,0.493667,0.893035,0.821179


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.6212,0.496423,0.819237,0.637957
160,0.4958,0.398133,0.859867,0.726142
240,0.3846,0.384782,0.871476,0.766381
320,0.2248,0.388294,0.880873,0.785387
400,0.2149,0.357009,0.889165,0.807194
480,0.1672,0.387873,0.891929,0.820447
560,0.1139,0.453893,0.889165,0.807048
640,0.1032,0.462731,0.888889,0.810357
720,0.075,0.510053,0.895246,0.827243
800,0.0749,0.493667,0.893035,0.821179


DatasetDict({
    train: Dataset({
        features: ['tweets', 'label'],
        num_rows: 14469
    })
    test: Dataset({
        features: ['tweets', 'label'],
        num_rows: 3618
    })
})

tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1284,1.1053,0.482034,0.250425
160,1.0731,1.006594,0.563018,0.193819
240,1.0401,1.002104,0.556661,0.276359
320,1.0382,0.989321,0.560254,0.271248
400,1.0057,1.004147,0.549751,0.275969
480,1.0241,0.984106,0.57435,0.304976
560,1.002,0.997178,0.5445,0.2901
640,0.9913,0.965805,0.567441,0.241017
720,0.962,1.013663,0.576009,0.286966
800,0.9556,0.95423,0.575456,0.332636


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1284,1.1053,0.482034,0.250425
160,1.0731,1.006594,0.563018,0.193819
240,1.0401,1.002104,0.556661,0.276359
320,1.0382,0.989321,0.560254,0.271248
400,1.0057,1.004147,0.549751,0.275969
480,1.0241,0.984106,0.57435,0.304976
560,1.002,0.997178,0.5445,0.2901
640,0.9913,0.965805,0.567441,0.241017
720,0.962,1.013663,0.576009,0.286966
800,0.9556,0.95423,0.575456,0.332636


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1284,1.1053,0.482034,0.250425
160,1.0731,1.006594,0.563018,0.193819
240,1.0401,1.002104,0.556661,0.276359
320,1.0382,0.989321,0.560254,0.271248
400,1.0057,1.004147,0.549751,0.275969
480,1.0241,0.984106,0.57435,0.304976
560,1.002,0.997178,0.5445,0.2901
640,0.9913,0.965805,0.567441,0.241017
720,0.962,1.013663,0.576009,0.286966
800,0.9556,0.95423,0.575456,0.332636


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1303,1.068016,0.55749,0.178971
160,1.085,1.023155,0.55749,0.178971
240,1.0589,1.001812,0.565229,0.253576
320,1.0301,1.044184,0.564953,0.223448
400,1.0193,0.997346,0.55141,0.277117
480,0.9987,0.984896,0.567717,0.289845
560,1.0013,0.983463,0.563847,0.289048
640,0.9855,0.992035,0.566335,0.25958
720,0.9853,0.968239,0.570481,0.280416
800,0.9686,0.979761,0.554174,0.297559


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1303,1.068016,0.55749,0.178971
160,1.085,1.023155,0.55749,0.178971
240,1.0589,1.001812,0.565229,0.253576
320,1.0301,1.044184,0.564953,0.223448
400,1.0193,0.997346,0.55141,0.277117
480,0.9987,0.984896,0.567717,0.289845
560,1.0013,0.983463,0.563847,0.289048
640,0.9855,0.992035,0.566335,0.25958
720,0.9853,0.968239,0.570481,0.280416
800,0.9686,0.979761,0.554174,0.297559


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1303,1.068016,0.55749,0.178971
160,1.085,1.023155,0.55749,0.178971
240,1.0589,1.001812,0.565229,0.253576
320,1.0301,1.044184,0.564953,0.223448
400,1.0193,0.997346,0.55141,0.277117
480,0.9987,0.984896,0.567717,0.289845
560,1.0013,0.983463,0.563847,0.289048
640,0.9855,0.992035,0.566335,0.25958
720,0.9853,0.968239,0.570481,0.280416
800,0.9686,0.979761,0.554174,0.297559


tunis-ai/TunBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1146,1.051322,0.525981,0.252828
160,1.0638,1.007896,0.558596,0.285346
240,1.0495,1.035597,0.488944,0.269117
320,1.0185,0.988897,0.566888,0.276663
400,1.0186,0.988837,0.546711,0.274806
480,0.9882,0.971189,0.558872,0.281794
560,0.9811,1.040122,0.566059,0.244673
640,0.9858,0.964334,0.585406,0.316109
720,0.9725,0.96459,0.583195,0.313883
800,0.9725,0.97527,0.556661,0.327899


tunis-ai/TunBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1146,1.051322,0.525981,0.252828
160,1.0638,1.007896,0.558596,0.285346
240,1.0495,1.035597,0.488944,0.269117
320,1.0185,0.988897,0.566888,0.276663
400,1.0186,0.988837,0.546711,0.274806
480,0.9882,0.971189,0.558872,0.281794
560,0.9811,1.040122,0.566059,0.244673
640,0.9858,0.964334,0.585406,0.316109
720,0.9725,0.96459,0.583195,0.313883
800,0.9725,0.97527,0.556661,0.327899


tunis-ai/TunBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tunis-ai/TunBERT and are newly initialized: ['classifier.bias', 'classifier.weight', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.word_embeddings.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.wei

Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,1.1146,1.051322,0.525981,0.252828
160,1.0638,1.007896,0.558596,0.285346
240,1.0495,1.035597,0.488944,0.269117
320,1.0185,0.988897,0.566888,0.276663
400,1.0186,0.988837,0.546711,0.274806
480,0.9882,0.971189,0.558872,0.281794
560,0.9811,1.040122,0.566059,0.244673
640,0.9858,0.964334,0.585406,0.316109
720,0.9725,0.96459,0.583195,0.313883
800,0.9725,0.97527,0.556661,0.327899


DatasetDict({
    train: Dataset({
        features: ['tweets', 'label'],
        num_rows: 14469
    })
    test: Dataset({
        features: ['tweets', 'label'],
        num_rows: 3618
    })
})

alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7114,0.600578,0.770315,0.55548
160,0.5628,0.482046,0.821448,0.614642
240,0.454,0.461653,0.844389,0.693453
320,0.2728,0.474056,0.844666,0.691889
400,0.2494,0.422411,0.864013,0.746237
480,0.195,0.520346,0.867883,0.765875
560,0.1136,0.542095,0.861802,0.758092
640,0.1126,0.544756,0.868988,0.776356
720,0.102,0.569597,0.8712,0.770083
800,0.0722,0.585998,0.875345,0.777491


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7114,0.600578,0.770315,0.55548
160,0.5628,0.482046,0.821448,0.614642
240,0.454,0.461653,0.844389,0.693453
320,0.2728,0.474056,0.844666,0.691889
400,0.2494,0.422411,0.864013,0.746237
480,0.195,0.520346,0.867883,0.765875
560,0.1136,0.542095,0.861802,0.758092
640,0.1126,0.544756,0.868988,0.776356
720,0.102,0.569597,0.8712,0.770083
800,0.0722,0.585998,0.875345,0.777491


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7114,0.600578,0.770315,0.55548
160,0.5628,0.482046,0.821448,0.614642
240,0.454,0.461653,0.844389,0.693453
320,0.2728,0.474056,0.844666,0.691889
400,0.2494,0.422411,0.864013,0.746237
480,0.195,0.520346,0.867883,0.765875
560,0.1136,0.542095,0.861802,0.758092
640,0.1126,0.544756,0.868988,0.776356
720,0.102,0.569597,0.8712,0.770083
800,0.0722,0.585998,0.875345,0.777491


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.728,0.586255,0.785793,0.563649
160,0.5515,0.495278,0.811222,0.647517
240,0.458,0.518481,0.830569,0.708879
320,0.254,0.527058,0.841349,0.697358
400,0.2749,0.462947,0.857656,0.750712
480,0.1837,0.592593,0.856551,0.747783
560,0.1203,0.588174,0.855998,0.73937
640,0.1192,0.5456,0.855721,0.743429
720,0.1074,0.634824,0.86042,0.753684
800,0.0794,0.628436,0.862078,0.742832


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.728,0.586255,0.785793,0.563649
160,0.5515,0.495278,0.811222,0.647517
240,0.458,0.518481,0.830569,0.708879
320,0.254,0.527058,0.841349,0.697358
400,0.2749,0.462947,0.857656,0.750712
480,0.1837,0.592593,0.856551,0.747783
560,0.1203,0.588174,0.855998,0.73937
640,0.1192,0.5456,0.855721,0.743429
720,0.1074,0.634824,0.86042,0.753684
800,0.0794,0.628436,0.862078,0.742832


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.728,0.586255,0.785793,0.563649
160,0.5515,0.495278,0.811222,0.647517
240,0.458,0.518481,0.830569,0.708879
320,0.254,0.527058,0.841349,0.697358
400,0.2749,0.462947,0.857656,0.750712
480,0.1837,0.592593,0.856551,0.747783
560,0.1203,0.588174,0.855998,0.73937
640,0.1192,0.5456,0.855721,0.743429
720,0.1074,0.634824,0.86042,0.753684
800,0.0794,0.628436,0.862078,0.742832


alger-ia/dziribert, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7039,0.600609,0.772803,0.557633
160,0.5575,0.488068,0.819237,0.635259
240,0.4637,0.470338,0.835821,0.702779
320,0.2691,0.446147,0.84743,0.726399
400,0.2556,0.491177,0.854892,0.755281
480,0.1909,0.538926,0.850193,0.756566
560,0.1134,0.569967,0.861526,0.769099
640,0.1186,0.530933,0.863737,0.773078
720,0.0841,0.625041,0.867054,0.785844
800,0.0677,0.66358,0.866224,0.772008


alger-ia/dziribert, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7039,0.600609,0.772803,0.557633
160,0.5575,0.488068,0.819237,0.635259
240,0.4637,0.470338,0.835821,0.702779
320,0.2691,0.446147,0.84743,0.726399
400,0.2556,0.491177,0.854892,0.755281
480,0.1909,0.538926,0.850193,0.756566
560,0.1134,0.569967,0.861526,0.769099
640,0.1186,0.530933,0.863737,0.773078
720,0.0841,0.625041,0.867054,0.785844
800,0.0677,0.66358,0.866224,0.772008


alger-ia/dziribert, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alger-ia/dziribert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7039,0.600609,0.772803,0.557633
160,0.5575,0.488068,0.819237,0.635259
240,0.4637,0.470338,0.835821,0.702779
320,0.2691,0.446147,0.84743,0.726399
400,0.2556,0.491177,0.854892,0.755281
480,0.1909,0.538926,0.850193,0.756566
560,0.1134,0.569967,0.861526,0.769099
640,0.1186,0.530933,0.863737,0.773078
720,0.0841,0.625041,0.867054,0.785844
800,0.0677,0.66358,0.866224,0.772008


DatasetDict({
    train: Dataset({
        features: ['tweets', 'label'],
        num_rows: 14469
    })
    test: Dataset({
        features: ['tweets', 'label'],
        num_rows: 3618
    })
})

SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.762,0.644586,0.759812,0.540449
160,0.5928,0.502976,0.820066,0.625149
240,0.501,0.51562,0.836374,0.701419
320,0.3392,0.486978,0.834992,0.668491
400,0.3156,0.459927,0.848535,0.728115
480,0.2581,0.531814,0.865119,0.766917
560,0.1547,0.564025,0.857103,0.744811
640,0.1636,0.54036,0.86429,0.756202
720,0.1381,0.587121,0.864566,0.767554
800,0.0946,0.571209,0.865395,0.767198


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.762,0.644586,0.759812,0.540449
160,0.5928,0.502976,0.820066,0.625149
240,0.501,0.51562,0.836374,0.701419
320,0.3392,0.486978,0.834992,0.668491
400,0.3156,0.459927,0.848535,0.728115
480,0.2581,0.531814,0.865119,0.766917
560,0.1547,0.564025,0.857103,0.744811
640,0.1636,0.54036,0.86429,0.756202
720,0.1381,0.587121,0.864566,0.767554
800,0.0946,0.571209,0.865395,0.767198


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.762,0.644586,0.759812,0.540449
160,0.5928,0.502976,0.820066,0.625149
240,0.501,0.51562,0.836374,0.701419
320,0.3392,0.486978,0.834992,0.668491
400,0.3156,0.459927,0.848535,0.728115
480,0.2581,0.531814,0.865119,0.766917
560,0.1547,0.564025,0.857103,0.744811
640,0.1636,0.54036,0.86429,0.756202
720,0.1381,0.587121,0.864566,0.767554
800,0.0946,0.571209,0.865395,0.767198


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7932,0.629014,0.748203,0.560024
160,0.6054,0.546622,0.788004,0.613108
240,0.5157,0.540346,0.818132,0.643522
320,0.361,0.499583,0.836374,0.686208
400,0.335,0.467603,0.847706,0.722
480,0.2538,0.538493,0.852405,0.739853
560,0.1633,0.535641,0.858485,0.75222
640,0.1704,0.523262,0.860144,0.756778
720,0.1343,0.586605,0.862078,0.754908
800,0.1038,0.566985,0.868712,0.774088


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7932,0.629014,0.748203,0.560024
160,0.6054,0.546622,0.788004,0.613108
240,0.5157,0.540346,0.818132,0.643522
320,0.361,0.499583,0.836374,0.686208
400,0.335,0.467603,0.847706,0.722
480,0.2538,0.538493,0.852405,0.739853
560,0.1633,0.535641,0.858485,0.75222
640,0.1704,0.523262,0.860144,0.756778
720,0.1343,0.586605,0.862078,0.754908
800,0.1038,0.566985,0.868712,0.774088


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7932,0.629014,0.748203,0.560024
160,0.6054,0.546622,0.788004,0.613108
240,0.5157,0.540346,0.818132,0.643522
320,0.361,0.499583,0.836374,0.686208
400,0.335,0.467603,0.847706,0.722
480,0.2538,0.538493,0.852405,0.739853
560,0.1633,0.535641,0.858485,0.75222
640,0.1704,0.523262,0.860144,0.756778
720,0.1343,0.586605,0.862078,0.754908
800,0.1038,0.566985,0.868712,0.774088


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:0


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7523,0.617449,0.766722,0.545938
160,0.6098,0.5083,0.810116,0.611556
240,0.5022,0.497192,0.836374,0.676153
320,0.3402,0.458867,0.84052,0.730011
400,0.3202,0.443054,0.858485,0.755401
480,0.2404,0.586779,0.840243,0.742603
560,0.1649,0.547708,0.857933,0.77415
640,0.1593,0.560919,0.86346,0.77757
720,0.1137,0.63435,0.863184,0.786736
800,0.1043,0.596851,0.87424,0.795472


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:1


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7523,0.617449,0.766722,0.545938
160,0.6098,0.5083,0.810116,0.611556
240,0.5022,0.497192,0.836374,0.676153
320,0.3402,0.458867,0.84052,0.730011
400,0.3202,0.443054,0.858485,0.755401
480,0.2404,0.586779,0.840243,0.742603
560,0.1649,0.547708,0.857933,0.77415
640,0.1593,0.560919,0.86346,0.77757
720,0.1137,0.63435,0.863184,0.786736
800,0.1043,0.596851,0.87424,0.795472


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


SI2M-Lab/DarijaBERT, try:2


The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7523,0.617449,0.766722,0.545938
160,0.6098,0.5083,0.810116,0.611556
240,0.5022,0.497192,0.836374,0.676153
320,0.3402,0.458867,0.84052,0.730011
400,0.3202,0.443054,0.858485,0.755401
480,0.2404,0.586779,0.840243,0.742603
560,0.1649,0.547708,0.857933,0.77415
640,0.1593,0.560919,0.86346,0.77757
720,0.1137,0.63435,0.863184,0.786736
800,0.1043,0.596851,0.87424,0.795472


DatasetDict({
    train: Dataset({
        features: ['tweets', 'label'],
        num_rows: 14469
    })
    test: Dataset({
        features: ['tweets', 'label'],
        num_rows: 3618
    })
})

otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.8198,0.746652,0.699834,0.489843
160,0.6545,0.573233,0.7822,0.562663
240,0.5552,0.575076,0.802377,0.613517
320,0.3738,0.563116,0.813709,0.670368
400,0.3505,0.509953,0.821725,0.695722
480,0.2725,0.674763,0.820896,0.699631
560,0.1633,0.590788,0.834992,0.72448
640,0.1714,0.59605,0.835821,0.735458
720,0.1263,0.699458,0.841625,0.73507
800,0.0919,0.708202,0.840243,0.746524


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.8198,0.746652,0.699834,0.489843
160,0.6545,0.573233,0.7822,0.562663
240,0.5552,0.575076,0.802377,0.613517
320,0.3738,0.563116,0.813709,0.670368
400,0.3505,0.509953,0.821725,0.695722
480,0.2725,0.674763,0.820896,0.699631
560,0.1633,0.590788,0.834992,0.72448
640,0.1714,0.59605,0.835821,0.735458
720,0.1263,0.699458,0.841625,0.73507
800,0.0919,0.708202,0.840243,0.746524


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.8198,0.746652,0.699834,0.489843
160,0.6545,0.573233,0.7822,0.562663
240,0.5552,0.575076,0.802377,0.613517
320,0.3738,0.563116,0.813709,0.670368
400,0.3505,0.509953,0.821725,0.695722
480,0.2725,0.674763,0.820896,0.699631
560,0.1633,0.590788,0.834992,0.72448
640,0.1714,0.59605,0.835821,0.735458
720,0.1263,0.699458,0.841625,0.73507
800,0.0919,0.708202,0.840243,0.746524


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.8362,0.657731,0.743228,0.528768
160,0.647,0.582813,0.762576,0.558009
240,0.5559,0.575741,0.801548,0.634628
320,0.374,0.537475,0.818408,0.683191
400,0.3559,0.531958,0.825594,0.696716
480,0.257,0.612635,0.840796,0.730897
560,0.1702,0.603896,0.834163,0.726331
640,0.1593,0.638092,0.840243,0.723232
720,0.1311,0.693474,0.846324,0.731046
800,0.0916,0.723149,0.845771,0.7473


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.8362,0.657731,0.743228,0.528768
160,0.647,0.582813,0.762576,0.558009
240,0.5559,0.575741,0.801548,0.634628
320,0.374,0.537475,0.818408,0.683191
400,0.3559,0.531958,0.825594,0.696716
480,0.257,0.612635,0.840796,0.730897
560,0.1702,0.603896,0.834163,0.726331
640,0.1593,0.638092,0.840243,0.723232
720,0.1311,0.693474,0.846324,0.731046
800,0.0916,0.723149,0.845771,0.7473


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.8362,0.657731,0.743228,0.528768
160,0.647,0.582813,0.762576,0.558009
240,0.5559,0.575741,0.801548,0.634628
320,0.374,0.537475,0.818408,0.683191
400,0.3559,0.531958,0.825594,0.696716
480,0.257,0.612635,0.840796,0.730897
560,0.1702,0.603896,0.834163,0.726331
640,0.1593,0.638092,0.840243,0.723232
720,0.1311,0.693474,0.846324,0.731046
800,0.0916,0.723149,0.845771,0.7473


otmangi/MorRoBERTa, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7968,0.669563,0.743505,0.525139
160,0.6749,0.573422,0.778331,0.560691
240,0.5504,0.587434,0.803206,0.622295
320,0.3636,0.544628,0.810392,0.657408
400,0.3713,0.508276,0.821448,0.694255
480,0.2836,0.568622,0.827253,0.725262
560,0.1642,0.569177,0.839138,0.740612
640,0.1772,0.565436,0.834992,0.735704
720,0.1213,0.671431,0.842731,0.756899
800,0.0948,0.673081,0.848535,0.759423


otmangi/MorRoBERTa, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7968,0.669563,0.743505,0.525139
160,0.6749,0.573422,0.778331,0.560691
240,0.5504,0.587434,0.803206,0.622295
320,0.3636,0.544628,0.810392,0.657408
400,0.3713,0.508276,0.821448,0.694255
480,0.2836,0.568622,0.827253,0.725262
560,0.1642,0.569177,0.839138,0.740612
640,0.1772,0.565436,0.834992,0.735704
720,0.1213,0.671431,0.842731,0.756899
800,0.0948,0.673081,0.848535,0.759423


otmangi/MorRoBERTa, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at otmangi/MorRoBERTa and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7968,0.669563,0.743505,0.525139
160,0.6749,0.573422,0.778331,0.560691
240,0.5504,0.587434,0.803206,0.622295
320,0.3636,0.544628,0.810392,0.657408
400,0.3713,0.508276,0.821448,0.694255
480,0.2836,0.568622,0.827253,0.725262
560,0.1642,0.569177,0.839138,0.740612
640,0.1772,0.565436,0.834992,0.735704
720,0.1213,0.671431,0.842731,0.756899
800,0.0948,0.673081,0.848535,0.759423


DatasetDict({
    train: Dataset({
        features: ['tweets', 'label'],
        num_rows: 14469
    })
    test: Dataset({
        features: ['tweets', 'label'],
        num_rows: 3618
    })
})

otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7448,0.650289,0.754008,0.539908
160,0.6117,0.551362,0.786622,0.591084
240,0.5278,0.545632,0.809287,0.653217
320,0.331,0.551414,0.808458,0.662893
400,0.3063,0.504788,0.83361,0.713393
480,0.2408,0.656534,0.8267,0.709231
560,0.1465,0.681012,0.828911,0.713838
640,0.1505,0.611834,0.847153,0.746882
720,0.1214,0.674931,0.84743,0.754907
800,0.0943,0.70892,0.849917,0.757129


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7448,0.650289,0.754008,0.539908
160,0.6117,0.551362,0.786622,0.591084
240,0.5278,0.545632,0.809287,0.653217
320,0.331,0.551414,0.808458,0.662893
400,0.3063,0.504788,0.83361,0.713393
480,0.2408,0.656534,0.8267,0.709231
560,0.1465,0.681012,0.828911,0.713838
640,0.1505,0.611834,0.847153,0.746882
720,0.1214,0.674931,0.84743,0.754907
800,0.0943,0.70892,0.849917,0.757129


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7448,0.650289,0.754008,0.539908
160,0.6117,0.551362,0.786622,0.591084
240,0.5278,0.545632,0.809287,0.653217
320,0.331,0.551414,0.808458,0.662893
400,0.3063,0.504788,0.83361,0.713393
480,0.2408,0.656534,0.8267,0.709231
560,0.1465,0.681012,0.828911,0.713838
640,0.1505,0.611834,0.847153,0.746882
720,0.1214,0.674931,0.84743,0.754907
800,0.0943,0.70892,0.849917,0.757129


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7375,0.635242,0.757048,0.543654
160,0.6093,0.571682,0.788281,0.643635
240,0.534,0.550547,0.811222,0.663756
320,0.3145,0.560315,0.818684,0.699381
400,0.3193,0.520224,0.833333,0.70768
480,0.2356,0.630333,0.833333,0.729101
560,0.1497,0.597412,0.836926,0.727135
640,0.1532,0.600733,0.843836,0.74342
720,0.1151,0.73435,0.835268,0.724047
800,0.085,0.689851,0.841072,0.7418


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7375,0.635242,0.757048,0.543654
160,0.6093,0.571682,0.788281,0.643635
240,0.534,0.550547,0.811222,0.663756
320,0.3145,0.560315,0.818684,0.699381
400,0.3193,0.520224,0.833333,0.70768
480,0.2356,0.630333,0.833333,0.729101
560,0.1497,0.597412,0.836926,0.727135
640,0.1532,0.600733,0.843836,0.74342
720,0.1151,0.73435,0.835268,0.724047
800,0.085,0.689851,0.841072,0.7418


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7375,0.635242,0.757048,0.543654
160,0.6093,0.571682,0.788281,0.643635
240,0.534,0.550547,0.811222,0.663756
320,0.3145,0.560315,0.818684,0.699381
400,0.3193,0.520224,0.833333,0.70768
480,0.2356,0.630333,0.833333,0.729101
560,0.1497,0.597412,0.836926,0.727135
640,0.1532,0.600733,0.843836,0.74342
720,0.1151,0.73435,0.835268,0.724047
800,0.085,0.689851,0.841072,0.7418


otmangi/MorrBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7453,0.64298,0.751797,0.530414
160,0.6323,0.55969,0.785793,0.618274
240,0.5245,0.517305,0.810945,0.673343
320,0.3202,0.531577,0.81675,0.704208
400,0.3141,0.490225,0.830846,0.734873
480,0.25,0.577135,0.83665,0.757835
560,0.1364,0.601515,0.846324,0.766156
640,0.1497,0.581894,0.839414,0.756904
720,0.1095,0.691071,0.842731,0.762634
800,0.0821,0.705849,0.840796,0.757788


otmangi/MorrBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7453,0.64298,0.751797,0.530414
160,0.6323,0.55969,0.785793,0.618274
240,0.5245,0.517305,0.810945,0.673343
320,0.3202,0.531577,0.81675,0.704208
400,0.3141,0.490225,0.830846,0.734873
480,0.25,0.577135,0.83665,0.757835
560,0.1364,0.601515,0.846324,0.766156
640,0.1497,0.581894,0.839414,0.756904
720,0.1095,0.691071,0.842731,0.762634
800,0.0821,0.705849,0.840796,0.757788


otmangi/MorrBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at otmangi/MorrBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/14469 [00:00<?, ? examples/s]

Map:   0%|          | 0/3618 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
80,0.7453,0.64298,0.751797,0.530414
160,0.6323,0.55969,0.785793,0.618274
240,0.5245,0.517305,0.810945,0.673343
320,0.3202,0.531577,0.81675,0.704208
400,0.3141,0.490225,0.830846,0.734873
480,0.25,0.577135,0.83665,0.757835
560,0.1364,0.601515,0.846324,0.766156
640,0.1497,0.581894,0.839414,0.756904
720,0.1095,0.691071,0.842731,0.762634
800,0.0821,0.705849,0.840796,0.757788


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT,0.877557,0.816701
3,alger-ia/dziribert,0.877557,0.807656
6,faisalq/EgyBERT,0.893588,0.82688
9,faisalq/SaudiBERT,0.908513,0.847212
12,otmangi/MorRoBERTa,0.855721,0.785324
15,otmangi/MorrBERT,0.855169,0.785818
18,tunis-ai/TunBERT,0.602543,0.439308
