In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


fname = 'MSAC_1'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



dataset = load_dataset('AbderrahmanSkiredj1/MSAC_darija_sentiment_analysis')
df = pd.DataFrame(dataset['train']) 

      
display(df.columns)
display(len(df))
display(df[:4])

df['label'] = df['label'].replace(',ne', 'neg')

classes = set(df['label'].values)
display(classes)

c = df['label'].value_counts()
display(c)

df['label'] = df['label'].astype('category')
df['label'] = df['label'].cat.codes

df = df[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))


# ds = Dataset.from_pandas(df)
# ds = ds.train_test_split(test_size=0.2)

# display(ds)

max_sequence_length = 128


models = [ 
  'SI2M-Lab/DarijaBERT-mix',
    'SI2M-Lab/DarijaBERT-arabizi'
]


seeds = [0, 1, 42]

for model_name in models:
    for seed in seeds:
        ds = Dataset.from_pandas(df)
        ds = ds.train_test_split(test_size=0.2, seed = seed)
        if seed==0:
            display(ds)
    
        for i in range(3):
            print(f'{model_name}, try:{i}')
                  
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  num_labels=classes_num).to('cuda')                                                 
            dataset_train = ds['train']
            dataset_validation = ds['test']                                                    
            
          
    
            def preprocess_function(examples):
                return tokenizer(examples['text'], truncation=True, padding="max_length",
                                max_length=max_sequence_length)
            
            
            dataset_train = dataset_train.map(preprocess_function, batched=True) # , batched=True
            dataset_validation = dataset_validation.map(preprocess_function, batched=True)  # , batched=True
            
           
            
            def compute_metrics(eval_pred):
                logits, labels = eval_pred
                predictions = np.argmax(logits, axis=-1)    
                acc = accuracy_score(labels, predictions)        
                f1 = f1_score(labels, predictions, average='macro')   
                with open(log_file, 'a') as f:
                    f.write(f'{model_name},{acc},{f1}\n')
                return {'accuracy': acc, 'f1_score': f1}
    
    
            
            
            epochs = 25
            save_steps = 10000 #save checkpoint every 10000 steps
            batch_size = 64
            
            training_args = TrainingArguments(
                output_dir = 'bert/',
                overwrite_output_dir=True,
                num_train_epochs = epochs,
                per_device_train_batch_size = batch_size,
                per_device_eval_batch_size = batch_size,
                save_steps = save_steps,
                save_total_limit = 1, #only save the last 5 checkpoints
                fp16=True,
                learning_rate = 5e-5,  # 5e-5 is the default
                logging_steps = 20, #50_000
                evaluation_strategy = 'steps',
                # evaluate_during_training = True,
                eval_steps = 20
                
            )
            
            trainer = Trainer(
                model = model,
                args = training_args,
                # data_collator=data_collator,
                train_dataset=dataset_train,
                eval_dataset=dataset_validation,
                compute_metrics = compute_metrics
            )
            
            
            trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-17 06:28:37.287361: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-17 06:28:37.311550: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Found cached dataset parquet (/home/ffq/.cache/huggingface/datasets/AbderrahmanSkiredj1___parquet/AbderrahmanSkiredj1--MSAC_darija_sentiment_analysis-78ec287cfc3da3ad/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/1 [00:00<?, ?it/s]

Index(['text', 'label'], dtype='object')

2000

Unnamed: 0,text,label
0,'طوال حياتي لم المس اي تغير حتى قدمت هذه الحكومة فل نقف بجانها بصوتنا',pos
1,'منتوج رائع وثمن مناسب ....جميل',pos
2,'كلنا ابن كيران لمتافق معايا يدير جيم',pos
3,'وفقك الله لولاية اخرى حقاش مكينش محسن منك',pos


{'neg', 'pos'}

label
pos    1000
neg    1000
Name: count, dtype: int64

2

2000

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

SI2M-Lab/DarijaBERT-mix, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4642,0.35844,0.8625,0.860737
40,0.2644,0.31883,0.865,0.86413
60,0.1943,0.381808,0.86,0.858439
80,0.1621,0.397299,0.8875,0.885925
100,0.1029,0.410697,0.89,0.888998
120,0.0562,0.627622,0.8725,0.872077
140,0.0594,0.556562,0.8825,0.880558
160,0.0274,0.576854,0.885,0.884712
180,0.0282,0.729952,0.89,0.889724
200,0.0253,0.646467,0.8875,0.88642


SI2M-Lab/DarijaBERT-mix, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.524,0.439865,0.81,0.81
40,0.2904,0.328155,0.8725,0.872077
60,0.2168,0.315323,0.8825,0.881593
80,0.126,0.360954,0.89,0.888254
100,0.095,0.381678,0.885,0.884512
120,0.037,0.584956,0.8925,0.890579
140,0.0613,0.588783,0.8975,0.896958
160,0.0249,0.608712,0.8925,0.89185
180,0.023,0.58452,0.89,0.889199
200,0.019,0.648913,0.8875,0.886906


SI2M-Lab/DarijaBERT-mix, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.524,0.439865,0.81,0.81
40,0.2904,0.328155,0.8725,0.872077
60,0.2168,0.315323,0.8825,0.881593
80,0.126,0.360954,0.89,0.888254
100,0.095,0.381678,0.885,0.884512
120,0.037,0.584956,0.8925,0.890579
140,0.0613,0.588783,0.8975,0.896958
160,0.0249,0.608712,0.8925,0.89185
180,0.023,0.58452,0.89,0.889199
200,0.019,0.648913,0.8875,0.886906


SI2M-Lab/DarijaBERT-mix, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5583,0.357327,0.86,0.859575
40,0.3104,0.39505,0.8325,0.827957
60,0.3164,0.680671,0.7575,0.737268
80,0.383,0.483167,0.7725,0.768435
100,0.3771,0.461488,0.7925,0.780642
120,0.282,0.358266,0.8725,0.870393
140,0.149,0.349598,0.885,0.883718
160,0.1039,0.386093,0.8875,0.885332
180,0.0721,0.423597,0.89,0.889533
200,0.0437,0.480553,0.9,0.89899


SI2M-Lab/DarijaBERT-mix, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5583,0.357327,0.86,0.859575
40,0.3104,0.39505,0.8325,0.827957
60,0.3164,0.680671,0.7575,0.737268
80,0.383,0.483167,0.7725,0.768435
100,0.3771,0.461488,0.7925,0.780642
120,0.282,0.358266,0.8725,0.870393
140,0.149,0.349598,0.885,0.883718
160,0.1039,0.386093,0.8875,0.885332
180,0.0721,0.423597,0.89,0.889533
200,0.0437,0.480553,0.9,0.89899


SI2M-Lab/DarijaBERT-mix, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5583,0.357327,0.86,0.859575
40,0.3104,0.39505,0.8325,0.827957
60,0.3164,0.680671,0.7575,0.737268
80,0.383,0.483167,0.7725,0.768435
100,0.3771,0.461488,0.7925,0.780642
120,0.282,0.358266,0.8725,0.870393
140,0.149,0.349598,0.885,0.883718
160,0.1039,0.386093,0.8875,0.885332
180,0.0721,0.423597,0.89,0.889533
200,0.0437,0.480553,0.9,0.89899


SI2M-Lab/DarijaBERT-mix, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5414,0.512851,0.715,0.688831
40,0.5268,0.354977,0.85,0.84799
60,0.3797,0.612728,0.6725,0.666648
80,0.6958,0.691594,0.5475,0.406436
100,0.5559,0.382303,0.82,0.8125
120,0.303,0.231362,0.91,0.909675
140,0.187,0.24943,0.905,0.904532
160,0.1339,0.265798,0.9125,0.912377
180,0.0947,0.261964,0.915,0.914581
200,0.0813,0.306187,0.9025,0.901564


SI2M-Lab/DarijaBERT-mix, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5414,0.512851,0.715,0.688831
40,0.5268,0.354977,0.85,0.84799
60,0.3797,0.612728,0.6725,0.666648
80,0.6958,0.691594,0.5475,0.406436
100,0.5559,0.382303,0.82,0.8125
120,0.303,0.231362,0.91,0.909675
140,0.187,0.24943,0.905,0.904532
160,0.1339,0.265798,0.9125,0.912377
180,0.0947,0.261964,0.915,0.914581
200,0.0813,0.306187,0.9025,0.901564


SI2M-Lab/DarijaBERT-mix, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5414,0.512851,0.715,0.688831
40,0.5268,0.354977,0.85,0.84799
60,0.3797,0.612728,0.6725,0.666648
80,0.6958,0.691594,0.5475,0.406436
100,0.5559,0.382303,0.82,0.8125
120,0.303,0.231362,0.91,0.909675
140,0.187,0.24943,0.905,0.904532
160,0.1339,0.265798,0.9125,0.912377
180,0.0947,0.261964,0.915,0.914581
200,0.0813,0.306187,0.9025,0.901564


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1600
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 400
    })
})

SI2M-Lab/DarijaBERT-arabizi, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4823,0.44508,0.835,0.832589
40,0.2706,0.450597,0.84,0.838384
60,0.1825,0.526775,0.835,0.833937
80,0.1065,0.610902,0.845,0.843872
100,0.053,0.825387,0.8375,0.835775
120,0.0441,0.876563,0.8325,0.831208
140,0.0619,1.015545,0.815,0.814625
160,0.0189,1.028676,0.83,0.829386
180,0.0154,1.200542,0.8025,0.8024
200,0.0109,1.116642,0.8375,0.836756


SI2M-Lab/DarijaBERT-arabizi, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4823,0.44508,0.835,0.832589
40,0.2706,0.450597,0.84,0.838384
60,0.1825,0.526775,0.835,0.833937
80,0.1065,0.610902,0.845,0.843872
100,0.053,0.825387,0.8375,0.835775
120,0.0441,0.876563,0.8325,0.831208
140,0.0619,1.015545,0.815,0.814625
160,0.0189,1.028676,0.83,0.829386
180,0.0154,1.200542,0.8025,0.8024
200,0.0109,1.116642,0.8375,0.836756


SI2M-Lab/DarijaBERT-arabizi, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.4823,0.44508,0.835,0.832589
40,0.2706,0.450597,0.84,0.838384
60,0.1825,0.526775,0.835,0.833937
80,0.1065,0.610902,0.845,0.843872
100,0.053,0.825387,0.8375,0.835775
120,0.0441,0.876563,0.8325,0.831208
140,0.0619,1.015545,0.815,0.814625
160,0.0189,1.028676,0.83,0.829386
180,0.0154,1.200542,0.8025,0.8024
200,0.0109,1.116642,0.8375,0.836756


SI2M-Lab/DarijaBERT-arabizi, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5163,0.437476,0.815,0.81014
40,0.295,0.392336,0.855,0.85418
60,0.1972,0.472141,0.8475,0.844561
80,0.1233,0.55597,0.8575,0.857178
100,0.1326,1.298194,0.72,0.710077
120,0.0733,0.670987,0.835,0.834404
140,0.0202,0.848896,0.8425,0.841779
160,0.018,0.923735,0.8375,0.837377
180,0.0071,1.023986,0.845,0.84254
200,0.0053,1.112899,0.85,0.849696


SI2M-Lab/DarijaBERT-arabizi, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5163,0.437476,0.815,0.81014
40,0.295,0.392336,0.855,0.85418
60,0.1972,0.472141,0.8475,0.844561
80,0.1233,0.55597,0.8575,0.857178
100,0.1326,1.298194,0.72,0.710077
120,0.0733,0.670987,0.835,0.834404
140,0.0202,0.848896,0.8425,0.841779
160,0.018,0.923735,0.8375,0.837377
180,0.0071,1.023986,0.845,0.84254
200,0.0053,1.112899,0.85,0.849696


SI2M-Lab/DarijaBERT-arabizi, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5163,0.437476,0.815,0.81014
40,0.295,0.392336,0.855,0.85418
60,0.1972,0.472141,0.8475,0.844561
80,0.1233,0.55597,0.8575,0.857178
100,0.1326,1.298194,0.72,0.710077
120,0.0733,0.670987,0.835,0.834404
140,0.0202,0.848896,0.8425,0.841779
160,0.018,0.923735,0.8375,0.837377
180,0.0071,1.023986,0.845,0.84254
200,0.0053,1.112899,0.85,0.849696


SI2M-Lab/DarijaBERT-arabizi, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5349,0.351132,0.8475,0.846184
40,0.3002,0.352267,0.8825,0.882287
60,0.2009,0.334928,0.8875,0.886906
80,0.1014,0.496092,0.8625,0.862251
100,0.0902,0.522983,0.84,0.839936
120,0.0496,0.495845,0.8775,0.876199
140,0.0099,0.791515,0.8625,0.86212
160,0.0127,0.800136,0.8625,0.860043
180,0.0066,0.822722,0.87,0.869674
200,0.0088,0.873012,0.87,0.868408


SI2M-Lab/DarijaBERT-arabizi, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5349,0.351132,0.8475,0.846184
40,0.3002,0.352267,0.8825,0.882287
60,0.2009,0.334928,0.8875,0.886906
80,0.1014,0.496092,0.8625,0.862251
100,0.0902,0.522983,0.84,0.839936
120,0.0496,0.495845,0.8775,0.876199
140,0.0099,0.791515,0.8625,0.86212
160,0.0127,0.800136,0.8625,0.860043
180,0.0066,0.822722,0.87,0.869674
200,0.0088,0.873012,0.87,0.868408


SI2M-Lab/DarijaBERT-arabizi, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.5349,0.351132,0.8475,0.846184
40,0.3002,0.352267,0.8825,0.882287
60,0.2009,0.334928,0.8875,0.886906
80,0.1014,0.496092,0.8625,0.862251
100,0.0902,0.522983,0.84,0.839936
120,0.0496,0.495845,0.8775,0.876199
140,0.0099,0.791515,0.8625,0.86212
160,0.0127,0.800136,0.8625,0.860043
180,0.0066,0.822722,0.87,0.869674
200,0.0088,0.873012,0.87,0.868408


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT-arabizi,0.8925,0.892008
3,SI2M-Lab/DarijaBERT-mix,0.9275,0.927216
