In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SS2030_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('benchmarks2/SS2030/SS2030.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])

df = df[df['text'] != '']

classes = set(df['Sentiment'].values)
display(classes)


df['label'] = df['Sentiment']

df = df[['text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))
# display(len(df_test))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

# max_sequence_length = 128
max_sequence_length = 128

models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SS2030_results_1.csv')
display(best_results)



2024-04-01 06:30:40.318072: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-01 06:30:40.341935: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['text', 'Sentiment'], dtype='object')

Unnamed: 0,text,Sentiment
0,حقوق المرأة 💚💚💚 https://t.co/Mzf90Ta5g1,1
1,RT @___IHAVENOIDEA: حقوق المرأة في الإسلام. https://t.co/ps3qNw1CbB,1
2,RT @saud_talep: Retweeted لجنة التنمية بشبرا (@Shubratanmyeh):\n \n ما زال التسجيل مستمر في دورة حقوق المرأة بعد الطلاق ✨ #وعيك_يحميك... https://t.co/c2NXzNCdLU,1
3,RT @MojKsa: حقوق المرأة التي تضمنها لها وزارة العدل https://t.co/QUGzWwubFk,1


{0, 1}

2

4252

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 3401
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 851
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.4966,0.390134,0.809636,0.809623
20,0.3102,0.315659,0.854289,0.85412
30,0.2497,0.22065,0.910693,0.909148
40,0.2277,0.215986,0.921269,0.918745
50,0.2199,0.232633,0.913043,0.91006
60,0.1242,0.155818,0.942421,0.941292
70,0.0843,0.189583,0.936545,0.935365
80,0.1224,0.173687,0.938895,0.937458
90,0.0964,0.167946,0.940071,0.938683
100,0.0679,0.187269,0.93537,0.933777


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.5208,0.373713,0.839013,0.838838
20,0.2974,0.292644,0.881316,0.880836
30,0.2421,0.233529,0.910693,0.909313
40,0.2492,0.263727,0.902468,0.898414
50,0.222,0.192574,0.931845,0.930292
60,0.1279,0.159957,0.93772,0.936739
70,0.0834,0.200768,0.931845,0.930138
80,0.1122,0.173554,0.938895,0.937718
90,0.1087,0.160826,0.942421,0.941292
100,0.076,0.185811,0.93772,0.936038


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.5208,0.373713,0.839013,0.838838
20,0.2974,0.292644,0.881316,0.880836
30,0.2421,0.233529,0.910693,0.909313
40,0.2492,0.263727,0.902468,0.898414
50,0.222,0.192574,0.931845,0.930292
60,0.1279,0.159957,0.93772,0.936739
70,0.0834,0.200768,0.931845,0.930138
80,0.1122,0.173554,0.938895,0.937718
90,0.1087,0.160826,0.942421,0.941292
100,0.076,0.185811,0.93772,0.936038


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6396,0.519764,0.733255,0.693047
20,0.4226,0.44294,0.802585,0.802487
30,0.3816,0.335722,0.850764,0.84884
40,0.3321,0.293777,0.867215,0.86528
50,0.2939,0.295434,0.862515,0.859332
60,0.2012,0.355712,0.86134,0.855505
70,0.2203,0.333219,0.86369,0.856136
80,0.1664,0.248706,0.893067,0.890592
90,0.177,0.276222,0.881316,0.879913
100,0.1433,0.286438,0.891892,0.890952


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6396,0.519764,0.733255,0.693047
20,0.4226,0.44294,0.802585,0.802487
30,0.3816,0.335722,0.850764,0.84884
40,0.3321,0.293777,0.867215,0.86528
50,0.2939,0.295434,0.862515,0.859332
60,0.2012,0.355712,0.86134,0.855505
70,0.2203,0.333219,0.86369,0.856136
80,0.1664,0.248706,0.893067,0.890592
90,0.177,0.276222,0.881316,0.879913
100,0.1433,0.286438,0.891892,0.890952


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6396,0.519764,0.733255,0.693047
20,0.4226,0.44294,0.802585,0.802487
30,0.3816,0.335722,0.850764,0.84884
40,0.3321,0.293777,0.867215,0.86528
50,0.2939,0.295434,0.862515,0.859332
60,0.2012,0.355712,0.86134,0.855505
70,0.2203,0.333219,0.86369,0.856136
80,0.1664,0.248706,0.893067,0.890592
90,0.177,0.276222,0.881316,0.879913
100,0.1433,0.286438,0.891892,0.890952


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.582,0.411191,0.804935,0.802035
20,0.3223,0.29305,0.869565,0.868517
30,0.3,0.243121,0.891892,0.888473
40,0.2659,0.203698,0.916569,0.914638
50,0.243,0.207994,0.903643,0.90051
60,0.1222,0.239292,0.916569,0.914448
70,0.0992,0.207902,0.922444,0.921151
80,0.1254,0.175805,0.924794,0.923586
90,0.093,0.176708,0.931845,0.93091
100,0.0715,0.19453,0.93067,0.929659


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.582,0.411191,0.804935,0.802035
20,0.3223,0.29305,0.869565,0.868517
30,0.3,0.243121,0.891892,0.888473
40,0.2659,0.203698,0.916569,0.914638
50,0.243,0.207994,0.903643,0.90051
60,0.1222,0.239292,0.916569,0.914448
70,0.0992,0.207902,0.922444,0.921151
80,0.1254,0.175805,0.924794,0.923586
90,0.093,0.176708,0.931845,0.93091
100,0.0715,0.19453,0.93067,0.929659


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.582,0.411191,0.804935,0.802035
20,0.3223,0.29305,0.869565,0.868517
30,0.3,0.243121,0.891892,0.888473
40,0.2659,0.203698,0.916569,0.914638
50,0.243,0.207994,0.903643,0.90051
60,0.1222,0.239292,0.916569,0.914448
70,0.0992,0.207902,0.922444,0.921151
80,0.1254,0.175805,0.924794,0.923586
90,0.093,0.176708,0.931845,0.93091
100,0.0715,0.19453,0.93067,0.929659


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.687,0.528167,0.747356,0.745216
20,0.4727,0.501752,0.768508,0.768462
30,0.4497,0.365799,0.811986,0.806523
40,0.3883,0.381835,0.821387,0.820582
50,0.3605,0.339965,0.843713,0.841699
60,0.2682,0.356351,0.846063,0.845132
70,0.2257,0.38552,0.855464,0.853679
80,0.2375,0.350287,0.854289,0.850527
90,0.2616,0.33974,0.841363,0.840722
100,0.2311,0.316132,0.86134,0.858851


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.687,0.528167,0.747356,0.745216
20,0.4727,0.501752,0.768508,0.768462
30,0.4497,0.365799,0.811986,0.806523
40,0.3883,0.381835,0.821387,0.820582
50,0.3605,0.339965,0.843713,0.841699
60,0.2682,0.356351,0.846063,0.845132
70,0.2257,0.38552,0.855464,0.853679
80,0.2375,0.350287,0.854289,0.850527
90,0.2616,0.33974,0.841363,0.840722
100,0.2311,0.316132,0.86134,0.858851


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3401 [00:00<?, ? examples/s]

Map:   0%|          | 0/851 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.687,0.528167,0.747356,0.745216
20,0.4727,0.501752,0.768508,0.768462
30,0.4497,0.365799,0.811986,0.806523
40,0.3883,0.381835,0.821387,0.820582
50,0.3605,0.339965,0.843713,0.841699
60,0.2682,0.356351,0.846063,0.845132
70,0.2257,0.38552,0.855464,0.853679
80,0.2375,0.350287,0.854289,0.850527
90,0.2616,0.33974,0.841363,0.840722
100,0.2311,0.316132,0.86134,0.858851


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.920094,0.9185
3,aubmindlab/bert-base-arabertv02-twitter,0.950646,0.949728
5,qarib/bert-base-qarib,0.945946,0.945142
8,reemalyami/AraRoBERTa-SA,0.881316,0.880148


In [None]:
# offical results
Model 	Accuracy 	F1
0 	CAMeL-Lab/bert-base-arabic-camelbert-da 	0.920094 	0.918500
3 	aubmindlab/bert-base-arabertv02-twitter 	0.950646 	0.949728
5 	qarib/bert-base-qarib 	                    0.945946 	0.945142
8 	reemalyami/AraRoBERTa-SA 	                0.881316 	0.880148