In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'AraCust_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('benchmarks/AraCust.csv', encoding='utf-8', engine='python', sep='\t') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)


display(df[:4])



df = df[df[' Tweet'] != '']

classes = set(df[' Label'].values)
display(classes)

df[' Label'] = df[' Label'].astype('category')
df['label'] = df[' Label'].cat.codes



df = df[[' Tweet', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128


models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples[' Tweet'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 50, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 50
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('AraCust_results_1.csv')
display(best_results)



2024-03-27 00:51:31.281070: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-27 00:51:31.400183: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index([' Tweet', ' Label', 'Company'], dtype='object')

Unnamed: 0,Tweet,Label,Company
0,@STC1100 الله لاينهكم شركة STC ركبو لنا الياف في حي الملك عبدالله مقابل حي المستقبل وشكرا 🌚,Positive,STC
1,@alrakoo @mmshibani @GOclub @STC_KSA @ZainKSA @STC اشكرك 😊,Positive,STC
2,اعلان توعوي مميز من شركة STC,Positive,STC
3,الشيء الوحيد الذي فادتني فيه STC نسيت بطاقتي الجامعية ودخلت للجامعة بكرت شحن شكرا STC,Positive,STC


{'Negative', 'Positive'}

2

20000

DatasetDict({
    train: Dataset({
        features: [' Tweet', 'label'],
        num_rows: 16000
    })
    test: Dataset({
        features: [' Tweet', 'label'],
        num_rows: 4000
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2817,0.268979,0.91375,0.892542
100,0.1903,0.138261,0.9615,0.955258
150,0.1495,0.115926,0.966,0.960471
200,0.113,0.107349,0.97175,0.967106
250,0.1171,0.102404,0.976,0.971999
300,0.08,0.15207,0.9615,0.955731
350,0.0963,0.091462,0.97625,0.97226
400,0.0732,0.095621,0.97725,0.97344
450,0.0784,0.083388,0.978,0.974287
500,0.0876,0.084676,0.977,0.973154


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2847,0.174412,0.949,0.940986
100,0.1877,0.133469,0.96025,0.953876
150,0.1499,0.129588,0.965,0.959466
200,0.1175,0.147921,0.96375,0.957973
250,0.1211,0.094179,0.974,0.969517
300,0.0807,0.155618,0.96125,0.955453
350,0.0908,0.09196,0.9765,0.972522
400,0.0831,0.101482,0.97325,0.96888
450,0.0778,0.092264,0.97625,0.972297
500,0.0874,0.08887,0.97625,0.972297


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2847,0.174412,0.949,0.940986
100,0.1877,0.133469,0.96025,0.953876
150,0.1499,0.129588,0.965,0.959466
200,0.1175,0.147921,0.96375,0.957973
250,0.1211,0.094179,0.974,0.969517
300,0.0807,0.155618,0.96125,0.955453
350,0.0908,0.09196,0.9765,0.972522
400,0.0831,0.101482,0.97325,0.96888
450,0.0778,0.092264,0.97625,0.972297
500,0.0874,0.08887,0.97625,0.972297


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3677,0.309515,0.892,0.86245
100,0.2188,0.169075,0.9465,0.93631
150,0.1917,0.154841,0.95525,0.948207
200,0.1441,0.113733,0.96675,0.961216
250,0.121,0.095925,0.97275,0.968087
300,0.0848,0.10295,0.973,0.968554
350,0.0942,0.103669,0.97325,0.968811
400,0.0807,0.100413,0.9775,0.973703
450,0.0773,0.090088,0.9765,0.972522
500,0.0914,0.088686,0.9765,0.972546


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3677,0.309515,0.892,0.86245
100,0.2188,0.169075,0.9465,0.93631
150,0.1917,0.154841,0.95525,0.948207
200,0.1441,0.113733,0.96675,0.961216
250,0.121,0.095925,0.97275,0.968087
300,0.0848,0.10295,0.973,0.968554
350,0.0942,0.103669,0.97325,0.968811
400,0.0807,0.100413,0.9775,0.973703
450,0.0773,0.090088,0.9765,0.972522
500,0.0914,0.088686,0.9765,0.972546


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3677,0.309515,0.892,0.86245
100,0.2188,0.169075,0.9465,0.93631
150,0.1917,0.154841,0.95525,0.948207
200,0.1441,0.113733,0.96675,0.961216
250,0.121,0.095925,0.97275,0.968087
300,0.0848,0.10295,0.973,0.968554
350,0.0942,0.103669,0.97325,0.968811
400,0.0807,0.100413,0.9775,0.973703
450,0.0773,0.090088,0.9765,0.972522
500,0.0914,0.088686,0.9765,0.972546


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2605,0.219763,0.93,0.914789
100,0.1871,0.164,0.94325,0.931391
150,0.1623,0.133809,0.962,0.956048
200,0.1188,0.122795,0.971,0.966166
250,0.122,0.100022,0.97475,0.970403
300,0.0853,0.152877,0.96325,0.957646
350,0.093,0.10775,0.975,0.970858
400,0.0941,0.087794,0.9755,0.971391
450,0.07,0.108928,0.97375,0.969394
500,0.0873,0.095501,0.975,0.970807


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2605,0.219763,0.93,0.914789
100,0.1871,0.164,0.94325,0.931391
150,0.1623,0.133809,0.962,0.956048
200,0.1188,0.122795,0.971,0.966166
250,0.122,0.100022,0.97475,0.970403
300,0.0853,0.152877,0.96325,0.957646
350,0.093,0.10775,0.975,0.970858
400,0.0941,0.087794,0.9755,0.971391
450,0.07,0.108928,0.97375,0.969394
500,0.0873,0.095501,0.975,0.970807


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2605,0.219763,0.93,0.914789
100,0.1871,0.164,0.94325,0.931391
150,0.1623,0.133809,0.962,0.956048
200,0.1188,0.122795,0.971,0.966166
250,0.122,0.100022,0.97475,0.970403
300,0.0853,0.152877,0.96325,0.957646
350,0.093,0.10775,0.975,0.970858
400,0.0941,0.087794,0.9755,0.971391
450,0.07,0.108928,0.97375,0.969394
500,0.0873,0.095501,0.975,0.970807


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.401,0.285452,0.9095,0.889835
100,0.2628,0.259479,0.9095,0.897905
150,0.2272,0.179215,0.948,0.939517
200,0.1629,0.144024,0.9565,0.94841
250,0.1475,0.135933,0.96325,0.956628
300,0.1056,0.120979,0.95925,0.952633
350,0.1102,0.128477,0.9485,0.941542
400,0.0893,0.126206,0.9675,0.962082
450,0.087,0.107951,0.9705,0.965628
500,0.0969,0.102487,0.9715,0.966734


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.401,0.285452,0.9095,0.889835
100,0.2628,0.259479,0.9095,0.897905
150,0.2272,0.179215,0.948,0.939517
200,0.1629,0.144024,0.9565,0.94841
250,0.1475,0.135933,0.96325,0.956628
300,0.1056,0.120979,0.95925,0.952633
350,0.1102,0.128477,0.9485,0.941542
400,0.0893,0.126206,0.9675,0.962082
450,0.087,0.107951,0.9705,0.965628
500,0.0969,0.102487,0.9715,0.966734


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.401,0.285452,0.9095,0.889835
100,0.2628,0.259479,0.9095,0.897905
150,0.2272,0.179215,0.948,0.939517
200,0.1629,0.144024,0.9565,0.94841
250,0.1475,0.135933,0.96325,0.956628
300,0.1056,0.120979,0.95925,0.952633
350,0.1102,0.128477,0.9485,0.941542
400,0.0893,0.126206,0.9675,0.962082
450,0.087,0.107951,0.9705,0.965628
500,0.0969,0.102487,0.9715,0.966734


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.979,0.97539
3,aubmindlab/bert-base-arabertv02-twitter,0.97875,0.975158
4,qarib/bert-base-qarib,0.97825,0.974585
7,reemalyami/AraRoBERTa-SA,0.97625,0.972272
