In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SaudiBank_sentiment_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('benchmarks/data_Saudi_banks.csv', encoding='utf-8', engine='python', sep='\t') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])

df = df[df['Tweet'] != '']

classes = set(df['Final annotation'].values)
display(classes)

df['Final annotation'] = df['Final annotation'].astype('category')
df['label'] = df['Final annotation'].cat.codes


df = df[['Tweet', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))
# display(len(df_test))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

# max_sequence_length = 128
max_sequence_length = 128

models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Tweet'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 30, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 30
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SaudiBank_sentiment_results_1.csv')
display(best_results)



2024-03-27 02:37:05.485820: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-27 02:37:05.513789: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['Tweet', 'Bank', 'Tokens', 'Tokens without stop words',
       'Annotator 1 & 3', 'Annotator 2 & 4', 'Final annotation'],
      dtype='object')

Unnamed: 0,Tweet,Bank,Tokens,Tokens without stop words,Annotator 1 & 3,Annotator 2 & 4,Final annotation
0,الله يقلعك من بنك دمر مستقبلنا الله حسبنا الله ونعم الوكيل فيك من بنك,SAIB,"['الله', 'يقلعك', 'من', 'بنك', 'دمر', 'مستقبلنا', 'الله', 'حسبنا', 'الله', 'ونعم', 'الوكيل', 'فيك', 'من', 'بنك']","['الله', 'يقلعك', 'بنك', 'دمر', 'مستقبلنا', 'الله', 'حسبنا', 'الله', 'ونعم', 'الوكيل', 'فيك', 'بنك']",NEG,NEG,NEG
1,صار لي يومين بحاول اوصل لاي شخص من قبلكم يرد علي عجزت هل فعلا انتم بنك ولا شغل وهمي,SAIB,"['صار', 'لي', 'يومين', 'بحاول', 'اوصل', 'لاي', 'شخص', 'من', 'قبلكم', 'يرد', 'علي', 'عجزت', 'هل', 'فعلا', 'انتم', 'بنك', 'ولا', 'شغل', 'وهمي']","['صار', 'لي', 'يومين', 'بحاول', 'اوصل', 'لاي', 'شخص', 'قبلكم', 'يرد', 'علي', 'عجزت', 'هل', 'فعلا', 'انتم', 'بنك', 'شغل', 'وهمي']",NEG,NEG,NEG
2,صادق كثير من الزملاء يشتكون منه الصراحه وبصراحه صرفت نظر عنه نهاءيا,SAIB,"['صادق', 'كثير', 'من', 'الزملاء', 'يشتكون', 'منه', 'الصراحه', 'وبصراحه', 'صرفت', 'نظر', 'عنه', 'نهاءيا']","['صادق', 'كثير', 'الزملاء', 'يشتكون', 'منه', 'الصراحه', 'وبصراحه', 'صرفت', 'نظر', 'عنه', 'نهاءيا']",NEG,NEG,NEG
3,ماقدرنا ناخذ بطاقه مدي وحسابي ذهبي وموظفين خدمه العملاء مو فاهمين وادور الفروع ومافي فايده علشان اطبع وادق عليكم بدون فايده للاسف تغيرتو للاسواء مو الافضل في خلل كبير عندكم,SAIB,"['ماقدرنا', 'ناخذ', 'بطاقه', 'مدي', 'وحسابي', 'ذهبي', 'وموظفين', 'خدمه', 'العملاء', 'مو', 'فاهمين', 'وادور', 'الفروع', 'ومافي', 'فايده', 'علشان', 'اطبع', 'وادق', 'عليكم', 'بدون', 'فايده', 'للاسف', 'تغيرتو', 'للاسواء', 'مو', 'الافضل', 'في', 'خلل', 'كبير', 'عندكم']","['ماقدرنا', 'ناخذ', 'بطاقه', 'مدي', 'وحسابي', 'ذهبي', 'وموظفين', 'خدمه', 'العملاء', 'مو', 'فاهمين', 'وادور', 'الفروع', 'ومافي', 'فايده', 'علشان', 'اطبع', 'وادق', 'عليكم', 'بدون', 'فايده', 'للاسف', 'تغيرتو', 'للاسواء', 'مو', 'الافضل', 'خلل', 'كبير', 'عندكم']",NEG,NEG,NEG


{'NEG', 'NEU', 'POS'}

3

12048

DatasetDict({
    train: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 9638
    })
    test: Dataset({
        features: ['Tweet', 'label'],
        num_rows: 2410
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5411,0.442381,0.850622,0.646837
60,0.4193,0.41078,0.850207,0.701898
90,0.3973,0.398898,0.851867,0.606523
120,0.3762,0.396408,0.850207,0.676965
150,0.374,0.41979,0.853527,0.62179
180,0.2913,0.41281,0.858921,0.67108
210,0.2533,0.430693,0.851037,0.667403
240,0.2809,0.425169,0.853942,0.651285
270,0.3036,0.412797,0.856017,0.688698
300,0.2901,0.420099,0.849793,0.68736


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5241,0.443001,0.851037,0.645881
60,0.4003,0.405383,0.840664,0.713684
90,0.3942,0.391397,0.848963,0.614106
120,0.3748,0.396047,0.848963,0.695635
150,0.3731,0.416948,0.856846,0.627454
180,0.2882,0.413855,0.856846,0.664862
210,0.2484,0.430763,0.852697,0.67961
240,0.2796,0.441036,0.854772,0.67321
270,0.308,0.40865,0.854357,0.683306
300,0.2836,0.415358,0.843568,0.68904


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5241,0.443001,0.851037,0.645881
60,0.4003,0.405383,0.840664,0.713684
90,0.3942,0.391397,0.848963,0.614106
120,0.3748,0.396047,0.848963,0.695635
150,0.3731,0.416948,0.856846,0.627454
180,0.2882,0.413855,0.856846,0.664862
210,0.2484,0.430763,0.852697,0.67961
240,0.2796,0.441036,0.854772,0.67321
270,0.308,0.40865,0.854357,0.683306
300,0.2836,0.415358,0.843568,0.68904


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5875,0.456481,0.8361,0.566872
60,0.4515,0.473127,0.800415,0.689793
90,0.4389,0.430545,0.851037,0.606295
120,0.4025,0.420627,0.853527,0.62186
150,0.3968,0.443576,0.848133,0.591914
180,0.2998,0.442571,0.849793,0.648128
210,0.2536,0.451037,0.850622,0.66909
240,0.2846,0.447846,0.843568,0.650148
270,0.3045,0.426904,0.849793,0.662693
300,0.2679,0.43995,0.842324,0.689684


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5875,0.456481,0.8361,0.566872
60,0.4515,0.473127,0.800415,0.689793
90,0.4389,0.430545,0.851037,0.606295
120,0.4025,0.420627,0.853527,0.62186
150,0.3968,0.443576,0.848133,0.591914
180,0.2998,0.442571,0.849793,0.648128
210,0.2536,0.451037,0.850622,0.66909
240,0.2846,0.447846,0.843568,0.650148
270,0.3045,0.426904,0.849793,0.662693
300,0.2679,0.43995,0.842324,0.689684


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5875,0.456481,0.8361,0.566872
60,0.4515,0.473127,0.800415,0.689793
90,0.4389,0.430545,0.851037,0.606295
120,0.4025,0.420627,0.853527,0.62186
150,0.3968,0.443576,0.848133,0.591914
180,0.2998,0.442571,0.849793,0.648128
210,0.2536,0.451037,0.850622,0.66909
240,0.2846,0.447846,0.843568,0.650148
270,0.3045,0.426904,0.849793,0.662693
300,0.2679,0.43995,0.842324,0.689684


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5161,0.444224,0.843568,0.611889
60,0.4098,0.421833,0.840249,0.682283
90,0.4035,0.406941,0.852282,0.63312
120,0.3732,0.414545,0.847718,0.654517
150,0.3996,0.392916,0.857676,0.647017
180,0.272,0.420641,0.850207,0.678537
210,0.2258,0.454314,0.833195,0.652774
240,0.256,0.459528,0.844813,0.663198
270,0.2814,0.421298,0.853942,0.682623
300,0.2563,0.437296,0.845643,0.693689


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5161,0.444224,0.843568,0.611889
60,0.4098,0.421833,0.840249,0.682283
90,0.4035,0.406941,0.852282,0.63312
120,0.3732,0.414545,0.847718,0.654517
150,0.3996,0.392916,0.857676,0.647017
180,0.272,0.420641,0.850207,0.678537
210,0.2258,0.454314,0.833195,0.652774
240,0.256,0.459528,0.844813,0.663198
270,0.2814,0.421298,0.853942,0.682623
300,0.2563,0.437296,0.845643,0.693689


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.5161,0.444224,0.843568,0.611889
60,0.4098,0.421833,0.840249,0.682283
90,0.4035,0.406941,0.852282,0.63312
120,0.3732,0.414545,0.847718,0.654517
150,0.3996,0.392916,0.857676,0.647017
180,0.272,0.420641,0.850207,0.678537
210,0.2258,0.454314,0.833195,0.652774
240,0.256,0.459528,0.844813,0.663198
270,0.2814,0.421298,0.853942,0.682623
300,0.2563,0.437296,0.845643,0.693689


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.6038,0.535883,0.818672,0.557784
60,0.4935,0.493007,0.804979,0.666825
90,0.4559,0.437381,0.841079,0.578135
120,0.422,0.444622,0.844813,0.642995
150,0.4279,0.445895,0.843154,0.603819
180,0.3427,0.504401,0.846888,0.617561
210,0.2968,0.460739,0.837759,0.669175
240,0.3354,0.476159,0.846058,0.59452
270,0.366,0.435403,0.848548,0.629667
300,0.3136,0.470724,0.847303,0.644419


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.6038,0.535883,0.818672,0.557784
60,0.4935,0.493007,0.804979,0.666825
90,0.4559,0.437381,0.841079,0.578135
120,0.422,0.444622,0.844813,0.642995
150,0.4279,0.445895,0.843154,0.603819
180,0.3427,0.504401,0.846888,0.617561
210,0.2968,0.460739,0.837759,0.669175
240,0.3354,0.476159,0.846058,0.59452
270,0.366,0.435403,0.848548,0.629667
300,0.3136,0.470724,0.847303,0.644419


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9638 [00:00<?, ? examples/s]

Map:   0%|          | 0/2410 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
30,0.6038,0.535883,0.818672,0.557784
60,0.4935,0.493007,0.804979,0.666825
90,0.4559,0.437381,0.841079,0.578135
120,0.422,0.444622,0.844813,0.642995
150,0.4279,0.445895,0.843154,0.603819
180,0.3427,0.504401,0.846888,0.617561
210,0.2968,0.460739,0.837759,0.669175
240,0.3354,0.476159,0.846058,0.59452
270,0.366,0.435403,0.848548,0.629667
300,0.3136,0.470724,0.847303,0.644419


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.833195,0.705522
3,aubmindlab/bert-base-arabertv02-twitter,0.840664,0.713684
5,qarib/bert-base-qarib,0.836515,0.704911
8,reemalyami/AraRoBERTa-SA,0.837759,0.691839
