In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SaudiIrony_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('benchmarks/SaudiIrony.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

df['Tweets with Decoded emojis'] = df['Tweets with Decoded emojis'].str.replace('\r\n', ' ', regex=False)
df['Final Annotation'] = df['Final Annotation'].str.replace('\r\n', '', regex=False)

display(df[:4])

df['text'] = df['Tweets with Decoded emojis']

df = df[df['text'] != '']

classes = set(df['Final Annotation'].values)
display(classes)

df['Final Annotation'] = df['Final Annotation'].astype('category')
df['label'] = df['Final Annotation'].cat.codes



df = df[['text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))
# display(len(df_test))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128
models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 3
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 35, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 35
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SaudiIrony_results_1.csv')
display(best_results)



2024-03-27 02:56:11.330942: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-27 02:56:11.355131: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['Tweet ID', 'Tweets with Decoded emojis', 'Final Annotation'], dtype='object')

Unnamed: 0,Tweet ID,Tweets with Decoded emojis,Final Annotation
0,1,نعم من علامات الجمال تلك الطيبه التي لاترى بل العين ولكنها تلمس القلب هذا هو الجمال الذي لايشيخ ابدا مساء الخيرات على كل من يحب السلام والخير ربي يحفظكم,ليست تهكم
1,2,المعرفه الجديده والمهارات الجديده واعتماد طرائق جديده في النظر الى الكون هي التي تبقي العقل والجسد في حال من النمو ويتجلى ذلك في التصميم على البقاء في حال من الجده في كل ثانيه من ثواني العمر دديباك شوبرا جسد لايشيخ عقل يتخطى الزمن,ليست تهكم
2,3,لايشيخ,تهكم
3,4,لايشيخ وكورونا بتزيده مناعه يعني كورونا العن ترا انا مناعتي ضعيفه وجتني اعراض من اللقاح يوم وراحت لو انها كورونا كان جت الاعراض اقوى وتستمر ايام,تهكم


{'تهكم', 'ليست تهكم'}

2

19635

DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 15708
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 3927
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6211,0.600648,0.675579,0.653132
70,0.6102,0.588942,0.685511,0.68037
105,0.5718,0.57911,0.700025,0.689621
140,0.5773,0.577724,0.705118,0.685662
175,0.5627,0.579666,0.701553,0.676492
210,0.5897,0.578319,0.706901,0.696705
245,0.5702,0.578494,0.703591,0.695837
280,0.5248,0.585813,0.69646,0.687219
315,0.5276,0.597202,0.701044,0.67031
350,0.5203,0.609687,0.690858,0.688134


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6261,0.64249,0.647823,0.647662
70,0.6071,0.588211,0.682709,0.679944
105,0.5724,0.577407,0.705628,0.693375
140,0.5845,0.581156,0.69315,0.667638
175,0.5646,0.577612,0.705882,0.683972
210,0.5868,0.584527,0.700535,0.693352
245,0.5723,0.580387,0.702827,0.694925
280,0.5265,0.587493,0.69697,0.6893
315,0.5344,0.586219,0.694169,0.66782
350,0.5219,0.609324,0.689585,0.686713


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6261,0.64249,0.647823,0.647662
70,0.6071,0.588211,0.682709,0.679944
105,0.5724,0.577407,0.705628,0.693375
140,0.5845,0.581156,0.69315,0.667638
175,0.5646,0.577612,0.705882,0.683972
210,0.5868,0.584527,0.700535,0.693352
245,0.5723,0.580387,0.702827,0.694925
280,0.5265,0.587493,0.69697,0.6893
315,0.5344,0.586219,0.694169,0.66782
350,0.5219,0.609324,0.689585,0.686713


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6188,0.590925,0.686275,0.65679
70,0.6029,0.58695,0.702317,0.690923
105,0.5825,0.580437,0.707156,0.690883
140,0.5852,0.586599,0.688566,0.652189
175,0.5675,0.58244,0.701553,0.678715
210,0.584,0.571747,0.708683,0.694329
245,0.5718,0.588052,0.692895,0.68908
280,0.5339,0.580137,0.701044,0.695854
315,0.5265,0.60373,0.692131,0.663253
350,0.5172,0.605052,0.692641,0.688174


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6188,0.590925,0.686275,0.65679
70,0.6029,0.58695,0.702317,0.690923
105,0.5825,0.580437,0.707156,0.690883
140,0.5852,0.586599,0.688566,0.652189
175,0.5675,0.58244,0.701553,0.678715
210,0.584,0.571747,0.708683,0.694329
245,0.5718,0.588052,0.692895,0.68908
280,0.5339,0.580137,0.701044,0.695854
315,0.5265,0.60373,0.692131,0.663253
350,0.5172,0.605052,0.692641,0.688174


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6188,0.590925,0.686275,0.65679
70,0.6029,0.58695,0.702317,0.690923
105,0.5825,0.580437,0.707156,0.690883
140,0.5852,0.586599,0.688566,0.652189
175,0.5675,0.58244,0.701553,0.678715
210,0.584,0.571747,0.708683,0.694329
245,0.5718,0.588052,0.692895,0.68908
280,0.5339,0.580137,0.701044,0.695854
315,0.5265,0.60373,0.692131,0.663253
350,0.5172,0.605052,0.692641,0.688174


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.616,0.599438,0.687802,0.66241
70,0.5995,0.586863,0.695951,0.690113
105,0.5739,0.584189,0.698243,0.677049
140,0.5781,0.584163,0.690858,0.671253
175,0.5658,0.58138,0.704864,0.686436
210,0.585,0.576444,0.714031,0.702903
245,0.5751,0.572604,0.713267,0.69726
280,0.5179,0.581474,0.708683,0.699039
315,0.5267,0.578801,0.707665,0.686023
350,0.5053,0.597033,0.695696,0.688814


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.616,0.599438,0.687802,0.66241
70,0.5995,0.586863,0.695951,0.690113
105,0.5739,0.584189,0.698243,0.677049
140,0.5781,0.584163,0.690858,0.671253
175,0.5658,0.58138,0.704864,0.686436
210,0.585,0.576444,0.714031,0.702903
245,0.5751,0.572604,0.713267,0.69726
280,0.5179,0.581474,0.708683,0.699039
315,0.5267,0.578801,0.707665,0.686023
350,0.5053,0.597033,0.695696,0.688814


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.616,0.599438,0.687802,0.66241
70,0.5995,0.586863,0.695951,0.690113
105,0.5739,0.584189,0.698243,0.677049
140,0.5781,0.584163,0.690858,0.671253
175,0.5658,0.58138,0.704864,0.686436
210,0.585,0.576444,0.714031,0.702903
245,0.5751,0.572604,0.713267,0.69726
280,0.5179,0.581474,0.708683,0.699039
315,0.5267,0.578801,0.707665,0.686023
350,0.5053,0.597033,0.695696,0.688814


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6334,0.625382,0.650369,0.649986
70,0.6184,0.601681,0.669977,0.656674
105,0.5824,0.603124,0.694169,0.677294
140,0.597,0.599418,0.6603,0.586286
175,0.5891,0.60555,0.685001,0.647294
210,0.6056,0.589738,0.690858,0.685826
245,0.5922,0.599183,0.693405,0.686051
280,0.5557,0.586419,0.691113,0.684078
315,0.5568,0.627383,0.694678,0.663116
350,0.5377,0.613256,0.683473,0.677373


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6334,0.625382,0.650369,0.649986
70,0.6184,0.601681,0.669977,0.656674
105,0.5824,0.603124,0.694169,0.677294
140,0.597,0.599418,0.6603,0.586286
175,0.5891,0.60555,0.685001,0.647294
210,0.6056,0.589738,0.690858,0.685826
245,0.5922,0.599183,0.693405,0.686051
280,0.5557,0.586419,0.691113,0.684078
315,0.5568,0.627383,0.694678,0.663116
350,0.5377,0.613256,0.683473,0.677373


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6334,0.625382,0.650369,0.649986
70,0.6184,0.601681,0.669977,0.656674
105,0.5824,0.603124,0.694169,0.677294
140,0.597,0.599418,0.6603,0.586286
175,0.5891,0.60555,0.685001,0.647294
210,0.6056,0.589738,0.690858,0.685826
245,0.5922,0.599183,0.693405,0.686051
280,0.5557,0.586419,0.691113,0.684078
315,0.5568,0.627383,0.694678,0.663116
350,0.5377,0.613256,0.683473,0.677373


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.701044,0.695854
3,aubmindlab/bert-base-arabertv02-twitter,0.706901,0.696705
4,qarib/bert-base-qarib,0.714031,0.702903
7,reemalyami/AraRoBERTa-SA,0.702572,0.688275
