In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SaudiIrony_2.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('benchmarks/SaudiIrony.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

df['Tweets with Decoded emojis'] = df['Tweets with Decoded emojis'].str.replace('\r\n', ' ', regex=False)
df['Final Annotation'] = df['Final Annotation'].str.replace('\r\n', '', regex=False)

display(df[:4])

df['text'] = df['Tweets with Decoded emojis']

df = df[df['text'] != '']

classes = set(df['Final Annotation'].values)
display(classes)

df['Final Annotation'] = df['Final Annotation'].astype('category')
df['label'] = df['Final Annotation'].cat.codes



df = df[['text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))

ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128

models = [ 
        'faisalq/SaudiBERT',
        'UBC-NLP/MARBERT',
        'UBC-NLP/MARBERTv2',  
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 3
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 35, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 35
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SaudiIrony_results_2.csv')
display(best_results)



2024-04-02 04:26:07.578687: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-02 04:26:07.604144: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['Tweet ID', 'Tweets with Decoded emojis', 'Final Annotation'], dtype='object')

Unnamed: 0,Tweet ID,Tweets with Decoded emojis,Final Annotation
0,1,نعم من علامات الجمال تلك الطيبه التي لاترى بل العين ولكنها تلمس القلب هذا هو الجمال الذي لايشيخ ابدا مساء الخيرات على كل من يحب السلام والخير ربي يحفظكم,ليست تهكم
1,2,المعرفه الجديده والمهارات الجديده واعتماد طرائق جديده في النظر الى الكون هي التي تبقي العقل والجسد في حال من النمو ويتجلى ذلك في التصميم على البقاء في حال من الجده في كل ثانيه من ثواني العمر دديباك شوبرا جسد لايشيخ عقل يتخطى الزمن,ليست تهكم
2,3,لايشيخ,تهكم
3,4,لايشيخ وكورونا بتزيده مناعه يعني كورونا العن ترا انا مناعتي ضعيفه وجتني اعراض من اللقاح يوم وراحت لو انها كورونا كان جت الاعراض اقوى وتستمر ايام,تهكم


{'تهكم', 'ليست تهكم'}

2

19635

DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 15708
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 3927
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6039,0.57789,0.699516,0.678725
70,0.5946,0.578685,0.71123,0.680549
105,0.5805,0.563778,0.717087,0.699501
140,0.5783,0.56509,0.715304,0.699853
175,0.576,0.563405,0.724726,0.700636
210,0.5765,0.563776,0.710975,0.701322
245,0.5684,0.568373,0.71505,0.702453
280,0.5085,0.646207,0.677617,0.675282
315,0.5016,0.608181,0.706901,0.697225
350,0.5056,0.586152,0.698243,0.690062


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6078,0.581651,0.698752,0.670602
70,0.5946,0.578497,0.706646,0.676042
105,0.5865,0.567958,0.712758,0.694135
140,0.5852,0.566839,0.717851,0.69899
175,0.5828,0.566747,0.722434,0.69595
210,0.5772,0.565891,0.713267,0.703264
245,0.5649,0.570158,0.717851,0.704557
280,0.5157,0.634152,0.686275,0.68447
315,0.5009,0.605038,0.708174,0.69885
350,0.5081,0.588588,0.693405,0.68664


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6078,0.581651,0.698752,0.670602
70,0.5946,0.578497,0.706646,0.676042
105,0.5865,0.567958,0.712758,0.694135
140,0.5852,0.566839,0.717851,0.69899
175,0.5828,0.566747,0.722434,0.69595
210,0.5772,0.565891,0.713267,0.703264
245,0.5649,0.570158,0.717851,0.704557
280,0.5157,0.634152,0.686275,0.68447
315,0.5009,0.605038,0.708174,0.69885
350,0.5081,0.588588,0.693405,0.68664


UBC-NLP/MARBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6187,0.588357,0.69646,0.673232
70,0.6006,0.581106,0.702317,0.684791
105,0.5918,0.57987,0.702317,0.691602
140,0.5906,0.57464,0.71123,0.689763
175,0.597,0.578077,0.713522,0.689322
210,0.5713,0.576636,0.697988,0.691371
245,0.57,0.589524,0.709193,0.692806
280,0.5242,0.605024,0.685256,0.681872
315,0.4987,0.633163,0.691367,0.683915
350,0.5067,0.593441,0.684747,0.677642


UBC-NLP/MARBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6187,0.588357,0.69646,0.673232
70,0.6006,0.581106,0.702317,0.684791
105,0.5918,0.57987,0.702317,0.691602
140,0.5906,0.57464,0.71123,0.689763
175,0.597,0.578077,0.713522,0.689322
210,0.5713,0.576636,0.697988,0.691371
245,0.57,0.589524,0.709193,0.692806
280,0.5242,0.605024,0.685256,0.681872
315,0.4987,0.633163,0.691367,0.683915
350,0.5067,0.593441,0.684747,0.677642


UBC-NLP/MARBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6187,0.588357,0.69646,0.673232
70,0.6006,0.581106,0.702317,0.684791
105,0.5918,0.57987,0.702317,0.691602
140,0.5906,0.57464,0.71123,0.689763
175,0.597,0.578077,0.713522,0.689322
210,0.5713,0.576636,0.697988,0.691371
245,0.57,0.589524,0.709193,0.692806
280,0.5242,0.605024,0.685256,0.681872
315,0.4987,0.633163,0.691367,0.683915
350,0.5067,0.593441,0.684747,0.677642


UBC-NLP/MARBERTv2, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6198,0.601585,0.679908,0.67037
70,0.5909,0.587665,0.699771,0.677775
105,0.5892,0.573994,0.710211,0.689378
140,0.5832,0.572713,0.710721,0.687755
175,0.586,0.572282,0.715304,0.689949
210,0.5701,0.5969,0.690094,0.687165
245,0.5753,0.57007,0.709702,0.697683
280,0.533,0.587755,0.689076,0.684939
315,0.5152,0.612776,0.69697,0.689502
350,0.5241,0.570154,0.706901,0.688023


UBC-NLP/MARBERTv2, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6198,0.601585,0.679908,0.67037
70,0.5909,0.587665,0.699771,0.677775
105,0.5892,0.573994,0.710211,0.689378
140,0.5832,0.572713,0.710721,0.687755
175,0.586,0.572282,0.715304,0.689949
210,0.5701,0.5969,0.690094,0.687165
245,0.5753,0.57007,0.709702,0.697683
280,0.533,0.587755,0.689076,0.684939
315,0.5152,0.612776,0.69697,0.689502
350,0.5241,0.570154,0.706901,0.688023


UBC-NLP/MARBERTv2, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
35,0.6198,0.601585,0.679908,0.67037
70,0.5909,0.587665,0.699771,0.677775
105,0.5892,0.573994,0.710211,0.689378
140,0.5832,0.572713,0.710721,0.687755
175,0.586,0.572282,0.715304,0.689949
210,0.5701,0.5969,0.690094,0.687165
245,0.5753,0.57007,0.709702,0.697683
280,0.533,0.587755,0.689076,0.684939
315,0.5152,0.612776,0.69697,0.689502
350,0.5241,0.570154,0.706901,0.688023


Unnamed: 0,Model,Accuracy,F1
0,UBC-NLP/MARBERT,0.709193,0.692806
3,UBC-NLP/MARBERTv2,0.709702,0.697683
6,faisalq/SaudiBERT,0.717851,0.704557
