In [1]:
# cell-1  
#load and clean the data (removing diacritics and unwanted text)

import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SaudiIrony.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')




df = pd.read_csv('SaudiIrony/SaudiIrony.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

df['Tweets with Decoded emojis'] = df['Tweets with Decoded emojis'].str.replace('\r\n', ' ', regex=False)
df['Final Annotation'] = df['Final Annotation'].str.replace('\r\n', '', regex=False)
# df_test = pd.concat([df2, df3], axis=0, ignore_index=True)

display(df[:4])

# ['Tweet ID', 'Tweets with Decoded emojis', 'Final Annotation']
df['text'] = df['Tweets with Decoded emojis']

df = df[df['text'] != '']

# dfx = df[df['text'] == '']

# display(dfx)
# display(len(dfx))

# return 



classes = set(df['Final Annotation'].values)
display(classes)

df['Final Annotation'] = df['Final Annotation'].astype('category')
df['label'] = df['Final Annotation'].cat.codes



df = df[['text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))
# display(len(df_test))


ds = Dataset.from_pandas(df)
# ds_v = Dataset.from_pandas(df_test)

# ds = ds['train']
ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128

# classes_num = 6
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 10
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 20, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 20
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SaudiIrony_results.csv')
display(best_results)



2024-02-21 07:28:30.405331: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-21 07:28:30.430331: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['Tweet ID', 'Tweets with Decoded emojis', 'Final Annotation'], dtype='object')

Unnamed: 0,Tweet ID,Tweets with Decoded emojis,Final Annotation
0,1,نعم من علامات الجمال تلك الطيبه التي لاترى بل العين ولكنها تلمس القلب هذا هو الجمال الذي لايشيخ ابدا مساء الخيرات على كل من يحب السلام والخير ربي يحفظكم,ليست تهكم
1,2,المعرفه الجديده والمهارات الجديده واعتماد طرائق جديده في النظر الى الكون هي التي تبقي العقل والجسد في حال من النمو ويتجلى ذلك في التصميم على البقاء في حال من الجده في كل ثانيه من ثواني العمر دديباك شوبرا جسد لايشيخ عقل يتخطى الزمن,ليست تهكم
2,3,لايشيخ,تهكم
3,4,لايشيخ وكورونا بتزيده مناعه يعني كورونا العن ترا انا مناعتي ضعيفه وجتني اعراض من اللقاح يوم وراحت لو انها كورونا كان جت الاعراض اقوى وتستمر ايام,تهكم


{'تهكم', 'ليست تهكم'}

2

19635

DatasetDict({
    train: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 15708
    })
    test: Dataset({
        features: ['text', 'label', '__index_level_0__'],
        num_rows: 3927
    })
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6301,0.585285,0.695442,0.672801
40,0.5966,0.584106,0.700025,0.672893
60,0.5832,0.567993,0.71454,0.699664
80,0.5452,0.577857,0.710975,0.699231
100,0.5475,0.585158,0.716068,0.702112
120,0.5483,0.567233,0.715814,0.701741
140,0.4795,0.649834,0.682964,0.67619
160,0.4649,0.624663,0.699007,0.687263
180,0.4669,0.645529,0.683219,0.680335
200,0.3896,0.744244,0.684747,0.672845


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6246,0.585028,0.703591,0.68128
40,0.5972,0.57863,0.702317,0.675444
60,0.5904,0.577864,0.710466,0.68341
80,0.5547,0.578247,0.716323,0.704578
100,0.5497,0.581299,0.712503,0.69953
120,0.5513,0.56775,0.708683,0.699976
140,0.4833,0.623085,0.692641,0.681764
160,0.4658,0.637548,0.688821,0.663308
180,0.4682,0.652218,0.690858,0.684181
200,0.3956,0.74002,0.68984,0.67348


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6246,0.585028,0.703591,0.68128
40,0.5972,0.57863,0.702317,0.675444
60,0.5904,0.577864,0.710466,0.68341
80,0.5547,0.578247,0.716323,0.704578
100,0.5497,0.581299,0.712503,0.69953
120,0.5513,0.56775,0.708683,0.699976
140,0.4833,0.623085,0.692641,0.681764
160,0.4658,0.637548,0.688821,0.663308
180,0.4682,0.652218,0.690858,0.684181
200,0.3956,0.74002,0.68984,0.67348


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6254,0.589652,0.703336,0.683461
40,0.5901,0.580594,0.708683,0.685556
60,0.5824,0.581798,0.702317,0.675959
80,0.5514,0.573806,0.717087,0.70458
100,0.5422,0.581871,0.713267,0.702382
120,0.5432,0.570844,0.711485,0.702512
140,0.4808,0.636816,0.67838,0.666999
160,0.4522,0.624143,0.681691,0.673089
180,0.4558,0.638139,0.683983,0.676836
200,0.3714,0.744485,0.67125,0.662048


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6254,0.589652,0.703336,0.683461
40,0.5901,0.580594,0.708683,0.685556
60,0.5824,0.581798,0.702317,0.675959
80,0.5514,0.573806,0.717087,0.70458
100,0.5422,0.581871,0.713267,0.702382
120,0.5432,0.570844,0.711485,0.702512
140,0.4808,0.636816,0.67838,0.666999
160,0.4522,0.624143,0.681691,0.673089
180,0.4558,0.638139,0.683983,0.676836
200,0.3714,0.744485,0.67125,0.662048


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6254,0.589652,0.703336,0.683461
40,0.5901,0.580594,0.708683,0.685556
60,0.5824,0.581798,0.702317,0.675959
80,0.5514,0.573806,0.717087,0.70458
100,0.5422,0.581871,0.713267,0.702382
120,0.5432,0.570844,0.711485,0.702512
140,0.4808,0.636816,0.67838,0.666999
160,0.4522,0.624143,0.681691,0.673089
180,0.4558,0.638139,0.683983,0.676836
200,0.3714,0.744485,0.67125,0.662048


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6196,0.591031,0.695187,0.682534
40,0.5897,0.577393,0.703845,0.67843
60,0.5822,0.575674,0.711485,0.69355
80,0.5447,0.578697,0.707156,0.69516
100,0.5333,0.582269,0.709447,0.69857
120,0.5375,0.577919,0.708429,0.699741
140,0.4634,0.651652,0.674051,0.654552
160,0.4319,0.666793,0.684492,0.672811
180,0.4428,0.666465,0.680672,0.675517
200,0.3641,0.777209,0.673033,0.661096


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6196,0.591031,0.695187,0.682534
40,0.5897,0.577393,0.703845,0.67843
60,0.5822,0.575674,0.711485,0.69355
80,0.5447,0.578697,0.707156,0.69516
100,0.5333,0.582269,0.709447,0.69857
120,0.5375,0.577919,0.708429,0.699741
140,0.4634,0.651652,0.674051,0.654552
160,0.4319,0.666793,0.684492,0.672811
180,0.4428,0.666465,0.680672,0.675517
200,0.3641,0.777209,0.673033,0.661096


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15708 [00:00<?, ? examples/s]

Map:   0%|          | 0/3927 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6196,0.591031,0.695187,0.682534
40,0.5897,0.577393,0.703845,0.67843
60,0.5822,0.575674,0.711485,0.69355
80,0.5447,0.578697,0.707156,0.69516
100,0.5333,0.582269,0.709447,0.69857
120,0.5375,0.577919,0.708429,0.699741
140,0.4634,0.651652,0.674051,0.654552
160,0.4319,0.666793,0.684492,0.672811
180,0.4428,0.666465,0.680672,0.675517
200,0.3641,0.777209,0.673033,0.661096


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.708429,0.699741
3,faisalq/bert-base-arabic-senpiece,0.717087,0.70458
6,faisalq/bert-base-arabic-wordpiece,0.716323,0.704578
