In [1]:
# cell-1  
#load and clean the data (removing diacritics and unwanted text)

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'ArSarcasmV2_sarcasm.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



# ds = load_dataset('hard')

df = pd.read_csv('ArSarcasmV2/training_data.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
display(df.columns)
df_test = pd.read_csv('ArSarcasmV2/testing_data.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3

# df_test = pd.concat([df2, df3], axis=0, ignore_index=True)

# ['tweet', 'sarcasm', 'sentiment', 'dialect']

display(df[:4])

# return 



classes = set(df['sarcasm'].values)
display(classes)

df['sarcasm'] = df['sarcasm'].astype('category')
df['label'] = df['sarcasm'].cat.codes


df_test['sarcasm'] = df_test['sarcasm'].astype('category')
df_test['label'] = df_test['sarcasm'].cat.codes

df = df[['tweet', 'label']]
df_test = df_test[['tweet', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))
display(len(df_test))


ds_t = Dataset.from_pandas(df)
ds_v = Dataset.from_pandas(df_test)

# ds = ds['train']
# ds = ds.train_test_split(test_size=0.2)
display(ds_t)
display(ds_v)

max_sequence_length = 128

# classes_num = 6
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds_t
        dataset_validation = ds_v                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['tweet'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 10
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 25, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 25
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('ArSarcasmV2_sarcasm_results.csv')
display(best_results)



2024-02-21 05:31:11.220366: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-21 05:31:11.244505: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['tweet', 'sarcasm', 'sentiment', 'dialect'], dtype='object')

Unnamed: 0,tweet,sarcasm,sentiment,dialect
0,"""د. #محمود_العلايلي:أرى أن الفريق #أحمد_شفيق رقم مهم في المعادلة السياسية المصرية ولا يمكن إغفالههل ترى أن هذا صحيح؟#أربعة_زائد_واحد""",False,NEU,msa
1,"""مع فيدرر يا آجا والكبار 😍 https://t.co/hrBeHbkBNu""",False,NEU,msa
2,“الداعون لمبدأ الاختلاط بين الجنسين؛ كالداعين لإلغاء التسعيرة كلاهما يريد تصفية السوق السوداء بجعلها حرة.” #الاختلاط,True,NEG,msa
3,"""@ihe_94 @ya78m @amooo5 @badiajnikhar @Oukasafa @reoshalm @Mnory202 مساكين من الصبح و هوما رايحين راجعين عاى غوغل تعبت بدالهم ههههه ي""",True,NEG,gulf


{False, True}

2

12548

3000

Dataset({
    features: ['tweet', 'label'],
    num_rows: 12548
})

Dataset({
    features: ['tweet', 'label'],
    num_rows: 3000
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.4009,0.53923,0.751,0.540311
50,0.3206,0.619142,0.727667,0.425911
75,0.301,0.519136,0.77,0.651779
100,0.2512,0.53744,0.775333,0.680571
125,0.1784,0.584198,0.772667,0.708445
150,0.1675,0.622605,0.764667,0.676834
175,0.0882,0.906689,0.762333,0.661913
200,0.0896,0.803467,0.763333,0.691452
225,0.0545,1.068386,0.754,0.640826
250,0.0542,1.014811,0.762333,0.672533


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3906,0.514207,0.757667,0.591606
50,0.3245,0.560793,0.731667,0.445589
75,0.2754,0.551589,0.769333,0.651588
100,0.2518,0.546338,0.77,0.686245
125,0.1697,0.612571,0.766333,0.693685
150,0.1598,0.650554,0.763667,0.672472
175,0.0817,0.952957,0.765333,0.658861
200,0.0808,0.809303,0.754333,0.693218
225,0.0507,1.044781,0.758333,0.672019
250,0.0501,1.047985,0.757667,0.651283


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3906,0.514207,0.757667,0.591606
50,0.3245,0.560793,0.731667,0.445589
75,0.2754,0.551589,0.769333,0.651588
100,0.2518,0.546338,0.77,0.686245
125,0.1697,0.612571,0.766333,0.693685
150,0.1598,0.650554,0.763667,0.672472
175,0.0817,0.952957,0.765333,0.658861
200,0.0808,0.809303,0.754333,0.693218
225,0.0507,1.044781,0.758333,0.672019
250,0.0501,1.047985,0.757667,0.651283


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3958,0.486767,0.761,0.59372
50,0.3175,0.587156,0.736333,0.472826
75,0.2583,0.525364,0.778,0.699128
100,0.2434,0.514102,0.773333,0.690792
125,0.1611,0.62391,0.766333,0.703273
150,0.1501,0.663623,0.773667,0.685718
175,0.0772,0.865367,0.769333,0.676997
200,0.076,0.888536,0.769333,0.673386
225,0.0421,1.111264,0.763,0.654532
250,0.0425,0.990926,0.764667,0.689508


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3958,0.486767,0.761,0.59372
50,0.3175,0.587156,0.736333,0.472826
75,0.2583,0.525364,0.778,0.699128
100,0.2434,0.514102,0.773333,0.690792
125,0.1611,0.62391,0.766333,0.703273
150,0.1501,0.663623,0.773667,0.685718
175,0.0772,0.865367,0.769333,0.676997
200,0.076,0.888536,0.769333,0.673386
225,0.0421,1.111264,0.763,0.654532
250,0.0425,0.990926,0.764667,0.689508


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.3958,0.486767,0.761,0.59372
50,0.3175,0.587156,0.736333,0.472826
75,0.2583,0.525364,0.778,0.699128
100,0.2434,0.514102,0.773333,0.690792
125,0.1611,0.62391,0.766333,0.703273
150,0.1501,0.663623,0.773667,0.685718
175,0.0772,0.865367,0.769333,0.676997
200,0.076,0.888536,0.769333,0.673386
225,0.0421,1.111264,0.763,0.654532
250,0.0425,0.990926,0.764667,0.689508


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.4049,0.47764,0.765333,0.62333
50,0.3189,0.537717,0.752,0.536341
75,0.2571,0.503869,0.777,0.696222
100,0.2505,0.496993,0.763,0.707268
125,0.1655,0.593053,0.771,0.701322
150,0.1723,0.576846,0.759667,0.707893
175,0.0888,0.726969,0.773,0.695567
200,0.0854,0.789551,0.765333,0.664265
225,0.0464,0.981411,0.763,0.665952
250,0.049,0.929307,0.758667,0.681591


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.4049,0.47764,0.765333,0.62333
50,0.3189,0.537717,0.752,0.536341
75,0.2571,0.503869,0.777,0.696222
100,0.2505,0.496993,0.763,0.707268
125,0.1655,0.593053,0.771,0.701322
150,0.1723,0.576846,0.759667,0.707893
175,0.0888,0.726969,0.773,0.695567
200,0.0854,0.789551,0.765333,0.664265
225,0.0464,0.981411,0.763,0.665952
250,0.049,0.929307,0.758667,0.681591


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/12548 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
25,0.4049,0.47764,0.765333,0.62333
50,0.3189,0.537717,0.752,0.536341
75,0.2571,0.503869,0.777,0.696222
100,0.2505,0.496993,0.763,0.707268
125,0.1655,0.593053,0.771,0.701322
150,0.1723,0.576846,0.759667,0.707893
175,0.0888,0.726969,0.773,0.695567
200,0.0854,0.789551,0.765333,0.664265
225,0.0464,0.981411,0.763,0.665952
250,0.049,0.929307,0.758667,0.681591


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.759667,0.707893
3,faisalq/bert-base-arabic-senpiece,0.766333,0.703273
6,faisalq/bert-base-arabic-wordpiece,0.772667,0.708445
