In [1]:
# cell-1  
#load and clean the data (removing diacritics and unwanted text)

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'iSarcasmEval_dialect.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



# ds = load_dataset('hard')

df = pd.read_csv('iSarcasmEval/train/train.Ar.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
# display(df.columns)
df_test = pd.read_csv('iSarcasmEval/test/task_A_Ar_test.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3

# df_test = pd.concat([df2, df3], axis=0, ignore_index=True)

# ['id', 'text', 'sarcastic', 'rephrase', 'dialect']
# ['text', 'dialect', 'sarcastic']

      
display(df.columns)
display(df_test.columns)
display(df[:4])

# return 



classes = set(df['dialect'].values)
display(classes)

df['dialect'] = df['dialect'].astype('category')
df['label'] = df['dialect'].cat.codes


df_test['dialect'] = df_test['dialect'].astype('category')
df_test['label'] = df_test['dialect'].cat.codes

df = df[['text', 'label']]
df_test = df_test[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))
display(len(df_test))


ds_t = Dataset.from_pandas(df)
ds_v = Dataset.from_pandas(df_test)

# ds = ds['train']
# ds = ds.train_test_split(test_size=0.2)
display(ds_t)
display(ds_v)

max_sequence_length = 128

# classes_num = 6
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds_t
        dataset_validation = ds_v                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 10
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 5, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 5
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('iSarcasmEval_dialect_results.csv')
display(best_results)



2024-02-21 06:27:49.786991: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-21 06:27:49.809453: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['id', 'text', 'sarcastic', 'rephrase', 'dialect'], dtype='object')

Index(['text', 'dialect', 'sarcastic'], dtype='object')

Unnamed: 0,id,text,sarcastic,rephrase,dialect
0,1,ضبط شخص بدبلوم انتحل صفة طبيب بلد مافيش حد فيها بيشتغل بشهادته,1,شخص ينتحل صفة طبيب ويفتتح عيادة فى بلد فاشلة ضاعت فيها الرقابة,nile
1,2,مش معنى انك قولتلى رايك يبقى أنا هعمل بيه طب افرض انت حمار استحمر أنا كما زيك,1,مش لازم دائما اعمل برأيك,nile
2,3,اية المهلبية دي يصحبي,1,ما هذا الجمال,nile
3,4,الحديث قياس فيه الفضة و فيه النحاس,1,لسانك ترجمان قلبك,magreb


{'gulf', 'levant', 'magreb', 'msa', 'nile'}

5

3102

1400

Dataset({
    features: ['text', 'label'],
    num_rows: 3102
})

Dataset({
    features: ['text', 'label'],
    num_rows: 1400
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0753,1.533988,0.505714,0.238169
10,0.7088,1.353235,0.510714,0.240621
15,0.5947,1.202392,0.555714,0.260829
20,0.6071,1.221633,0.557143,0.279737
25,0.5401,1.41968,0.537143,0.263916
30,0.4572,1.233247,0.535,0.304471
35,0.4589,1.263361,0.547143,0.34401
40,0.3706,1.368717,0.553571,0.332302
45,0.335,1.420802,0.535714,0.341495
50,0.3222,1.486061,0.535714,0.342723


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0633,1.543468,0.532143,0.251577
10,0.7348,1.304087,0.533571,0.252247
15,0.6377,1.25641,0.53,0.251136
20,0.6086,1.378737,0.545714,0.257671
25,0.5447,1.186999,0.562857,0.318078
30,0.4338,1.475538,0.537857,0.29903
35,0.4663,1.243804,0.555,0.382919
40,0.3694,1.296571,0.555,0.334066
45,0.3404,1.341883,0.542857,0.355634
50,0.3286,1.488863,0.556429,0.366644


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0633,1.543468,0.532143,0.251577
10,0.7348,1.304087,0.533571,0.252247
15,0.6377,1.25641,0.53,0.251136
20,0.6086,1.378737,0.545714,0.257671
25,0.5447,1.186999,0.562857,0.318078
30,0.4338,1.475538,0.537857,0.29903
35,0.4663,1.243804,0.555,0.382919
40,0.3694,1.296571,0.555,0.334066
45,0.3404,1.341883,0.542857,0.355634
50,0.3286,1.488863,0.556429,0.366644


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0598,1.460212,0.527143,0.249059
10,0.7188,1.305038,0.531429,0.251114
15,0.6004,1.155083,0.553571,0.260627
20,0.5834,1.202471,0.555714,0.263499
25,0.5298,1.143207,0.582857,0.361416
30,0.4117,1.327461,0.567143,0.346117
35,0.447,1.181254,0.567143,0.396826
40,0.3727,1.276568,0.569286,0.409578
45,0.31,1.352664,0.572857,0.393053
50,0.3153,1.522321,0.555714,0.406872


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0598,1.460212,0.527143,0.249059
10,0.7188,1.305038,0.531429,0.251114
15,0.6004,1.155083,0.553571,0.260627
20,0.5834,1.202471,0.555714,0.263499
25,0.5298,1.143207,0.582857,0.361416
30,0.4117,1.327461,0.567143,0.346117
35,0.447,1.181254,0.567143,0.396826
40,0.3727,1.276568,0.569286,0.409578
45,0.31,1.352664,0.572857,0.393053
50,0.3153,1.522321,0.555714,0.406872


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0598,1.460212,0.527143,0.249059
10,0.7188,1.305038,0.531429,0.251114
15,0.6004,1.155083,0.553571,0.260627
20,0.5834,1.202471,0.555714,0.263499
25,0.5298,1.143207,0.582857,0.361416
30,0.4117,1.327461,0.567143,0.346117
35,0.447,1.181254,0.567143,0.396826
40,0.3727,1.276568,0.569286,0.409578
45,0.31,1.352664,0.572857,0.393053
50,0.3153,1.522321,0.555714,0.406872


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0746,1.506391,0.489286,0.223497
10,0.7242,1.484639,0.527143,0.248446
15,0.6156,1.230338,0.546429,0.25833
20,0.5868,1.363156,0.548571,0.260263
25,0.5378,1.183997,0.555714,0.276706
30,0.4134,1.457492,0.532857,0.280108
35,0.4624,1.226666,0.527857,0.341921
40,0.4069,1.428545,0.536429,0.292029
45,0.3572,1.250173,0.541429,0.333796
50,0.3571,1.383951,0.55,0.352286


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0746,1.506391,0.489286,0.223497
10,0.7242,1.484639,0.527143,0.248446
15,0.6156,1.230338,0.546429,0.25833
20,0.5868,1.363156,0.548571,0.260263
25,0.5378,1.183997,0.555714,0.276706
30,0.4134,1.457492,0.532857,0.280108
35,0.4624,1.226666,0.527857,0.341921
40,0.4069,1.428545,0.536429,0.292029
45,0.3572,1.250173,0.541429,0.333796
50,0.3571,1.383951,0.55,0.352286


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3102 [00:00<?, ? examples/s]

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,1.0746,1.506391,0.489286,0.223497
10,0.7242,1.484639,0.527143,0.248446
15,0.6156,1.230338,0.546429,0.25833
20,0.5868,1.363156,0.548571,0.260263
25,0.5378,1.183997,0.555714,0.276706
30,0.4134,1.457492,0.532857,0.280108
35,0.4624,1.226666,0.527857,0.341921
40,0.4069,1.428545,0.536429,0.292029
45,0.3572,1.250173,0.541429,0.333796
50,0.3571,1.383951,0.55,0.352286


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.561429,0.399479
3,faisalq/bert-base-arabic-senpiece,0.572143,0.445115
6,faisalq/bert-base-arabic-wordpiece,0.558571,0.422842
