In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel, TrainingArguments, Trainer, BertModel
from transformers.modeling_outputs import SequenceClassifierOutput
import numpy as np

log_file = 'iSarcasmEval_sarcasm_2.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/iSarcasmEval/train/train.Ar.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
df_test = pd.read_csv('datasets/iSarcasmEval/test/task_A_Ar_test.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(df_test.columns)
display(df[:4])

df = df[df['dialect'] == 'nile']
df_test = df_test[df_test['dialect'] == 'nile']


classes = set(df['sarcastic'].values)
display(classes)

df['sarcastic'] = df['sarcastic'].astype('category')
df['label'] = df['sarcastic'].cat.codes


df_test['sarcastic'] = df_test['sarcastic'].astype('category')
df_test['label'] = df_test['sarcastic'].cat.codes

df = df[['text', 'label']]
df_test = df_test[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))
display(len(df_test))


ds_t = Dataset.from_pandas(df)
ds_v = Dataset.from_pandas(df_test)

display(ds_t)
display(ds_v)

max_sequence_length = 128



class BertLSTMModel(nn.Module):
    def __init__(self, model_name, num_labels, hidden_dim=128, num_layers=2):
        super(BertLSTMModel, self).__init__()
        
        self.bert = AutoModel.from_pretrained(model_name)
        self.lstm = nn.LSTM(input_size=self.bert.config.hidden_size, 
                            hidden_size=hidden_dim, 
                            num_layers=num_layers, 
                            batch_first=True, 
                            bidirectional=True)
        self.classifier = nn.Linear(hidden_dim * 2, num_labels)
        self.num_labels = num_labels
    
    def forward(self, input_ids, attention_mask, labels=None):
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = bert_outputs.last_hidden_state
        lstm_output, _ = self.lstm(sequence_output)
        lstm_output = lstm_output[:, -1, :]  
        logits = self.classifier(lstm_output)
        
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        
        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
        )



class BertCNNModel(nn.Module):
    def __init__(self, model_name, num_labels, num_filters=100, filter_sizes=[3, 4, 5]):
        super(BertCNNModel, self).__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (fs, self.bert.config.hidden_size)) for fs in filter_sizes
        ])
        self.classifier = nn.Linear(len(filter_sizes) * num_filters, num_labels)
        self.num_labels = num_labels

    
    def forward(self, input_ids, attention_mask, labels=None):
        bert_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = bert_outputs.last_hidden_state.unsqueeze(1)  
        
        conv_outputs = [torch.relu(conv(sequence_output)).squeeze(3) for conv in self.convs]
        pooled_outputs = [torch.max(output, 2)[0] for output in conv_outputs]
        cat_output = torch.cat(pooled_outputs, 1)
        
        logits = self.classifier(cat_output)
        
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        
        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
        )








for j in range(2):
    if j == 0:
        model_name = 'EgyBERT_lstm'    
    else:
        model_name = 'EgyBERT_cnn'
    
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained('faisalq/EgyBERT')
        # model = AutoModelForSequenceClassification.from_pretrained(model_name,
        #                                                       num_labels=classes_num).to('cuda')                                                 

        if j == 0:
            model = BertLSTMModel(model_name='faisalq/EgyBERT', num_labels=classes_num).to('cuda')       
        else:
            model = BertCNNModel(model_name='faisalq/EgyBERT', num_labels=classes_num).to('cuda')
                                                     
        dataset_train = ds_t
        dataset_validation = ds_v                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 20
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('iSarcasmEval_sarcasm_results_2.csv')
display(best_results)



2024-09-02 16:55:01.047476: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-02 16:55:01.073940: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['id', 'text', 'sarcastic', 'rephrase', 'dialect'], dtype='object')

Index(['text', 'dialect', 'sarcastic'], dtype='object')

Unnamed: 0,id,text,sarcastic,rephrase,dialect
0,1,ضبط شخص بدبلوم انتحل صفة طبيب بلد مافيش حد فيها بيشتغل بشهادته,1,شخص ينتحل صفة طبيب ويفتتح عيادة فى بلد فاشلة ضاعت فيها الرقابة,nile
1,2,مش معنى انك قولتلى رايك يبقى أنا هعمل بيه طب افرض انت حمار استحمر أنا كما زيك,1,مش لازم دائما اعمل برأيك,nile
2,3,اية المهلبية دي يصحبي,1,ما هذا الجمال,nile
3,4,الحديث قياس فيه الفضة و فيه النحاس,1,لسانك ترجمان قلبك,magreb


{0, 1}

2

1294

520

Dataset({
    features: ['text', 'label', '__index_level_0__'],
    num_rows: 1294
})

Dataset({
    features: ['text', 'label', '__index_level_0__'],
    num_rows: 520
})

EgyBERT_lstm, try:0


Map:   0%|          | 0/1294 [00:00<?, ? examples/s]

Map:   0%|          | 0/520 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6889,0.676041,0.748077,0.427943
20,0.6838,0.664777,0.748077,0.427943
30,0.6792,0.665019,0.748077,0.427943
40,0.6504,0.647334,0.819231,0.678556
50,0.616,0.629784,0.698077,0.674117
60,0.575,0.649598,0.563462,0.559551
70,0.5023,0.590458,0.659615,0.642111
80,0.4385,0.579165,0.651923,0.637128
90,0.3879,0.570849,0.648077,0.631903
100,0.3194,0.596107,0.621154,0.608073


EgyBERT_lstm, try:1


Map:   0%|          | 0/1294 [00:00<?, ? examples/s]

Map:   0%|          | 0/520 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6852,0.658963,0.748077,0.427943
20,0.6852,0.65421,0.748077,0.427943
30,0.6863,0.64879,0.748077,0.427943
40,0.6563,0.638834,0.786538,0.587947
50,0.6188,0.610129,0.769231,0.731917
60,0.583,0.627584,0.636538,0.627036
70,0.5212,0.559952,0.738462,0.707981
80,0.4656,0.586628,0.65,0.633604
90,0.4129,0.498495,0.746154,0.713312
100,0.3266,0.513772,0.732692,0.709475


EgyBERT_lstm, try:2


Map:   0%|          | 0/1294 [00:00<?, ? examples/s]

Map:   0%|          | 0/520 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6852,0.658963,0.748077,0.427943
20,0.6852,0.65421,0.748077,0.427943
30,0.6863,0.64879,0.748077,0.427943
40,0.6563,0.638834,0.786538,0.587947
50,0.6188,0.610129,0.769231,0.731917
60,0.583,0.627584,0.636538,0.627036
70,0.5212,0.559952,0.738462,0.707981
80,0.4656,0.586628,0.65,0.633604
90,0.4129,0.498495,0.746154,0.713312
100,0.3266,0.513772,0.732692,0.709475


EgyBERT_cnn, try:0


Map:   0%|          | 0/1294 [00:00<?, ? examples/s]

Map:   0%|          | 0/520 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6927,0.685159,0.734615,0.437247
20,0.6808,0.661119,0.748077,0.427943
30,0.6698,0.654875,0.823077,0.717385
40,0.6227,0.616653,0.773077,0.72784
50,0.5256,0.570473,0.742308,0.709817
60,0.4716,0.639169,0.605769,0.597394
70,0.4001,0.495965,0.784615,0.747222
80,0.3363,0.597308,0.673077,0.655328
90,0.3037,0.433499,0.821154,0.783441
100,0.2466,0.527198,0.75,0.722386


EgyBERT_cnn, try:1


Map:   0%|          | 0/1294 [00:00<?, ? examples/s]

Map:   0%|          | 0/520 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6927,0.684809,0.734615,0.437247
20,0.6803,0.660446,0.748077,0.427943
30,0.6681,0.653868,0.826923,0.729567
40,0.6193,0.614268,0.757692,0.713616
50,0.5205,0.566584,0.744231,0.712398
60,0.4636,0.63335,0.617308,0.606802
70,0.3883,0.480345,0.796154,0.758207
80,0.3416,0.673571,0.611538,0.602121
90,0.3049,0.453354,0.8,0.761905
100,0.2419,0.507921,0.761538,0.731472


EgyBERT_cnn, try:2


Map:   0%|          | 0/1294 [00:00<?, ? examples/s]

Map:   0%|          | 0/520 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6927,0.684915,0.736538,0.437989
20,0.6806,0.660571,0.748077,0.427943
30,0.6692,0.654224,0.823077,0.719486
40,0.6227,0.612728,0.788462,0.739199
50,0.5282,0.567317,0.746154,0.713312
60,0.463,0.601209,0.657692,0.642261
70,0.3848,0.529846,0.730769,0.704886
80,0.321,0.525357,0.736538,0.710847
90,0.2802,0.44932,0.803846,0.765621
100,0.2214,0.469413,0.798077,0.763431


Unnamed: 0,Model,Accuracy,F1
0,EgyBERT_cnn,0.840385,0.79579
1,EgyBERT_lstm,0.830769,0.785924
