In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SDC_EDC_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


dfs = pd.read_csv('benchmarks/SDC_EDC/SDC.txt', header=None, names=['text'], encoding='utf-8', engine='python')
dfe = pd.read_csv('benchmarks/SDC_EDC/EDC.txt', header=None, names=['text'], encoding='utf-8', engine='python')

dfs['label'] = 0
dfe['label'] = 1


df = pd.concat([dfs, dfe], ignore_index=True)
display(len(df))
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

display(len(df))
display(df.columns)
display(df[:4])

classes_num = 2
display(classes_num)

ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128
models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 50, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 50
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SDC_EDC_results_1.csv')
display(best_results)



2024-03-27 05:36:17.306983: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-27 05:36:17.332327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


28613

28613

Index(['text', 'label'], dtype='object')

Unnamed: 0,text,label
0,لما تـسيب الفلاني و المسلسل الفلاني والأغاني اللي بتحبها عشان ربنا وبس,1
1,وانت محل ما اروح ورايااا انا ايش قلت عنك دحين,0
2,فاذا حد مرت عليه نفس السالفة يفيدني بالله اديني خبر اول ما تدخل لأَنِّي انا عندي نفس المشكله ولي سمعتوا عادي لو باقي يوم حتا مين جرب تعرفون احد جاي الله يعافيكم الي يعرف ايش الحل مع العلم انو جاني لاكن يوم ارجع اشيك انا كمان اسأل رجع تاني ع يا جماعة الخير إنّو ما فيها اول شي انصحك تبعدين عن,0
3,وفي مجلس الحريم بعد ماطلعت كلمت وقالتلها على الموضوع فرحت ورحبت بالموضوع لانه واهله ناس طيبين ومعروفين بأخلاقهم واصلهم وقالتلها انها تستشير وترد لهم خبر,0


2

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 22890
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 5723
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1972,0.115417,0.956841,0.956819
100,0.1251,0.121271,0.958938,0.958872
150,0.1062,0.128053,0.953696,0.953464
200,0.1236,0.089283,0.963481,0.963465
250,0.0946,0.081095,0.966626,0.966559
300,0.0954,0.091742,0.966102,0.965987
350,0.0819,0.08351,0.968373,0.968298
400,0.0482,0.091915,0.967674,0.967659
450,0.0373,0.107645,0.967849,0.96782
500,0.0503,0.089118,0.972043,0.97199


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1936,0.106874,0.960335,0.960274
100,0.1234,0.11322,0.958064,0.958039
150,0.113,0.096444,0.962083,0.961976
200,0.1156,0.089603,0.964354,0.96434
250,0.0964,0.077367,0.970645,0.970607
300,0.0944,0.116886,0.954569,0.954295
350,0.0786,0.083094,0.967674,0.967603
400,0.0522,0.088446,0.968548,0.968534
450,0.035,0.117811,0.965053,0.965042
500,0.0433,0.096633,0.972392,0.972358


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1936,0.106874,0.960335,0.960274
100,0.1234,0.11322,0.958064,0.958039
150,0.113,0.096444,0.962083,0.961976
200,0.1156,0.089603,0.964354,0.96434
250,0.0964,0.077367,0.970645,0.970607
300,0.0944,0.116886,0.954569,0.954295
350,0.0786,0.083094,0.967674,0.967603
400,0.0522,0.088446,0.968548,0.968534
450,0.035,0.117811,0.965053,0.965042
500,0.0433,0.096633,0.972392,0.972358


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2221,0.184069,0.940591,0.940589
100,0.1386,0.12993,0.959287,0.959267
150,0.1102,0.093413,0.963655,0.963563
200,0.1069,0.085176,0.970121,0.970083
250,0.098,0.099712,0.968723,0.968669
300,0.1026,0.099537,0.963131,0.963007
350,0.0858,0.084417,0.968198,0.968163
400,0.0446,0.106465,0.967325,0.967292
450,0.0459,0.121557,0.966102,0.966052
500,0.041,0.15759,0.96051,0.960499


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2221,0.184069,0.940591,0.940589
100,0.1386,0.12993,0.959287,0.959267
150,0.1102,0.093413,0.963655,0.963563
200,0.1069,0.085176,0.970121,0.970083
250,0.098,0.099712,0.968723,0.968669
300,0.1026,0.099537,0.963131,0.963007
350,0.0858,0.084417,0.968198,0.968163
400,0.0446,0.106465,0.967325,0.967292
450,0.0459,0.121557,0.966102,0.966052
500,0.041,0.15759,0.96051,0.960499


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2221,0.184069,0.940591,0.940589
100,0.1386,0.12993,0.959287,0.959267
150,0.1102,0.093413,0.963655,0.963563
200,0.1069,0.085176,0.970121,0.970083
250,0.098,0.099712,0.968723,0.968669
300,0.1026,0.099537,0.963131,0.963007
350,0.0858,0.084417,0.968198,0.968163
400,0.0446,0.106465,0.967325,0.967292
450,0.0459,0.121557,0.966102,0.966052
500,0.041,0.15759,0.96051,0.960499


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2071,0.090855,0.969247,0.969198
100,0.1254,0.121649,0.966975,0.966962
150,0.0988,0.089066,0.963655,0.963643
200,0.1197,0.07833,0.972567,0.972482
250,0.0958,0.079291,0.971518,0.9715
300,0.0825,0.084989,0.965577,0.965457
350,0.0728,0.074347,0.976236,0.976199
400,0.039,0.091609,0.976062,0.976036
450,0.0318,0.098821,0.971169,0.971103
500,0.0294,0.116597,0.97379,0.973769


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2071,0.090855,0.969247,0.969198
100,0.1254,0.121649,0.966975,0.966962
150,0.0988,0.089066,0.963655,0.963643
200,0.1197,0.07833,0.972567,0.972482
250,0.0958,0.079291,0.971518,0.9715
300,0.0825,0.084989,0.965577,0.965457
350,0.0728,0.074347,0.976236,0.976199
400,0.039,0.091609,0.976062,0.976036
450,0.0318,0.098821,0.971169,0.971103
500,0.0294,0.116597,0.97379,0.973769


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2071,0.090855,0.969247,0.969198
100,0.1254,0.121649,0.966975,0.966962
150,0.0988,0.089066,0.963655,0.963643
200,0.1197,0.07833,0.972567,0.972482
250,0.0958,0.079291,0.971518,0.9715
300,0.0825,0.084989,0.965577,0.965457
350,0.0728,0.074347,0.976236,0.976199
400,0.039,0.091609,0.976062,0.976036
450,0.0318,0.098821,0.971169,0.971103
500,0.0294,0.116597,0.97379,0.973769


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3409,0.218958,0.91141,0.91139
100,0.2296,0.210958,0.926961,0.926848
150,0.191,0.188196,0.919623,0.919027
200,0.1795,0.152007,0.940241,0.940071
250,0.1721,0.154477,0.937096,0.937095
300,0.1702,0.169231,0.938843,0.938545
350,0.1677,0.16586,0.935523,0.935508
400,0.1021,0.144008,0.945308,0.945236
450,0.0901,0.124129,0.954045,0.954004
500,0.0816,0.144609,0.953521,0.953451


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3409,0.218958,0.91141,0.91139
100,0.2296,0.210958,0.926961,0.926848
150,0.191,0.188196,0.919623,0.919027
200,0.1795,0.152007,0.940241,0.940071
250,0.1721,0.154477,0.937096,0.937095
300,0.1702,0.169231,0.938843,0.938545
350,0.1677,0.16586,0.935523,0.935508
400,0.1021,0.144008,0.945308,0.945236
450,0.0901,0.124129,0.954045,0.954004
500,0.0816,0.144609,0.953521,0.953451


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3409,0.218958,0.91141,0.91139
100,0.2296,0.210958,0.926961,0.926848
150,0.191,0.188196,0.919623,0.919027
200,0.1795,0.152007,0.940241,0.940071
250,0.1721,0.154477,0.937096,0.937095
300,0.1702,0.169231,0.938843,0.938545
350,0.1677,0.16586,0.935523,0.935508
400,0.1021,0.144008,0.945308,0.945236
450,0.0901,0.124129,0.954045,0.954004
500,0.0816,0.144609,0.953521,0.953451


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.972742,0.972699
3,aubmindlab/bert-base-arabertv02-twitter,0.975887,0.975842
4,qarib/bert-base-qarib,0.977984,0.977948
7,reemalyami/AraRoBERTa-SA,0.96383,0.963789
