In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SDC_EDC_2.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


dfs = pd.read_csv('benchmarks/SDC_EDC/SDC.txt', header=None, names=['text'], encoding='utf-8', engine='python')
dfe = pd.read_csv('benchmarks/SDC_EDC/EDC.txt', header=None, names=['text'], encoding='utf-8', engine='python')

dfs['label'] = 0
dfe['label'] = 1


df = pd.concat([dfs, dfe], ignore_index=True)
display(len(df))
 
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

display(len(df))
display(df.columns)
display(df[:4])

classes_num = 2
display(classes_num)

ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128


models = [ 
        'faisalq/SaudiBERT',
        'UBC-NLP/MARBERT',
        'UBC-NLP/MARBERTv2',  
]


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 50, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 50
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SDC_EDC_results_2.csv')
display(best_results)



28613

28613

Index(['text', 'label'], dtype='object')

Unnamed: 0,text,label
0,لما تـسيب الفلاني و المسلسل الفلاني والأغاني اللي بتحبها عشان ربنا وبس,1
1,وانت محل ما اروح ورايااا انا ايش قلت عنك دحين,0
2,فاذا حد مرت عليه نفس السالفة يفيدني بالله اديني خبر اول ما تدخل لأَنِّي انا عندي نفس المشكله ولي سمعتوا عادي لو باقي يوم حتا مين جرب تعرفون احد جاي الله يعافيكم الي يعرف ايش الحل مع العلم انو جاني لاكن يوم ارجع اشيك انا كمان اسأل رجع تاني ع يا جماعة الخير إنّو ما فيها اول شي انصحك تبعدين عن,0
3,وفي مجلس الحريم بعد ماطلعت كلمت وقالتلها على الموضوع فرحت ورحبت بالموضوع لانه واهله ناس طيبين ومعروفين بأخلاقهم واصلهم وقالتلها انها تستشير وترد لهم خبر,0


2

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 22890
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 5723
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1881,0.153226,0.946881,0.946871
100,0.1639,0.147106,0.952123,0.952113
150,0.0897,0.096387,0.966451,0.966451
200,0.1004,0.084142,0.969946,0.969943
250,0.0725,0.081938,0.972217,0.972207
300,0.0875,0.07461,0.972217,0.972188
350,0.087,0.065927,0.975363,0.975354
400,0.0401,0.091654,0.974139,0.974127
450,0.0256,0.125673,0.969771,0.969718
500,0.0233,0.111501,0.974838,0.974827


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1881,0.153226,0.946881,0.946871
100,0.1639,0.147106,0.952123,0.952113
150,0.0897,0.096387,0.966451,0.966451
200,0.1004,0.084142,0.969946,0.969943
250,0.0725,0.081938,0.972217,0.972207
300,0.0875,0.07461,0.972217,0.972188
350,0.087,0.065927,0.975363,0.975354
400,0.0401,0.091654,0.974139,0.974127
450,0.0256,0.125673,0.969771,0.969718
500,0.0233,0.111501,0.974838,0.974827


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1881,0.153226,0.946881,0.946871
100,0.1639,0.147106,0.952123,0.952113
150,0.0897,0.096387,0.966451,0.966451
200,0.1004,0.084142,0.969946,0.969943
250,0.0725,0.081938,0.972217,0.972207
300,0.0875,0.07461,0.972217,0.972188
350,0.087,0.065927,0.975363,0.975354
400,0.0401,0.091654,0.974139,0.974127
450,0.0256,0.125673,0.969771,0.969718
500,0.0233,0.111501,0.974838,0.974827


UBC-NLP/MARBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3407,0.154243,0.954045,0.954032
100,0.1471,0.104989,0.964704,0.96469
150,0.1188,0.129487,0.961733,0.961731
200,0.1302,0.099928,0.972392,0.972384
250,0.0943,0.152245,0.970295,0.970279
300,0.1199,0.168137,0.962083,0.962081
350,0.1233,0.082802,0.970645,0.970629
400,0.0609,0.131599,0.968723,0.968722
450,0.0535,0.193456,0.964529,0.964529
500,0.0477,0.160533,0.970295,0.970287


UBC-NLP/MARBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3407,0.154243,0.954045,0.954032
100,0.1471,0.104989,0.964704,0.96469
150,0.1188,0.129487,0.961733,0.961731
200,0.1302,0.099928,0.972392,0.972384
250,0.0943,0.152245,0.970295,0.970279
300,0.1199,0.168137,0.962083,0.962081
350,0.1233,0.082802,0.970645,0.970629
400,0.0609,0.131599,0.968723,0.968722
450,0.0535,0.193456,0.964529,0.964529
500,0.0477,0.160533,0.970295,0.970287


UBC-NLP/MARBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3407,0.154243,0.954045,0.954032
100,0.1471,0.104989,0.964704,0.96469
150,0.1188,0.129487,0.961733,0.961731
200,0.1302,0.099928,0.972392,0.972384
250,0.0943,0.152245,0.970295,0.970279
300,0.1199,0.168137,0.962083,0.962081
350,0.1233,0.082802,0.970645,0.970629
400,0.0609,0.131599,0.968723,0.968722
450,0.0535,0.193456,0.964529,0.964529
500,0.0477,0.160533,0.970295,0.970287


UBC-NLP/MARBERTv2, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2477,0.128719,0.959112,0.959107
100,0.1375,0.155176,0.966276,0.966276
150,0.119,0.143684,0.947056,0.947028
200,0.1363,0.086408,0.97082,0.970819
250,0.0802,0.100394,0.970645,0.970634
300,0.0895,0.096576,0.973441,0.97343
350,0.1041,0.069038,0.974838,0.974831
400,0.0504,0.096821,0.974314,0.974309
450,0.0429,0.114469,0.973441,0.973405
500,0.0523,0.084735,0.978333,0.978318


UBC-NLP/MARBERTv2, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2477,0.128719,0.959112,0.959107
100,0.1375,0.155176,0.966276,0.966276
150,0.119,0.143684,0.947056,0.947028
200,0.1363,0.086408,0.97082,0.970819
250,0.0802,0.100394,0.970645,0.970634
300,0.0895,0.096576,0.973441,0.97343
350,0.1041,0.069038,0.974838,0.974831
400,0.0504,0.096821,0.974314,0.974309
450,0.0429,0.114469,0.973441,0.973405
500,0.0523,0.084735,0.978333,0.978318


UBC-NLP/MARBERTv2, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22890 [00:00<?, ? examples/s]

Map:   0%|          | 0/5723 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2477,0.128719,0.959112,0.959107
100,0.1375,0.155176,0.966276,0.966276
150,0.119,0.143684,0.947056,0.947028
200,0.1363,0.086408,0.97082,0.970819
250,0.0802,0.100394,0.970645,0.970634
300,0.0895,0.096576,0.973441,0.97343
350,0.1041,0.069038,0.974838,0.974831
400,0.0504,0.096821,0.974314,0.974309
450,0.0429,0.114469,0.973441,0.973405
500,0.0523,0.084735,0.978333,0.978318


Unnamed: 0,Model,Accuracy,F1
0,UBC-NLP/MARBERT,0.976236,0.976225
3,UBC-NLP/MARBERTv2,0.980255,0.980249
6,faisalq/SaudiBERT,0.98008,0.980068
