In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SDCT_sa_dialect_iden_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('benchmarks/SDCT.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3 , sep='\t'
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])

df = df[df['Text'] != '']

classes = set(df['class'].values)
display(classes)

df['class'] = df['class'].astype('category')
df['label'] = df['class'].cat.codes

df = df[['Text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))
# display(len(df_test))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128

models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 7
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SDCT_sa_dialect_iden_results_1.csv')
display(best_results)



2024-03-27 07:36:17.458003: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-27 07:36:17.483351: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['  Date', ' Location', 'Text', 'class', 'sentiment'], dtype='object')

Unnamed: 0,Date,Location,Text,class,sentiment
0,2020-03-26 16:46:48,"جدة, المملكة العربية السعودية",ما الومك بس الوم الروح لو حنت عليك,hj,2
1,2020-03-26 16:46:47,"Jeddah,Kingdom of saudi Arabia",في لهجة ثانية ثفلة,hj,2
2,2020-03-26 16:46:46,"جدة, المملكة العربية السعودية",احس اللي عندنا اصغر,hj,2
3,2020-03-26 16:46:44,"جدة, المملكة العربية السعودية",كورونا قاعد في البيت يلطم,hj,2


{'ga', 'hj', 'nj'}

3

4181

DatasetDict({
    train: Dataset({
        features: ['Text', 'label'],
        num_rows: 3344
    })
    test: Dataset({
        features: ['Text', 'label'],
        num_rows: 837
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7684,0.382939,0.886499,0.887607
20,0.2791,0.287545,0.908005,0.909999
30,0.2218,0.300422,0.916368,0.917925
40,0.1965,0.226692,0.9319,0.932727
50,0.1723,0.186012,0.945042,0.945521
60,0.1186,0.183925,0.946237,0.946808
70,0.1123,0.178094,0.942652,0.943367
80,0.1074,0.199589,0.939068,0.939929
90,0.0844,0.201047,0.935484,0.936409
100,0.0841,0.183569,0.941458,0.942451


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7205,0.370086,0.88172,0.88304
20,0.2608,0.261221,0.916368,0.918101
30,0.218,0.30574,0.911589,0.91312
40,0.2281,0.208898,0.939068,0.939827
50,0.16,0.195065,0.936679,0.937259
60,0.1394,0.19479,0.948626,0.949156
70,0.0944,0.186412,0.939068,0.939632
80,0.1162,0.218583,0.934289,0.935082
90,0.0806,0.226016,0.935484,0.936539
100,0.0956,0.220288,0.940263,0.941155


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7205,0.370086,0.88172,0.88304
20,0.2608,0.261221,0.916368,0.918101
30,0.218,0.30574,0.911589,0.91312
40,0.2281,0.208898,0.939068,0.939827
50,0.16,0.195065,0.936679,0.937259
60,0.1394,0.19479,0.948626,0.949156
70,0.0944,0.186412,0.939068,0.939632
80,0.1162,0.218583,0.934289,0.935082
90,0.0806,0.226016,0.935484,0.936539
100,0.0956,0.220288,0.940263,0.941155


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9358,0.564481,0.851852,0.853154
20,0.391,0.243969,0.911589,0.913186
30,0.2098,0.234528,0.915173,0.916634
40,0.1994,0.144282,0.955795,0.956357
50,0.152,0.137875,0.956989,0.957452
60,0.094,0.159535,0.955795,0.956577
70,0.0677,0.184357,0.945042,0.94523
80,0.0846,0.150517,0.959379,0.959936
90,0.0576,0.147224,0.962963,0.963639
100,0.1437,0.133423,0.953405,0.954303


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9358,0.564481,0.851852,0.853154
20,0.391,0.243969,0.911589,0.913186
30,0.2098,0.234528,0.915173,0.916634
40,0.1994,0.144282,0.955795,0.956357
50,0.152,0.137875,0.956989,0.957452
60,0.094,0.159535,0.955795,0.956577
70,0.0677,0.184357,0.945042,0.94523
80,0.0846,0.150517,0.959379,0.959936
90,0.0576,0.147224,0.962963,0.963639
100,0.1437,0.133423,0.953405,0.954303


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9358,0.564481,0.851852,0.853154
20,0.391,0.243969,0.911589,0.913186
30,0.2098,0.234528,0.915173,0.916634
40,0.1994,0.144282,0.955795,0.956357
50,0.152,0.137875,0.956989,0.957452
60,0.094,0.159535,0.955795,0.956577
70,0.0677,0.184357,0.945042,0.94523
80,0.0846,0.150517,0.959379,0.959936
90,0.0576,0.147224,0.962963,0.963639
100,0.1437,0.133423,0.953405,0.954303


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8487,0.391471,0.88172,0.881899
20,0.2512,0.120716,0.961768,0.962416
30,0.1248,0.120742,0.962963,0.963624
40,0.1445,0.110988,0.959379,0.960204
50,0.1259,0.201846,0.941458,0.941399
60,0.1839,0.113625,0.966547,0.967205
70,0.107,0.116589,0.964158,0.964783
80,0.0707,0.112014,0.966547,0.966858
90,0.0483,0.125711,0.961768,0.962454
100,0.0687,0.103057,0.972521,0.97289


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8487,0.391471,0.88172,0.881899
20,0.2512,0.120716,0.961768,0.962416
30,0.1248,0.120742,0.962963,0.963624
40,0.1445,0.110988,0.959379,0.960204
50,0.1259,0.201846,0.941458,0.941399
60,0.1839,0.113625,0.966547,0.967205
70,0.107,0.116589,0.964158,0.964783
80,0.0707,0.112014,0.966547,0.966858
90,0.0483,0.125711,0.961768,0.962454
100,0.0687,0.103057,0.972521,0.97289


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8487,0.391471,0.88172,0.881899
20,0.2512,0.120716,0.961768,0.962416
30,0.1248,0.120742,0.962963,0.963624
40,0.1445,0.110988,0.959379,0.960204
50,0.1259,0.201846,0.941458,0.941399
60,0.1839,0.113625,0.966547,0.967205
70,0.107,0.116589,0.964158,0.964783
80,0.0707,0.112014,0.966547,0.966858
90,0.0483,0.125711,0.961768,0.962454
100,0.0687,0.103057,0.972521,0.97289


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0375,0.724414,0.660693,0.60607
20,0.5267,0.291841,0.899642,0.900923
30,0.302,0.229733,0.921147,0.921834
40,0.3092,0.198383,0.930705,0.931821
50,0.2302,0.259991,0.902031,0.902255
60,0.1805,0.189287,0.935484,0.936677
70,0.1293,0.215834,0.9319,0.932979
80,0.14,0.201971,0.937873,0.939103
90,0.1045,0.189487,0.940263,0.941403
100,0.0879,0.20492,0.930705,0.931711


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0375,0.724414,0.660693,0.60607
20,0.5267,0.291841,0.899642,0.900923
30,0.302,0.229733,0.921147,0.921834
40,0.3092,0.198383,0.930705,0.931821
50,0.2302,0.259991,0.902031,0.902255
60,0.1805,0.189287,0.935484,0.936677
70,0.1293,0.215834,0.9319,0.932979
80,0.14,0.201971,0.937873,0.939103
90,0.1045,0.189487,0.940263,0.941403
100,0.0879,0.20492,0.930705,0.931711


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3344 [00:00<?, ? examples/s]

Map:   0%|          | 0/837 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0375,0.724414,0.660693,0.60607
20,0.5267,0.291841,0.899642,0.900923
30,0.302,0.229733,0.921147,0.921834
40,0.3092,0.198383,0.930705,0.931821
50,0.2302,0.259991,0.902031,0.902255
60,0.1805,0.189287,0.935484,0.936677
70,0.1293,0.215834,0.9319,0.932979
80,0.14,0.201971,0.937873,0.939103
90,0.1045,0.189487,0.940263,0.941403
100,0.0879,0.20492,0.930705,0.931711


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.962963,0.963639
3,aubmindlab/bert-base-arabertv02-twitter,0.949821,0.95028
4,qarib/bert-base-qarib,0.9773,0.977593
7,reemalyami/AraRoBERTa-SA,0.948626,0.949526
