In [1]:
# cell-1  
#load and clean the data (removing diacritics and unwanted text)

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'HARD_unbalanced.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



# ds = load_dataset('hard')

# df = pd.read_csv('HARD/balanced-reviews.txt', encoding='utf-16', engine='python', sep='\t') #, quotechar="'"
df = pd.read_csv('HARD/unbalanced-reviews.txt', encoding='utf-16', engine='python', sep='\t') #, quotechar="'"
# display(df.columns)
df = df[['rating', 'review']]

df['rating'].replace({1: 0, 2: 0}, inplace=True) #replace 1 and 2 with 0 (negative)
df['rating'].replace({3: 1}, inplace=True)  #replace 3 with 1 (neutral)
df['rating'].replace({4: 2, 5: 2}, inplace=True)  #replace 4 and 5 with 2 (positive)
df['label'] = df['rating']

classes = set(df['rating'].values)
display(classes)
classes_num = len(classes)
display(classes_num)
display(len(df))

display(df[:4])


ds = Dataset.from_pandas(df)

# ds = ds['train']
ds = ds.train_test_split(test_size=0.2)
display(ds)
df = ''

max_sequence_length = 128

# classes_num = 6
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['review'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 4
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 250, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 250
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('HARD_unbalanced_results.csv')
display(best_results)



2024-02-18 04:18:06.607197: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-18 04:18:06.633203: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


{0, 1, 2}

3

409562

Unnamed: 0,rating,review,label
0,2,“فندق راقي وسوف تتكرر زيارتي له”. الفندق بصراحه جيد وطاقم العمل متعاون جدا وشك ان سريع كان وش أوت نفس الشي من غير اي تعقيد والغرف واسعه وانظيفه والحمام اعزكم لله ممتاز وكبير ونظيف فيه كل مايلزمك وتعامل الطاقم جدا راقي طلبت تغير غرفتي اول ماوصلت والسبب ان في جنبها عمارة تحت الإنشاء وغيرو لي الغرفه علي طول بصراحه من الاخر الفندق جيد جدا وسعره مناسب جدا انا اعتبره شقق راقيه جدا اكثر من فندق. لاشئ بصراحه الا ملاحظه واحدة اني شاهدة خنفسانا (حشرة) صغيره جدا علي الارض هذي الملاحظة الوحيدة فقط ولا الغرفه عموم نظيفة ولا شفنا فيها اي شي,2
1,0,“ممتاز”. النظافة والطاقم متعاون.,0
2,2,استثنائي. سهولة إنهاء المعاملة في الاستقبال. لاشيئ,2
3,2,جيد جداً. 😊👍. لايوجد,2


DatasetDict({
    train: Dataset({
        features: ['rating', 'review', 'label'],
        num_rows: 327649
    })
    test: Dataset({
        features: ['rating', 'review', 'label'],
        num_rows: 81913
    })
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2991,0.255358,0.893985,0.842121
500,0.2481,0.240328,0.900968,0.848998
750,0.2444,0.235655,0.903312,0.85309
1000,0.2395,0.238469,0.903056,0.852437
1250,0.2341,0.236858,0.903935,0.852455
1500,0.2151,0.241729,0.903178,0.851802
1750,0.2072,0.239902,0.904399,0.854252
2000,0.2134,0.23474,0.904081,0.85487
2250,0.2133,0.243671,0.902348,0.851419
2500,0.2177,0.232521,0.903593,0.854356


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2975,0.258503,0.893472,0.841223
500,0.2477,0.23923,0.901871,0.850298
750,0.2442,0.236448,0.902457,0.853286
1000,0.2394,0.237222,0.903336,0.853852
1250,0.2344,0.237401,0.903666,0.851886
1500,0.2151,0.243001,0.902457,0.851011
1750,0.2076,0.241082,0.903837,0.853914
2000,0.2133,0.234739,0.904496,0.855538
2250,0.2139,0.244777,0.903654,0.852859
2500,0.2172,0.234434,0.903153,0.854058


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2975,0.258503,0.893472,0.841223
500,0.2477,0.23923,0.901871,0.850298
750,0.2442,0.236448,0.902457,0.853286
1000,0.2394,0.237222,0.903336,0.853852
1250,0.2344,0.237401,0.903666,0.851886
1500,0.2151,0.243001,0.902457,0.851011
1750,0.2076,0.241082,0.903837,0.853914
2000,0.2133,0.234739,0.904496,0.855538
2250,0.2139,0.244777,0.903654,0.852859
2500,0.2172,0.234434,0.903153,0.854058


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2904,0.249375,0.897123,0.844568
500,0.2475,0.241767,0.899698,0.849241
750,0.2436,0.236973,0.902238,0.852159
1000,0.239,0.23532,0.903178,0.853101
1250,0.2347,0.236256,0.902958,0.850834
1500,0.2147,0.242976,0.902262,0.851181
1750,0.2077,0.239721,0.903251,0.852774
2000,0.2134,0.233871,0.904228,0.856581
2250,0.2139,0.238045,0.903727,0.854883
2500,0.2174,0.233327,0.903568,0.855992


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2904,0.249375,0.897123,0.844568
500,0.2475,0.241767,0.899698,0.849241
750,0.2436,0.236973,0.902238,0.852159
1000,0.239,0.23532,0.903178,0.853101
1250,0.2347,0.236256,0.902958,0.850834
1500,0.2147,0.242976,0.902262,0.851181
1750,0.2077,0.239721,0.903251,0.852774
2000,0.2134,0.233871,0.904228,0.856581
2250,0.2139,0.238045,0.903727,0.854883
2500,0.2174,0.233327,0.903568,0.855992


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2904,0.249375,0.897123,0.844568
500,0.2475,0.241767,0.899698,0.849241
750,0.2436,0.236973,0.902238,0.852159
1000,0.239,0.23532,0.903178,0.853101
1250,0.2347,0.236256,0.902958,0.850834
1500,0.2147,0.242976,0.902262,0.851181
1750,0.2077,0.239721,0.903251,0.852774
2000,0.2134,0.233871,0.904228,0.856581
2250,0.2139,0.238045,0.903727,0.854883
2500,0.2174,0.233327,0.903568,0.855992


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2958,0.254216,0.896646,0.844357
500,0.2476,0.240402,0.899808,0.848525
750,0.2451,0.237576,0.902128,0.853158
1000,0.2391,0.235955,0.902592,0.851407
1250,0.2346,0.236288,0.90214,0.848961
1500,0.2137,0.244974,0.900309,0.846061
1750,0.2062,0.238249,0.903373,0.853962
2000,0.2099,0.236249,0.903056,0.855202
2250,0.2132,0.24353,0.902348,0.853213
2500,0.215,0.235206,0.902763,0.854647


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2958,0.254216,0.896646,0.844357
500,0.2476,0.240402,0.899808,0.848525
750,0.2451,0.237576,0.902128,0.853158
1000,0.2391,0.235955,0.902592,0.851407
1250,0.2346,0.236288,0.90214,0.848961
1500,0.2137,0.244974,0.900309,0.846061
1750,0.2062,0.238249,0.903373,0.853962
2000,0.2099,0.236249,0.903056,0.855202
2250,0.2132,0.24353,0.902348,0.853213
2500,0.215,0.235206,0.902763,0.854647


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/327649 [00:00<?, ? examples/s]

Map:   0%|          | 0/81913 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
250,0.2958,0.254216,0.896646,0.844357
500,0.2476,0.240402,0.899808,0.848525
750,0.2451,0.237576,0.902128,0.853158
1000,0.2391,0.235955,0.902592,0.851407
1250,0.2346,0.236288,0.90214,0.848961
1500,0.2137,0.244974,0.900309,0.846061
1750,0.2062,0.238249,0.903373,0.853962
2000,0.2099,0.236249,0.903056,0.855202
2250,0.2132,0.24353,0.902348,0.853213
2500,0.215,0.235206,0.902763,0.854647


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.903056,0.855202
3,faisalq/bert-base-arabic-senpiece,0.904228,0.856581
6,faisalq/bert-base-arabic-wordpiece,0.904496,0.855538
