In [1]:


import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'HARD_balanced.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



# ds = load_dataset('hard')

df = pd.read_csv('HARD/balanced-reviews.txt', encoding='utf-16', engine='python', sep='\t') #, quotechar="'"
# df = pd.read_csv('HARD/unbalanced-reviews.txt', encoding='utf-16', engine='python', sep='\t') #, quotechar="'"
# display(df.columns)
df = df[['rating', 'review']]

df['rating'].replace({1: 0, 2: 0}, inplace=True) #replace 1 and 2 with 0 (negative)
# df['rating'].replace({3: 1}, inplace=True)  #replace 3 with 1 (neutral)
df['rating'].replace({4: 2, 5: 1}, inplace=True)  #replace 4 and 5 with 1 (positive)
df['label'] = df['rating']

classes = set(df['rating'].values)
display(classes)
classes_num = len(classes)
display(classes_num)
display(len(df))

display(df[:4])


ds = Dataset.from_pandas(df)

# ds = ds['train']
ds = ds.train_test_split(test_size=0.2)
display(ds)
df = ''

max_sequence_length = 128

# classes_num = 6
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['review'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 100, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 100
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('HARD_balanced_results.csv')
display(best_results)



2024-02-18 04:51:24.233082: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-18 04:51:24.256682: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


{0, 1, 2}

3

105698

Unnamed: 0,rating,review,label
0,0,“ممتاز”. النظافة والطاقم متعاون.,0
1,1,استثنائي. سهولة إنهاء المعاملة في الاستقبال. لاشيئ,1
2,1,استثنائي. انصح بأختيار الاسويت و بالاخص غرفه رقم 801. نوعية الارضيه,1
3,0,“استغرب تقييم الفندق كخمس نجوم”. لا شي. يستحق 2 نجمه,0


DatasetDict({
    train: Dataset({
        features: ['rating', 'review', 'label'],
        num_rows: 84558
    })
    test: Dataset({
        features: ['rating', 'review', 'label'],
        num_rows: 21140
    })
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3756,0.275632,0.882592,0.857906
200,0.2785,0.277788,0.879659,0.857864
300,0.2727,0.270719,0.886282,0.860953
400,0.2426,0.261818,0.890445,0.868443
500,0.2352,0.257472,0.890255,0.866646
600,0.2341,0.258672,0.888505,0.865203
700,0.215,0.273723,0.8886,0.865368
800,0.1947,0.279397,0.885525,0.863928
900,0.1806,0.274433,0.888127,0.864809
1000,0.1863,0.295517,0.888269,0.864102


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.366,0.280145,0.880937,0.855952
200,0.2783,0.27525,0.879754,0.857828
300,0.2752,0.273434,0.887606,0.862719
400,0.2439,0.261616,0.88912,0.866037
500,0.2351,0.256878,0.888411,0.8641
600,0.235,0.258045,0.888458,0.865481
700,0.2166,0.267294,0.887985,0.865291
800,0.1971,0.278194,0.884059,0.862133
900,0.1825,0.275274,0.886566,0.863543
1000,0.1858,0.301297,0.885572,0.860734


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.366,0.280145,0.880937,0.855952
200,0.2783,0.27525,0.879754,0.857828
300,0.2752,0.273434,0.887606,0.862719
400,0.2439,0.261616,0.88912,0.866037
500,0.2351,0.256878,0.888411,0.8641
600,0.235,0.258045,0.888458,0.865481
700,0.2166,0.267294,0.887985,0.865291
800,0.1971,0.278194,0.884059,0.862133
900,0.1825,0.275274,0.886566,0.863543
1000,0.1858,0.301297,0.885572,0.860734


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3591,0.279279,0.880322,0.856292
200,0.2758,0.270215,0.881883,0.85982
300,0.2713,0.258756,0.888411,0.864244
400,0.2384,0.265911,0.888505,0.865233
500,0.2337,0.261622,0.888647,0.86484
600,0.2335,0.270314,0.883349,0.859323
700,0.2109,0.280781,0.889877,0.867375
800,0.192,0.281546,0.885005,0.863005
900,0.1781,0.279798,0.888978,0.866141
1000,0.1864,0.291265,0.89106,0.868155


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3591,0.279279,0.880322,0.856292
200,0.2758,0.270215,0.881883,0.85982
300,0.2713,0.258756,0.888411,0.864244
400,0.2384,0.265911,0.888505,0.865233
500,0.2337,0.261622,0.888647,0.86484
600,0.2335,0.270314,0.883349,0.859323
700,0.2109,0.280781,0.889877,0.867375
800,0.192,0.281546,0.885005,0.863005
900,0.1781,0.279798,0.888978,0.866141
1000,0.1864,0.291265,0.89106,0.868155


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3591,0.279279,0.880322,0.856292
200,0.2758,0.270215,0.881883,0.85982
300,0.2713,0.258756,0.888411,0.864244
400,0.2384,0.265911,0.888505,0.865233
500,0.2337,0.261622,0.888647,0.86484
600,0.2335,0.270314,0.883349,0.859323
700,0.2109,0.280781,0.889877,0.867375
800,0.192,0.281546,0.885005,0.863005
900,0.1781,0.279798,0.888978,0.866141
1000,0.1864,0.291265,0.89106,0.868155


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3627,0.276799,0.880085,0.856517
200,0.2764,0.266082,0.887228,0.86431
300,0.2722,0.273791,0.887181,0.862528
400,0.2419,0.262058,0.890445,0.868463
500,0.2339,0.262488,0.890445,0.867431
600,0.2345,0.264067,0.884957,0.862111
700,0.2175,0.275083,0.889593,0.866657
800,0.1949,0.2843,0.885714,0.863679
900,0.1804,0.280625,0.888269,0.864708
1000,0.189,0.29409,0.888174,0.863928


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3627,0.276799,0.880085,0.856517
200,0.2764,0.266082,0.887228,0.86431
300,0.2722,0.273791,0.887181,0.862528
400,0.2419,0.262058,0.890445,0.868463
500,0.2339,0.262488,0.890445,0.867431
600,0.2345,0.264067,0.884957,0.862111
700,0.2175,0.275083,0.889593,0.866657
800,0.1949,0.2843,0.885714,0.863679
900,0.1804,0.280625,0.888269,0.864708
1000,0.189,0.29409,0.888174,0.863928


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/84558 [00:00<?, ? examples/s]

Map:   0%|          | 0/21140 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
100,0.3627,0.276799,0.880085,0.856517
200,0.2764,0.266082,0.887228,0.86431
300,0.2722,0.273791,0.887181,0.862528
400,0.2419,0.262058,0.890445,0.868463
500,0.2339,0.262488,0.890445,0.867431
600,0.2345,0.264067,0.884957,0.862111
700,0.2175,0.275083,0.889593,0.866657
800,0.1949,0.2843,0.885714,0.863679
900,0.1804,0.280625,0.888269,0.864708
1000,0.189,0.29409,0.888174,0.863928


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.890445,0.868463
3,faisalq/bert-base-arabic-senpiece,0.89106,0.868155
6,faisalq/bert-base-arabic-wordpiece,0.890445,0.868443
