In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SaudiShopInsights_elect_2.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('benchmarks2/SaudiShopInsights/Electronics Dataset.csv', encoding='utf-8', 
                 engine='python', sep='\t') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])


df = df[df['Reviews'] != '']
df = df[df['General'] != '']
classes = set(df['General'].values)
display(classes)


display(len(df))

df['General'] = df['General'].astype('category')
df['label'] = df['General'].cat.codes

df = df[['Reviews', 'label']]

classes_num = len(classes)
display(classes_num)
display(len(df))

ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128

models = [ 
        'faisalq/SaudiBERT',
        'UBC-NLP/MARBERT',
        'UBC-NLP/MARBERTv2',  
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Reviews'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 10
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SaudiShopInsights_elect_results_2.csv')
display(best_results)




2024-04-03 04:51:13.438777: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-03 04:51:13.463027: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['General', 'Camera', 'Size', 'Design', 'Audio', 'Post Purchase Service',
       'Life Time', 'Display', 'Battery', 'Speed', 'Charger', 'Usage',
       'Quality', 'Price', 'Reviews'],
      dtype='object')

Unnamed: 0,General,Camera,Size,Design,Audio,Post Purchase Service,Life Time,Display,Battery,Speed,Charger,Usage,Quality,Price,Reviews
0,1.0,,,,,,,,,,,,1.0,-1.0,الشكل جميل والجودة حلوه بس السعر عالي
1,-1.0,,,,,-1.0,,,,,,-1.0,-1.0,,لا انصح لانها غير مضمونة يا خربان يا كويسالجهاز كل بعد فلم بيخرب ارجع افرمتو بيخرب ثاني والان باتصل بخدمة العملا وما يردوا
2,1.0,,,,,,,,,,,,,1.0,رخيص مره مخصص اكثر للجوالات وجيده نوعا ما على الكاميرات
3,1.0,,,,,,,,,,,,1.0,1.0,جهاز كويس بالنسبة لسعره


{-1.0, 0.0, 1.0}

1999

3

1999

DatasetDict({
    train: Dataset({
        features: ['Reviews', 'label', '__index_level_0__'],
        num_rows: 1599
    })
    test: Dataset({
        features: ['Reviews', 'label', '__index_level_0__'],
        num_rows: 400
    })
})

faisalq/SaudiBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.5784,0.188253,0.9475,0.946827
20,0.2453,0.259889,0.9325,0.930893
30,0.1577,0.180778,0.9525,0.951891
40,0.1431,0.170586,0.9475,0.946827
50,0.1041,0.184842,0.9375,0.937072
60,0.0839,0.172553,0.9425,0.941763
70,0.0637,0.184262,0.955,0.954286
80,0.0372,0.204192,0.945,0.944387
90,0.0505,0.360603,0.9275,0.925656
100,0.0508,0.208229,0.9475,0.946827


faisalq/SaudiBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.4909,0.178596,0.945,0.944444
20,0.2382,0.316554,0.92,0.918031
30,0.2105,0.172536,0.9475,0.946632
40,0.1568,0.163482,0.9475,0.946943
50,0.1174,0.163045,0.94,0.939196
60,0.084,0.174537,0.94,0.939265
70,0.0436,0.240676,0.9475,0.9467
80,0.0355,0.250551,0.9475,0.946827
90,0.0392,0.360161,0.92,0.917898
100,0.0352,0.241593,0.945,0.944263


faisalq/SaudiBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/SaudiBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.4909,0.178596,0.945,0.944444
20,0.2382,0.316554,0.92,0.918031
30,0.2105,0.172536,0.9475,0.946632
40,0.1568,0.163482,0.9475,0.946943
50,0.1174,0.163045,0.94,0.939196
60,0.084,0.174537,0.94,0.939265
70,0.0436,0.240676,0.9475,0.9467
80,0.0355,0.250551,0.9475,0.946827
90,0.0392,0.360161,0.92,0.917898
100,0.0352,0.241593,0.945,0.944263


UBC-NLP/MARBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6288,0.289018,0.92,0.917761
20,0.334,0.409818,0.855,0.846138
30,0.2548,0.211685,0.925,0.923389
40,0.1783,0.218287,0.935,0.934527
50,0.1829,0.213138,0.93,0.929491
60,0.1351,0.193911,0.9375,0.9369
70,0.057,0.373719,0.91,0.907157
80,0.0459,0.281292,0.9375,0.936467
90,0.0787,0.526137,0.8975,0.893556
100,0.0867,0.217076,0.9425,0.941889


UBC-NLP/MARBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6288,0.289018,0.92,0.917761
20,0.334,0.409818,0.855,0.846138
30,0.2548,0.211685,0.925,0.923389
40,0.1783,0.218287,0.935,0.934527
50,0.1829,0.213138,0.93,0.929491
60,0.1351,0.193911,0.9375,0.9369
70,0.057,0.373719,0.91,0.907157
80,0.0459,0.281292,0.9375,0.936467
90,0.0787,0.526137,0.8975,0.893556
100,0.0867,0.217076,0.9425,0.941889


UBC-NLP/MARBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.6288,0.289018,0.92,0.917761
20,0.334,0.409818,0.855,0.846138
30,0.2548,0.211685,0.925,0.923389
40,0.1783,0.218287,0.935,0.934527
50,0.1829,0.213138,0.93,0.929491
60,0.1351,0.193911,0.9375,0.9369
70,0.057,0.373719,0.91,0.907157
80,0.0459,0.281292,0.9375,0.936467
90,0.0787,0.526137,0.8975,0.893556
100,0.0867,0.217076,0.9425,0.941889


UBC-NLP/MARBERTv2, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7393,0.386591,0.905,0.904309
20,0.3426,0.248938,0.93,0.928277
30,0.2028,0.20299,0.9375,0.936467
40,0.1814,0.241732,0.915,0.914639
50,0.1809,0.169875,0.94,0.939196
60,0.0994,0.175249,0.945,0.944387
70,0.0797,0.198787,0.945,0.944263
80,0.0609,0.259436,0.935,0.933504
90,0.0568,0.259767,0.9325,0.930999
100,0.0629,0.216064,0.945,0.944055


UBC-NLP/MARBERTv2, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7393,0.386591,0.905,0.904309
20,0.3426,0.248938,0.93,0.928277
30,0.2028,0.20299,0.9375,0.936467
40,0.1814,0.241732,0.915,0.914639
50,0.1809,0.169875,0.94,0.939196
60,0.0994,0.175249,0.945,0.944387
70,0.0797,0.198787,0.945,0.944263
80,0.0609,0.259436,0.935,0.933504
90,0.0568,0.259767,0.9325,0.930999
100,0.0629,0.216064,0.945,0.944055


UBC-NLP/MARBERTv2, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1599 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7393,0.386591,0.905,0.904309
20,0.3426,0.248938,0.93,0.928277
30,0.2028,0.20299,0.9375,0.936467
40,0.1814,0.241732,0.915,0.914639
50,0.1809,0.169875,0.94,0.939196
60,0.0994,0.175249,0.945,0.944387
70,0.0797,0.198787,0.945,0.944263
80,0.0609,0.259436,0.935,0.933504
90,0.0568,0.259767,0.9325,0.930999
100,0.0629,0.216064,0.945,0.944055


Unnamed: 0,Model,Accuracy,F1
0,UBC-NLP/MARBERT,0.945,0.944127
3,UBC-NLP/MARBERTv2,0.9525,0.951996
6,faisalq/SaudiBERT,0.955,0.954286
