In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'SaudiShopInsights_cloths_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')




df = pd.read_csv('benchmarks2/SaudiShopInsights/ClothesDataset.csv', encoding='utf-8', 
                 engine='python', sep='\t') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])

df = df[df['Review'] != '']
df = df[df['in general'] != '']
classes = set(df['in general'].values)
display(classes)

display(len(df))


display(len(df))

df['in general'] = df['in general'].astype('category')
df['label'] = df['in general'].cat.codes



df = df[['Review', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))
# display(len(df_test))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128

models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]
for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Review'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 10
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()
     

results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('SaudiShopInsights_cloths_results_1.csv')
display(best_results)




Index(['Review', 'Size', 'Color', 'Price', 'Smell', 'Sleeve', 'Quality',
       'Fabric', 'Style', 'Length', 'Image', 'transperancy', 'in general',
       'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16'],
      dtype='object')

Unnamed: 0,Review,Size,Color,Price,Smell,Sleeve,Quality,Fabric,Style,Length,Image,transperancy,in general,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16
0,قماشه خفيييييف ينفع لصيف وطولي ١٥٦ وطلع ع طولي,,,,,,,1.0,,,,,1.0,681.0,318.0,141.0,459.0
1,ملائم: رووووووووووووووو ووووووووووعة ماشاءالله تبارك الله جميييييييييييييييييييييييييييييييييييييل جدددددددددددددددددا فنتاستتتتتتتتتتتتتتتتتتتتتتتتتتتتتك راااااااااااااااااااااااااااااااااااااااااااااااااااااااااااائع,,,,,,,,,,,,1.0,,,,
2,القصه والفستان واللون حلو بس القماش مره لا😭😭,,1.0,,,,,-1.0,1.0,,,,-1.0,,,,
3,يابنات صدق خذوووه قسم بالله يحلليييي ويخليييك احلاهم 🫠❤️‍🔥❤️‍🔥❤️‍🔥❤️‍🔥❤️‍🔥,,,,,,,,,,,,1.0,,,,


{-1.0, 0.0, 1.0}

2140

2140

3

2140

DatasetDict({
    train: Dataset({
        features: ['Review', 'label', '__index_level_0__'],
        num_rows: 1712
    })
    test: Dataset({
        features: ['Review', 'label', '__index_level_0__'],
        num_rows: 428
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7234,0.448682,0.871495,0.585082
20,0.3952,0.3759,0.878505,0.589643
30,0.3632,0.343049,0.885514,0.595146
40,0.2809,0.324388,0.890187,0.59766
50,0.2431,0.344003,0.89486,0.600622
60,0.2028,0.354098,0.880841,0.591668
70,0.163,0.342627,0.892523,0.746029
80,0.1629,0.405159,0.897196,0.696611
90,0.0763,0.385352,0.897196,0.757585
100,0.1214,0.396226,0.899533,0.766789


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7234,0.448682,0.871495,0.585082
20,0.3952,0.3759,0.878505,0.589643
30,0.3632,0.343049,0.885514,0.595146
40,0.2809,0.324388,0.890187,0.59766
50,0.2431,0.344003,0.89486,0.600622
60,0.2028,0.354098,0.880841,0.591668
70,0.163,0.342627,0.892523,0.746029
80,0.1629,0.405159,0.897196,0.696611
90,0.0763,0.385352,0.897196,0.757585
100,0.1214,0.396226,0.899533,0.766789


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7234,0.448682,0.871495,0.585082
20,0.3952,0.3759,0.878505,0.589643
30,0.3632,0.343049,0.885514,0.595146
40,0.2809,0.324388,0.890187,0.59766
50,0.2431,0.344003,0.89486,0.600622
60,0.2028,0.354098,0.880841,0.591668
70,0.163,0.342627,0.892523,0.746029
80,0.1629,0.405159,0.897196,0.696611
90,0.0763,0.385352,0.897196,0.757585
100,0.1214,0.396226,0.899533,0.766789


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8369,0.564754,0.817757,0.539719
20,0.4978,0.430806,0.841121,0.565136
30,0.4322,0.417387,0.845794,0.568864
40,0.3746,0.422779,0.848131,0.570722
50,0.3142,0.378103,0.880841,0.589867
60,0.245,0.39278,0.869159,0.581624
70,0.2443,0.414743,0.86215,0.576247
80,0.1895,0.400666,0.864486,0.627884
90,0.1077,0.479664,0.859813,0.69485
100,0.1203,0.480241,0.859813,0.703206


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8369,0.564754,0.817757,0.539719
20,0.4978,0.430806,0.841121,0.565136
30,0.4322,0.417387,0.845794,0.568864
40,0.3746,0.422779,0.848131,0.570722
50,0.3142,0.378103,0.880841,0.589867
60,0.245,0.39278,0.869159,0.581624
70,0.2443,0.414743,0.86215,0.576247
80,0.1895,0.400666,0.864486,0.627884
90,0.1077,0.479664,0.859813,0.69485
100,0.1203,0.480241,0.859813,0.703206


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8369,0.564754,0.817757,0.539719
20,0.4978,0.430806,0.841121,0.565136
30,0.4322,0.417387,0.845794,0.568864
40,0.3746,0.422779,0.848131,0.570722
50,0.3142,0.378103,0.880841,0.589867
60,0.245,0.39278,0.869159,0.581624
70,0.2443,0.414743,0.86215,0.576247
80,0.1895,0.400666,0.864486,0.627884
90,0.1077,0.479664,0.859813,0.69485
100,0.1203,0.480241,0.859813,0.703206


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7501,0.49354,0.813084,0.548099
20,0.4792,0.433498,0.859813,0.577135
30,0.4354,0.360901,0.892523,0.59972
40,0.3266,0.337057,0.88785,0.59676
50,0.2648,0.341293,0.897196,0.601843
60,0.2082,0.38706,0.883178,0.591931
70,0.1884,0.390651,0.871495,0.702849
80,0.1663,0.369491,0.892523,0.754136
90,0.0692,0.496983,0.873832,0.701266
100,0.1101,0.43763,0.885514,0.759116


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7501,0.49354,0.813084,0.548099
20,0.4792,0.433498,0.859813,0.577135
30,0.4354,0.360901,0.892523,0.59972
40,0.3266,0.337057,0.88785,0.59676
50,0.2648,0.341293,0.897196,0.601843
60,0.2082,0.38706,0.883178,0.591931
70,0.1884,0.390651,0.871495,0.702849
80,0.1663,0.369491,0.892523,0.754136
90,0.0692,0.496983,0.873832,0.701266
100,0.1101,0.43763,0.885514,0.759116


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7501,0.49354,0.813084,0.548099
20,0.4792,0.433498,0.859813,0.577135
30,0.4354,0.360901,0.892523,0.59972
40,0.3266,0.337057,0.88785,0.59676
50,0.2648,0.341293,0.897196,0.601843
60,0.2082,0.38706,0.883178,0.591931
70,0.1884,0.390651,0.871495,0.702849
80,0.1663,0.369491,0.892523,0.754136
90,0.0692,0.496983,0.873832,0.701266
100,0.1101,0.43763,0.885514,0.759116


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8047,0.676994,0.75,0.505209
20,0.5552,0.497851,0.806075,0.539476
30,0.5011,0.447941,0.838785,0.561113
40,0.4202,0.437689,0.843458,0.564407
50,0.3519,0.418061,0.859813,0.576735
60,0.3391,0.511895,0.801402,0.540408
70,0.3385,0.442954,0.845794,0.567984
80,0.2846,0.630209,0.808411,0.529092
90,0.2149,0.434407,0.843458,0.563903
100,0.219,0.456998,0.841121,0.565046


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8047,0.676994,0.75,0.505209
20,0.5552,0.497851,0.806075,0.539476
30,0.5011,0.447941,0.838785,0.561113
40,0.4202,0.437689,0.843458,0.564407
50,0.3519,0.418061,0.859813,0.576735
60,0.3391,0.511895,0.801402,0.540408
70,0.3385,0.442954,0.845794,0.567984
80,0.2846,0.630209,0.808411,0.529092
90,0.2149,0.434407,0.843458,0.563903
100,0.219,0.456998,0.841121,0.565046


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1712 [00:00<?, ? examples/s]

Map:   0%|          | 0/428 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8047,0.676994,0.75,0.505209
20,0.5552,0.497851,0.806075,0.539476
30,0.5011,0.447941,0.838785,0.561113
40,0.4202,0.437689,0.843458,0.564407
50,0.3519,0.418061,0.859813,0.576735
60,0.3391,0.511895,0.801402,0.540408
70,0.3385,0.442954,0.845794,0.567984
80,0.2846,0.630209,0.808411,0.529092
90,0.2149,0.434407,0.843458,0.563903
100,0.219,0.456998,0.841121,0.565046


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.890187,0.751494
3,aubmindlab/bert-base-arabertv02-twitter,0.908879,0.773886
6,qarib/bert-base-qarib,0.892523,0.804997
9,reemalyami/AraRoBERTa-SA,0.848131,0.680665
