In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'ArEgyCorpus2_2.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/Arabic-Egyptian-Corpus-2.csv', encoding='utf-8', engine='python') #, sep='\t' , quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])



df = df[df['review'] != '']


df.loc[df['label'].str.contains('negative', na=False), 'label'] = 'negative'
df.loc[df['label'].str.contains('positive', na=False), 'label'] = 'positive'

classes = set(df['label'].values)
display(classes)

df['label'] = df['label'].astype('category')
df['label'] = df['label'].cat.codes



df = df[['review', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128


models = [ 
        'faisalq/EgyBERT',
        'UBC-NLP/MARBERT',
        'UBC-NLP/MARBERTv2',  
]


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['review'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}

            
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 20, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 20
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('ArEgyCorpus2_results_2.csv')
display(best_results)



2024-07-27 02:21:53.755351: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-27 02:21:53.778581: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['review', 'label'], dtype='object')

Unnamed: 0,review,label
0,يالاهوى لسه الناس بتسلم و تبوس كل اللي اتغير انهم بيقولوا مع ان في كورونا بس هات بوسة.,negative
1,هقول ايه مريض نفسي للاسف.,negative
2,دعوة ليا وليكم يارب الدنيا بخيرها والأخره بفردوسها.,positive
3,ياريت نطلع رحلة بعد الامتحانات.,positive


{'negative', 'positive'}

2

10000

DatasetDict({
    train: Dataset({
        features: ['review', 'label'],
        num_rows: 8000
    })
    test: Dataset({
        features: ['review', 'label'],
        num_rows: 2000
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6711,0.612681,0.943,0.942964
40,0.5139,0.376366,0.9695,0.9695
60,0.3192,0.235196,0.978,0.977995
80,0.2296,0.166438,0.979,0.978993
100,0.1871,0.130901,0.98,0.979992
120,0.1377,0.103146,0.983,0.982992
140,0.1119,0.108619,0.9755,0.9755
160,0.0813,0.098798,0.976,0.975977
180,0.074,0.087036,0.9795,0.979499
200,0.0521,0.080153,0.982,0.981997


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6685,0.605367,0.9325,0.9325
40,0.5086,0.37361,0.975,0.974997
60,0.3198,0.24166,0.974,0.973998
80,0.2384,0.17742,0.978,0.977996
100,0.1982,0.151145,0.975,0.974973
120,0.1469,0.113238,0.9785,0.978484
140,0.1039,0.090786,0.983,0.982996
160,0.0846,0.086987,0.9805,0.980483
180,0.0673,0.082929,0.9795,0.979495
200,0.053,0.076152,0.9815,0.981491


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6685,0.605367,0.9325,0.9325
40,0.5086,0.37361,0.975,0.974997
60,0.3198,0.24166,0.974,0.973998
80,0.2384,0.17742,0.978,0.977996
100,0.1982,0.151145,0.975,0.974973
120,0.1469,0.113238,0.9785,0.978484
140,0.1039,0.090786,0.983,0.982996
160,0.0846,0.086987,0.9805,0.980483
180,0.0673,0.082929,0.9795,0.979495
200,0.053,0.076152,0.9815,0.981491


UBC-NLP/MARBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3172,0.141419,0.955,0.954917
40,0.1352,0.092945,0.9745,0.974483
60,0.1146,0.112066,0.971,0.971
80,0.1062,0.13343,0.956,0.955943
100,0.1197,0.097944,0.9755,0.975499
120,0.1037,0.073996,0.978,0.977998
140,0.0501,0.096665,0.9785,0.978485
160,0.0368,0.117973,0.975,0.97498
180,0.0612,0.105087,0.9745,0.974499
200,0.0465,0.099453,0.977,0.976984


UBC-NLP/MARBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3172,0.141419,0.955,0.954917
40,0.1352,0.092945,0.9745,0.974483
60,0.1146,0.112066,0.971,0.971
80,0.1062,0.13343,0.956,0.955943
100,0.1197,0.097944,0.9755,0.975499
120,0.1037,0.073996,0.978,0.977998
140,0.0501,0.096665,0.9785,0.978485
160,0.0368,0.117973,0.975,0.97498
180,0.0612,0.105087,0.9745,0.974499
200,0.0465,0.099453,0.977,0.976984


UBC-NLP/MARBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3172,0.141419,0.955,0.954917
40,0.1352,0.092945,0.9745,0.974483
60,0.1146,0.112066,0.971,0.971
80,0.1062,0.13343,0.956,0.955943
100,0.1197,0.097944,0.9755,0.975499
120,0.1037,0.073996,0.978,0.977998
140,0.0501,0.096665,0.9785,0.978485
160,0.0368,0.117973,0.975,0.97498
180,0.0612,0.105087,0.9745,0.974499
200,0.0465,0.099453,0.977,0.976984


UBC-NLP/MARBERTv2, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3943,0.108327,0.973,0.972984
40,0.1456,0.103197,0.973,0.972969
60,0.1653,0.073741,0.984,0.983991
80,0.0974,0.098406,0.971,0.971
100,0.0847,0.07241,0.98,0.979987
120,0.1155,0.087289,0.976,0.975999
140,0.0447,0.060266,0.984,0.983998
160,0.0433,0.078614,0.982,0.98199
180,0.0391,0.080229,0.98,0.979983
200,0.0569,0.069324,0.981,0.980983


UBC-NLP/MARBERTv2, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3943,0.108327,0.973,0.972984
40,0.1456,0.103197,0.973,0.972969
60,0.1653,0.073741,0.984,0.983991
80,0.0974,0.098406,0.971,0.971
100,0.0847,0.07241,0.98,0.979987
120,0.1155,0.087289,0.976,0.975999
140,0.0447,0.060266,0.984,0.983998
160,0.0433,0.078614,0.982,0.98199
180,0.0391,0.080229,0.98,0.979983
200,0.0569,0.069324,0.981,0.980983


UBC-NLP/MARBERTv2, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.3943,0.108327,0.973,0.972984
40,0.1456,0.103197,0.973,0.972969
60,0.1653,0.073741,0.984,0.983991
80,0.0974,0.098406,0.971,0.971
100,0.0847,0.07241,0.98,0.979987
120,0.1155,0.087289,0.976,0.975999
140,0.0447,0.060266,0.984,0.983998
160,0.0433,0.078614,0.982,0.98199
180,0.0391,0.080229,0.98,0.979983
200,0.0569,0.069324,0.981,0.980983


Unnamed: 0,Model,Accuracy,F1
0,UBC-NLP/MARBERT,0.9825,0.982495
3,UBC-NLP/MARBERTv2,0.9875,0.987495
6,faisalq/EgyBERT,0.988,0.987994
