In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)




log_file = 'ArSAS_sentiment_mix_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


ds = load_dataset('arbml/ArSAS')
display(ds)

df = pd.DataFrame(ds['train'])



display(df.columns)
df.fillna('', inplace=True)

display(df[:4])

# return

df = df[df['Tweet_text'] != '']

classes = set(df['label'].values)
display(classes)


df = df[['Tweet_text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128



models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib', 
]


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Tweet_text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}

            
        epochs = 8
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 50, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 50
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('ArSAS_sentiment_mix_results_1.csv')
display(best_results)



2024-07-27 16:39:34.291239: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-27 16:39:34.316587: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Found cached dataset parquet (/home/ffq/.cache/huggingface/datasets/arbml___parquet/arbml--ArSAS-1d9da4f767fa2dec/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['#Tweet_ID', 'Tweet_text', 'Topic', 'Sentiment_label_confidence', 'Speech_act_label', 'Speech_act_label_confidence', 'label'],
        num_rows: 19897
    })
})

Index(['#Tweet_ID', 'Tweet_text', 'Topic', 'Sentiment_label_confidence',
       'Speech_act_label', 'Speech_act_label_confidence', 'label'],
      dtype='object')

Unnamed: 0,#Tweet_ID,Tweet_text,Topic,Sentiment_label_confidence,Speech_act_label,Speech_act_label_confidence,label
0,929241870508724224,المباراة القـادمة #غانا x #مصر الجولة الأخيرة من المجموعة الـ 5 تصفيات كاس العالم 2018 روسـيا ترتيب مصر : المركز الاول 12 نقطة ( تم حسم التأهل للمونديال ) غــدا الساعة 5:30 ع قناة : بين ســبورت 1 تـــوقعاتكم لـ نتيجة الماتش .؟ 😀😁 https://t.co/RTQBNZXDqM,Event,0.38,Assertion,0.62,2
1,928942264583376897,هل هذه هي سياسة خارجيه لدوله تحترم نفسها والآخرين :#اليمن عدوان وحصار ل 3 سنوات #البحرين قمع حراك شعبها المسالم المطالب بالمساواة والعداله #سوريا #العراق دعموا الإرهاب وارسلوا المال والسلاح والانتحاريين #قطر حصار ومحاولة فرض الوصايه والآن #لبنان محاولة فرض وصايه علني!!,Entity,1.0,Expression,0.68,0
2,928615163250520065,وزير خارجية فرنسا عن منتدى شباب العالم: شعرت بارتياح وأنا أتابعه من باريس - https://t.co/hSvsbEaeUz #youm,Event,0.69,Assertion,1.0,2
3,931614713368186880,ومع السيسي و بشار و ايران و بن زايد و والا خليفه و روافض إلعراق و حفتر و علي صالح كل طواغيت العرب العلاقات عسل علي سمن,Event,1.0,Expression,1.0,0


{0, 1, 2, 3}

4

19897

DatasetDict({
    train: Dataset({
        features: ['Tweet_text', 'label'],
        num_rows: 15917
    })
    test: Dataset({
        features: ['Tweet_text', 'label'],
        num_rows: 3980
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.7484,0.619942,0.777387,0.609362
100,0.6112,0.585385,0.778894,0.593502
150,0.5902,0.576436,0.780905,0.604877
200,0.5604,0.582844,0.783166,0.628925
250,0.6105,0.559163,0.788442,0.625446
300,0.4151,0.605601,0.772613,0.65946
350,0.4248,0.61027,0.768844,0.656186
400,0.3872,0.614653,0.774121,0.668591
450,0.4413,0.605909,0.791457,0.637158
500,0.4154,0.606942,0.781407,0.644339


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.7383,0.620297,0.773367,0.604905
100,0.6146,0.581531,0.781658,0.601609
150,0.5886,0.570885,0.782161,0.608506
200,0.5623,0.591555,0.773618,0.615888
250,0.6089,0.558607,0.791709,0.636798
300,0.4181,0.594435,0.776884,0.66763
350,0.4349,0.58856,0.782663,0.665455
400,0.3951,0.616126,0.772111,0.665033
450,0.4388,0.618555,0.788693,0.642535
500,0.4203,0.607082,0.782915,0.646294


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.7383,0.620297,0.773367,0.604905
100,0.6146,0.581531,0.781658,0.601609
150,0.5886,0.570885,0.782161,0.608506
200,0.5623,0.591555,0.773618,0.615888
250,0.6089,0.558607,0.791709,0.636798
300,0.4181,0.594435,0.776884,0.66763
350,0.4349,0.58856,0.782663,0.665455
400,0.3951,0.616126,0.772111,0.665033
450,0.4388,0.618555,0.788693,0.642535
500,0.4203,0.607082,0.782915,0.646294


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.8963,0.691986,0.753769,0.574436
100,0.692,0.663002,0.761809,0.572517
150,0.6799,0.617161,0.774874,0.59307
200,0.6245,0.639767,0.758794,0.581742
250,0.6624,0.619291,0.769598,0.632413
300,0.4627,0.634016,0.755528,0.642578
350,0.4726,0.633543,0.769095,0.639838
400,0.4422,0.661194,0.767337,0.64041
450,0.4757,0.650949,0.775126,0.610901
500,0.4516,0.648647,0.771106,0.640303


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.8963,0.691986,0.753769,0.574436
100,0.692,0.663002,0.761809,0.572517
150,0.6799,0.617161,0.774874,0.59307
200,0.6245,0.639767,0.758794,0.581742
250,0.6624,0.619291,0.769598,0.632413
300,0.4627,0.634016,0.755528,0.642578
350,0.4726,0.633543,0.769095,0.639838
400,0.4422,0.661194,0.767337,0.64041
450,0.4757,0.650949,0.775126,0.610901
500,0.4516,0.648647,0.771106,0.640303


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.8963,0.691986,0.753769,0.574436
100,0.692,0.663002,0.761809,0.572517
150,0.6799,0.617161,0.774874,0.59307
200,0.6245,0.639767,0.758794,0.581742
250,0.6624,0.619291,0.769598,0.632413
300,0.4627,0.634016,0.755528,0.642578
350,0.4726,0.633543,0.769095,0.639838
400,0.4422,0.661194,0.767337,0.64041
450,0.4757,0.650949,0.775126,0.610901
500,0.4516,0.648647,0.771106,0.640303


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.7954,0.68286,0.763568,0.581131
100,0.633,0.610996,0.777387,0.59808
150,0.6086,0.576403,0.784925,0.60836
200,0.5774,0.621104,0.771608,0.59176
250,0.641,0.577044,0.778392,0.627367
300,0.4008,0.619681,0.769347,0.657542
350,0.4113,0.638404,0.759045,0.652537
400,0.3919,0.634374,0.760804,0.659618
450,0.4212,0.621261,0.78191,0.632469
500,0.3889,0.626712,0.773869,0.656958


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.7954,0.68286,0.763568,0.581131
100,0.633,0.610996,0.777387,0.59808
150,0.6086,0.576403,0.784925,0.60836
200,0.5774,0.621104,0.771608,0.59176
250,0.641,0.577044,0.778392,0.627367
300,0.4008,0.619681,0.769347,0.657542
350,0.4113,0.638404,0.759045,0.652537
400,0.3919,0.634374,0.760804,0.659618
450,0.4212,0.621261,0.78191,0.632469
500,0.3889,0.626712,0.773869,0.656958


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.7954,0.68286,0.763568,0.581131
100,0.633,0.610996,0.777387,0.59808
150,0.6086,0.576403,0.784925,0.60836
200,0.5774,0.621104,0.771608,0.59176
250,0.641,0.577044,0.778392,0.627367
300,0.4008,0.619681,0.769347,0.657542
350,0.4113,0.638404,0.759045,0.652537
400,0.3919,0.634374,0.760804,0.659618
450,0.4212,0.621261,0.78191,0.632469
500,0.3889,0.626712,0.773869,0.656958


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.753518,0.652776
3,aubmindlab/bert-base-arabertv02-twitter,0.770854,0.675073
4,qarib/bert-base-qarib,0.762563,0.661607
