In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)




log_file = 'ArSAS_topic_2.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


ds = load_dataset('arbml/ArSAS')
display(ds)

df = pd.DataFrame(ds['train'])

# 'Topic', 'Speech_act_label',

display(df.columns)
df.fillna('', inplace=True)

display(df[:4])

# return

df = df[df['Tweet_text'] != '']

classes = set(df['Topic'].values)
display(classes)

df['Topic'] = df['Topic'].astype('category')
df['label'] = df['Topic'].cat.codes



df = df[['Tweet_text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128


models = [ 
        'faisalq/EgyBERT',
        'UBC-NLP/MARBERT',
        'UBC-NLP/MARBERTv2',  
]


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Tweet_text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}

            
        epochs = 8
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 50, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 50
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('ArSAS_topic_results_2.csv')
display(best_results)



2024-07-27 13:24:03.289047: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-27 13:24:03.319053: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Found cached dataset parquet (/home/ffq/.cache/huggingface/datasets/arbml___parquet/arbml--ArSAS-1d9da4f767fa2dec/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['#Tweet_ID', 'Tweet_text', 'Topic', 'Sentiment_label_confidence', 'Speech_act_label', 'Speech_act_label_confidence', 'label'],
        num_rows: 19897
    })
})

Index(['#Tweet_ID', 'Tweet_text', 'Topic', 'Sentiment_label_confidence',
       'Speech_act_label', 'Speech_act_label_confidence', 'label'],
      dtype='object')

Unnamed: 0,#Tweet_ID,Tweet_text,Topic,Sentiment_label_confidence,Speech_act_label,Speech_act_label_confidence,label
0,929241870508724224,المباراة القـادمة #غانا x #مصر الجولة الأخيرة من المجموعة الـ 5 تصفيات كاس العالم 2018 روسـيا ترتيب مصر : المركز الاول 12 نقطة ( تم حسم التأهل للمونديال ) غــدا الساعة 5:30 ع قناة : بين ســبورت 1 تـــوقعاتكم لـ نتيجة الماتش .؟ 😀😁 https://t.co/RTQBNZXDqM,Event,0.38,Assertion,0.62,2
1,928942264583376897,هل هذه هي سياسة خارجيه لدوله تحترم نفسها والآخرين :#اليمن عدوان وحصار ل 3 سنوات #البحرين قمع حراك شعبها المسالم المطالب بالمساواة والعداله #سوريا #العراق دعموا الإرهاب وارسلوا المال والسلاح والانتحاريين #قطر حصار ومحاولة فرض الوصايه والآن #لبنان محاولة فرض وصايه علني!!,Entity,1.0,Expression,0.68,0
2,928615163250520065,وزير خارجية فرنسا عن منتدى شباب العالم: شعرت بارتياح وأنا أتابعه من باريس - https://t.co/hSvsbEaeUz #youm,Event,0.69,Assertion,1.0,2
3,931614713368186880,ومع السيسي و بشار و ايران و بن زايد و والا خليفه و روافض إلعراق و حفتر و علي صالح كل طواغيت العرب العلاقات عسل علي سمن,Event,1.0,Expression,1.0,0


{'Entity', 'Event', 'Long_Standing'}

3

19897

DatasetDict({
    train: Dataset({
        features: ['Tweet_text', 'label'],
        num_rows: 15917
    })
    test: Dataset({
        features: ['Tweet_text', 'label'],
        num_rows: 3980
    })
})

faisalq/EgyBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.9639,0.68139,0.956281,0.956159
100,0.4594,0.245283,0.98995,0.990003
150,0.1923,0.110165,0.990955,0.99097
200,0.1078,0.069361,0.991709,0.991738
250,0.0865,0.060374,0.990704,0.990789
300,0.0512,0.047093,0.990452,0.990455
350,0.0456,0.043363,0.991709,0.991805
400,0.039,0.038015,0.992714,0.99276
450,0.0364,0.033353,0.993467,0.993496
500,0.0414,0.032534,0.992462,0.992471


faisalq/EgyBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.9366,0.629738,0.984422,0.984555
100,0.4355,0.237966,0.989698,0.98974
150,0.1848,0.116826,0.990704,0.990753
200,0.104,0.071886,0.992211,0.992269
250,0.082,0.067781,0.988693,0.988751
300,0.057,0.04397,0.992462,0.992517
350,0.0401,0.039844,0.992965,0.992965
400,0.0425,0.034253,0.994724,0.994729
450,0.0361,0.035992,0.993216,0.993221
500,0.0417,0.035876,0.993467,0.993416


faisalq/EgyBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/EgyBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.9366,0.629738,0.984422,0.984555
100,0.4355,0.237966,0.989698,0.98974
150,0.1848,0.116826,0.990704,0.990753
200,0.104,0.071886,0.992211,0.992269
250,0.082,0.067781,0.988693,0.988751
300,0.057,0.04397,0.992462,0.992517
350,0.0401,0.039844,0.992965,0.992965
400,0.0425,0.034253,0.994724,0.994729
450,0.0361,0.035992,0.993216,0.993221
500,0.0417,0.035876,0.993467,0.993416


UBC-NLP/MARBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2355,0.065024,0.982663,0.982413
100,0.0598,0.050835,0.986935,0.986827
150,0.055,0.03899,0.991457,0.991385
200,0.0467,0.043691,0.991709,0.991717
250,0.0534,0.054483,0.987688,0.987812
300,0.0317,0.027013,0.992965,0.992978
350,0.0334,0.041721,0.988442,0.988467
400,0.0245,0.036423,0.991457,0.991342
450,0.0263,0.022506,0.994221,0.9942
500,0.0329,0.031241,0.993216,0.993223


UBC-NLP/MARBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2355,0.065024,0.982663,0.982413
100,0.0598,0.050835,0.986935,0.986827
150,0.055,0.03899,0.991457,0.991385
200,0.0467,0.043691,0.991709,0.991717
250,0.0534,0.054483,0.987688,0.987812
300,0.0317,0.027013,0.992965,0.992978
350,0.0334,0.041721,0.988442,0.988467
400,0.0245,0.036423,0.991457,0.991342
450,0.0263,0.022506,0.994221,0.9942
500,0.0329,0.031241,0.993216,0.993223


UBC-NLP/MARBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.2355,0.065024,0.982663,0.982413
100,0.0598,0.050835,0.986935,0.986827
150,0.055,0.03899,0.991457,0.991385
200,0.0467,0.043691,0.991709,0.991717
250,0.0534,0.054483,0.987688,0.987812
300,0.0317,0.027013,0.992965,0.992978
350,0.0334,0.041721,0.988442,0.988467
400,0.0245,0.036423,0.991457,0.991342
450,0.0263,0.022506,0.994221,0.9942
500,0.0329,0.031241,0.993216,0.993223


UBC-NLP/MARBERTv2, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3085,0.059788,0.985427,0.985396
100,0.0492,0.025249,0.99397,0.993915
150,0.0478,0.028072,0.99196,0.991997
200,0.0393,0.030396,0.993216,0.993062
250,0.0512,0.02973,0.992211,0.9922
300,0.0226,0.028712,0.993467,0.993255
350,0.0236,0.018606,0.99397,0.993863
400,0.0161,0.020984,0.994221,0.994117
450,0.0215,0.017691,0.995226,0.995223
500,0.0206,0.014982,0.994975,0.99498


UBC-NLP/MARBERTv2, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3085,0.059788,0.985427,0.985396
100,0.0492,0.025249,0.99397,0.993915
150,0.0478,0.028072,0.99196,0.991997
200,0.0393,0.030396,0.993216,0.993062
250,0.0512,0.02973,0.992211,0.9922
300,0.0226,0.028712,0.993467,0.993255
350,0.0236,0.018606,0.99397,0.993863
400,0.0161,0.020984,0.994221,0.994117
450,0.0215,0.017691,0.995226,0.995223
500,0.0206,0.014982,0.994975,0.99498


UBC-NLP/MARBERTv2, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/15917 [00:00<?, ? examples/s]

Map:   0%|          | 0/3980 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.3085,0.059788,0.985427,0.985396
100,0.0492,0.025249,0.99397,0.993915
150,0.0478,0.028072,0.99196,0.991997
200,0.0393,0.030396,0.993216,0.993062
250,0.0512,0.02973,0.992211,0.9922
300,0.0226,0.028712,0.993467,0.993255
350,0.0236,0.018606,0.99397,0.993863
400,0.0161,0.020984,0.994221,0.994117
450,0.0215,0.017691,0.995226,0.995223
500,0.0206,0.014982,0.994975,0.99498


Unnamed: 0,Model,Accuracy,F1
0,UBC-NLP/MARBERT,0.995477,0.995503
3,UBC-NLP/MARBERTv2,0.995729,0.995586
6,faisalq/EgyBERT,0.995477,0.99547
