In [1]:
# cell-1  
#load and clean the data (removing diacritics and unwanted text)

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'FloDusTA_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')




df = pd.read_csv('benchmarks/FloDusTA.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])


df = df[df['Full_text'] != '']


classes = set(df['Label'].values)
display(classes)

df['label'] = df['Label']


df = df[['Full_text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))

ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128


models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',
        'reemalyami/AraRoBERTa-SA',    
]

for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Full_text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 20, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 20
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('FloDusTA_results_1.csv')
display(best_results)



2024-03-27 01:37:07.918099: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-27 01:37:07.942961: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['id_str', 'Full_text', 'Label'], dtype='object')

Unnamed: 0,id_str,Full_text,Label
0,998294855544590336,هيئة محامي دارفور تدين حادثة اغتيال الطالب عوض الله أبكر وتطالب بعدم إفلات الجناة من المحاكمة https://t.co/a7boK6DHb2,0
1,989053332915474432,#واجب_القراءة للاستاذ جميل الذيابي @JameelAlTheyabi https://t.co/pg9KTlrGKk,0
2,980602110126559232,@M_ALROKIBH السلام عليكم ورحمة الله وبركاته الاخ منصور هل نراك بعد هذا التقرير تصور وانت تقود السيارة او لا مع العلم انك تحذر من استخدام الجوال بتقريرك اثناء القيادة واكثر اسباب حوادث المرورية هو استخدام الجوال .....؟,0
3,999051083438854146,حادثة غريبة على مجتمعنا.\nمن له مصلحة بقتل رجل كهذا؟\nوفي شهر رمضان؟ 🤔\n\n#وفاه_الدكتور_ابراهيم_الغصن,0


{0, 1, 2, 3}

4

8998

DatasetDict({
    train: Dataset({
        features: ['Full_text', 'label'],
        num_rows: 7198
    })
    test: Dataset({
        features: ['Full_text', 'label'],
        num_rows: 1800
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6076,0.290213,0.892778,0.888729
40,0.3331,0.241709,0.908889,0.906103
60,0.2449,0.256847,0.904444,0.904223
80,0.1831,0.157231,0.947778,0.945549
100,0.1704,0.136703,0.954444,0.951118
120,0.1413,0.146345,0.951111,0.947678
140,0.1057,0.138074,0.955556,0.953107
160,0.1051,0.149365,0.947778,0.94495
180,0.1283,0.140798,0.955556,0.953601
200,0.0932,0.150082,0.950556,0.947919


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6256,0.295189,0.897778,0.891939
40,0.3254,0.234894,0.908333,0.905744
60,0.2671,0.270332,0.896111,0.896492
80,0.1843,0.150136,0.948333,0.94628
100,0.1715,0.142185,0.953333,0.950036
120,0.1499,0.142276,0.956111,0.954177
140,0.0994,0.153438,0.950556,0.948058
160,0.0982,0.161787,0.945,0.941913
180,0.1045,0.164075,0.945556,0.943325
200,0.0844,0.170885,0.947778,0.945457


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.6256,0.295189,0.897778,0.891939
40,0.3254,0.234894,0.908333,0.905744
60,0.2671,0.270332,0.896111,0.896492
80,0.1843,0.150136,0.948333,0.94628
100,0.1715,0.142185,0.953333,0.950036
120,0.1499,0.142276,0.956111,0.954177
140,0.0994,0.153438,0.950556,0.948058
160,0.0982,0.161787,0.945,0.941913
180,0.1045,0.164075,0.945556,0.943325
200,0.0844,0.170885,0.947778,0.945457


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.9632,0.536136,0.817778,0.801319
40,0.4776,0.421538,0.842778,0.842976
60,0.3472,0.351255,0.875556,0.865935
80,0.2942,0.271349,0.901667,0.900465
100,0.2652,0.241152,0.909444,0.907065
120,0.2407,0.196512,0.922222,0.920166
140,0.1461,0.200598,0.933889,0.930828
160,0.1143,0.201187,0.934444,0.931501
180,0.1373,0.248881,0.916111,0.914687
200,0.1201,0.19458,0.939444,0.937161


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.9632,0.536136,0.817778,0.801319
40,0.4776,0.421538,0.842778,0.842976
60,0.3472,0.351255,0.875556,0.865935
80,0.2942,0.271349,0.901667,0.900465
100,0.2652,0.241152,0.909444,0.907065
120,0.2407,0.196512,0.922222,0.920166
140,0.1461,0.200598,0.933889,0.930828
160,0.1143,0.201187,0.934444,0.931501
180,0.1373,0.248881,0.916111,0.914687
200,0.1201,0.19458,0.939444,0.937161


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.9632,0.536136,0.817778,0.801319
40,0.4776,0.421538,0.842778,0.842976
60,0.3472,0.351255,0.875556,0.865935
80,0.2942,0.271349,0.901667,0.900465
100,0.2652,0.241152,0.909444,0.907065
120,0.2407,0.196512,0.922222,0.920166
140,0.1461,0.200598,0.933889,0.930828
160,0.1143,0.201187,0.934444,0.931501
180,0.1373,0.248881,0.916111,0.914687
200,0.1201,0.19458,0.939444,0.937161


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.7007,0.35742,0.871667,0.869268
40,0.3536,0.327724,0.872778,0.872688
60,0.2815,0.29381,0.897222,0.890479
80,0.248,0.195788,0.925,0.922802
100,0.1907,0.166317,0.943889,0.940322
120,0.1228,0.166365,0.941667,0.938476
140,0.1088,0.205493,0.936667,0.930978
160,0.1474,0.16425,0.948889,0.946399
180,0.1125,0.152428,0.95,0.94716
200,0.0764,0.174327,0.951111,0.948784


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.7007,0.35742,0.871667,0.869268
40,0.3536,0.327724,0.872778,0.872688
60,0.2815,0.29381,0.897222,0.890479
80,0.248,0.195788,0.925,0.922802
100,0.1907,0.166317,0.943889,0.940322
120,0.1228,0.166365,0.941667,0.938476
140,0.1088,0.205493,0.936667,0.930978
160,0.1474,0.16425,0.948889,0.946399
180,0.1125,0.152428,0.95,0.94716
200,0.0764,0.174327,0.951111,0.948784


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,0.7007,0.35742,0.871667,0.869268
40,0.3536,0.327724,0.872778,0.872688
60,0.2815,0.29381,0.897222,0.890479
80,0.248,0.195788,0.925,0.922802
100,0.1907,0.166317,0.943889,0.940322
120,0.1228,0.166365,0.941667,0.938476
140,0.1088,0.205493,0.936667,0.930978
160,0.1474,0.16425,0.948889,0.946399
180,0.1125,0.152428,0.95,0.94716
200,0.0764,0.174327,0.951111,0.948784


reemalyami/AraRoBERTa-SA, try:0


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,1.0696,0.689214,0.721667,0.616939
40,0.5938,0.392336,0.845556,0.845128
60,0.3904,0.499274,0.822222,0.793439
80,0.3632,0.298558,0.877222,0.877091
100,0.3236,0.260473,0.898889,0.896324
120,0.2916,0.255009,0.902778,0.901374
140,0.2018,0.240046,0.915,0.910372
160,0.2211,0.259884,0.898333,0.89746
180,0.2079,0.215691,0.917222,0.91405
200,0.2031,0.209722,0.918333,0.914992


reemalyami/AraRoBERTa-SA, try:1


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,1.0696,0.689214,0.721667,0.616939
40,0.5938,0.392336,0.845556,0.845128
60,0.3904,0.499274,0.822222,0.793439
80,0.3632,0.298558,0.877222,0.877091
100,0.3236,0.260473,0.898889,0.896324
120,0.2916,0.255009,0.902778,0.901374
140,0.2018,0.240046,0.915,0.910372
160,0.2211,0.259884,0.898333,0.89746
180,0.2079,0.215691,0.917222,0.91405
200,0.2031,0.209722,0.918333,0.914992


reemalyami/AraRoBERTa-SA, try:2


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at reemalyami/AraRoBERTa-SA and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7198 [00:00<?, ? examples/s]

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
20,1.0696,0.689214,0.721667,0.616939
40,0.5938,0.392336,0.845556,0.845128
60,0.3904,0.499274,0.822222,0.793439
80,0.3632,0.298558,0.877222,0.877091
100,0.3236,0.260473,0.898889,0.896324
120,0.2916,0.255009,0.902778,0.901374
140,0.2018,0.240046,0.915,0.910372
160,0.2211,0.259884,0.898333,0.89746
180,0.2079,0.215691,0.917222,0.91405
200,0.2031,0.209722,0.918333,0.914992


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.946111,0.944364
3,aubmindlab/bert-base-arabertv02-twitter,0.962222,0.96024
4,qarib/bert-base-qarib,0.956111,0.953769
7,reemalyami/AraRoBERTa-SA,0.933333,0.929994
