In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'Abusive_mubarak17_1.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/Abusive_mubarak17.csv', encoding='utf-8', engine='python') #, sep='\t' , quotechar="'"  , quoting=3
display(df.columns)
df.fillna('', inplace=True)

display(df[:4])



df = df[df['text'] != '']

classes = set(df['aggregatedAnnotation'].values)
display(classes)

df['aggregatedAnnotation'] = df['aggregatedAnnotation'].astype('category')
df['label'] = df['aggregatedAnnotation'].cat.codes



df = df[['text', 'label']]


classes_num = len(classes)
display(classes_num)
display(len(df))


ds = Dataset.from_pandas(df)

ds = ds.train_test_split(test_size=0.2)
display(ds)

max_sequence_length = 128


models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib',  
]


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                    
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length, add_special_tokens=True)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}

            
        epochs = 25
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('Abusive_mubarak17_results_1.csv')
display(best_results)



2024-07-25 03:28:30.273228: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-25 03:28:30.297736: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['#', 'type', 'text', 'aggregatedAnnotation',
       'aggregatedAnnotationConfidence', 'annotator1', 'annotator2',
       'annotator3'],
      dtype='object')

Unnamed: 0,#,type,text,aggregatedAnnotation,aggregatedAnnotationConfidence,annotator1,annotator2,annotator3
0,1,TWEET,مبروك و سامحونا لعجزنا التام. عقبال اللي جوه. اللي بره يا عاجز يا بيزايد على العاجز,0,0.6667,-1,0,0
1,2,C1,كلنا بره ومش هنبطل نزايد على العجايز الي جابونا ورى,-1,0.6667,-1,-1,0
2,3,C2,بدل ما انت قاعد بره كده تعالي ازرع الصحرا,0,1.0,0,0,0
3,4,C3,قذر اتفووو ماتيجى مصر وتورينا نفسك كدا ياجبان,-1,1.0,-1,-1,-1


{-2, -1, 0}

3

1100

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 880
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 220
    })
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.983,0.817393,0.704545,0.658798
20,0.653,0.596806,0.781818,0.777083
30,0.4555,0.532017,0.781818,0.77815
40,0.3111,0.569369,0.781818,0.774077
50,0.1825,0.617054,0.754545,0.753095
60,0.1237,0.628827,0.790909,0.787248
70,0.0738,0.749499,0.786364,0.779547
80,0.0361,0.872076,0.754545,0.753298
90,0.0581,0.879154,0.763636,0.759584
100,0.0175,0.964468,0.754545,0.750418


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9625,0.776543,0.686364,0.641898
20,0.6025,0.590815,0.772727,0.762792
30,0.4262,0.556274,0.772727,0.768596
40,0.2849,0.641637,0.759091,0.754865
50,0.1571,0.668976,0.763636,0.761294
60,0.1209,0.742842,0.759091,0.757255
70,0.0452,0.955932,0.768182,0.768435
80,0.0335,1.026879,0.759091,0.759834
90,0.0324,1.121678,0.740909,0.737396
100,0.009,1.144067,0.759091,0.757715


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9625,0.776543,0.686364,0.641898
20,0.6025,0.590815,0.772727,0.762792
30,0.4262,0.556274,0.772727,0.768596
40,0.2849,0.641637,0.759091,0.754865
50,0.1571,0.668976,0.763636,0.761294
60,0.1209,0.742842,0.759091,0.757255
70,0.0452,0.955932,0.768182,0.768435
80,0.0335,1.026879,0.759091,0.759834
90,0.0324,1.121678,0.740909,0.737396
100,0.009,1.144067,0.759091,0.757715


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0206,0.846237,0.631818,0.629997
20,0.684,0.689966,0.704545,0.693234
30,0.5076,0.658861,0.731818,0.734611
40,0.3116,0.647086,0.736364,0.734881
50,0.2463,0.813244,0.75,0.747507
60,0.1512,0.866953,0.736364,0.741643
70,0.0912,0.881348,0.75,0.755139
80,0.0379,1.135765,0.731818,0.734595
90,0.0707,1.236455,0.727273,0.727098
100,0.0349,1.210595,0.759091,0.760921


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0206,0.846237,0.631818,0.629997
20,0.684,0.689966,0.704545,0.693234
30,0.5076,0.658861,0.731818,0.734611
40,0.3116,0.647086,0.736364,0.734881
50,0.2463,0.813244,0.75,0.747507
60,0.1512,0.866953,0.736364,0.741643
70,0.0912,0.881348,0.75,0.755139
80,0.0379,1.135765,0.731818,0.734595
90,0.0707,1.236455,0.727273,0.727098
100,0.0349,1.210595,0.759091,0.760921


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,1.0206,0.846237,0.631818,0.629997
20,0.684,0.689966,0.704545,0.693234
30,0.5076,0.658861,0.731818,0.734611
40,0.3116,0.647086,0.736364,0.734881
50,0.2463,0.813244,0.75,0.747507
60,0.1512,0.866953,0.736364,0.741643
70,0.0912,0.881348,0.75,0.755139
80,0.0379,1.135765,0.731818,0.734595
90,0.0707,1.236455,0.727273,0.727098
100,0.0349,1.210595,0.759091,0.760921


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9522,0.73563,0.681818,0.662158
20,0.607,0.599245,0.75,0.747587
30,0.3709,0.568827,0.759091,0.756312
40,0.1798,0.73942,0.745455,0.735324
50,0.1072,0.802928,0.786364,0.786909
60,0.0765,0.880606,0.759091,0.759261
70,0.0348,0.986144,0.759091,0.741189
80,0.0174,1.102146,0.759091,0.759009
90,0.0454,1.289267,0.740909,0.722287
100,0.0149,1.228429,0.781818,0.777055


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9522,0.73563,0.681818,0.662158
20,0.607,0.599245,0.75,0.747587
30,0.3709,0.568827,0.759091,0.756312
40,0.1798,0.73942,0.745455,0.735324
50,0.1072,0.802928,0.786364,0.786909
60,0.0765,0.880606,0.759091,0.759261
70,0.0348,0.986144,0.759091,0.741189
80,0.0174,1.102146,0.759091,0.759009
90,0.0454,1.289267,0.740909,0.722287
100,0.0149,1.228429,0.781818,0.777055


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/880 [00:00<?, ? examples/s]

Map:   0%|          | 0/220 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9522,0.73563,0.681818,0.662158
20,0.607,0.599245,0.75,0.747587
30,0.3709,0.568827,0.759091,0.756312
40,0.1798,0.73942,0.745455,0.735324
50,0.1072,0.802928,0.786364,0.786909
60,0.0765,0.880606,0.759091,0.759261
70,0.0348,0.986144,0.759091,0.741189
80,0.0174,1.102146,0.759091,0.759009
90,0.0454,1.289267,0.740909,0.722287
100,0.0149,1.228429,0.781818,0.777055


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.759091,0.760921
3,aubmindlab/bert-base-arabertv02-twitter,0.790909,0.78732
4,qarib/bert-base-qarib,0.790909,0.791055
