In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)

fname = 'OMCD_1'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/OMCD/train.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
df_test = pd.read_csv('datasets/OMCD/test.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(df_test.columns)
display(df[:4])



classes = set(df['off'].values)
display(classes)

df['off'] = df['off'].astype('category')
df['label'] = df['off'].cat.codes


df_test['off'] = df_test['off'].astype('category')
df_test['label'] = df_test['off'].cat.codes

df = df[['comment', 'label']]
df_test = df_test[['comment', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))
display(len(df_test))


ds_t = Dataset.from_pandas(df)
ds_v = Dataset.from_pandas(df_test)

display(ds_t)
display(ds_v)

max_sequence_length = 128



models = [ 
        'aubmindlab/bert-base-arabertv02-twitter',
        'CAMeL-Lab/bert-base-arabic-camelbert-da',
        'qarib/bert-base-qarib', 
        'UBC-NLP/MARBERT',
        'UBC-NLP/MARBERTv2',
]





for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds_t
        dataset_validation = ds_v                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['comment'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 20
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 60, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 60
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



Index(['Unnamed: 0', 'comment', 'off'], dtype='object')

Index(['Unnamed: 0', 'comment', 'off'], dtype='object')

Unnamed: 0.1,Unnamed: 0,comment,off
0,2908,فنانين الكبت والفساد .عقلية جنسية لا غير. العفن وليس الفن. شعب فيه اغلبية مستعدة للجنس وعندها قابلية .,1
1,1055,الدعارة هربت منها في المحمدية و سكنت في بوزنيقة و هي بحال بحال موجودة في كل المدن و السبب ملكنا زامل و الناس فقراء بالزاف,1
2,181,كون غير خريتي و مدرتيش هادشي,1
3,4313,لا حول ولا قوة الا بالله العلي العظيم لا حول ولا قوة الا بالله العلي العظيم. استغفرالله العظيم واتوب اليه. مساكن الوالدين هما اللي في الواجهة. شعب لا يرحم,0


{0, 1}

2

6419

1605

Dataset({
    features: ['comment', 'label'],
    num_rows: 6419
})

Dataset({
    features: ['comment', 'label'],
    num_rows: 1605
})

aubmindlab/bert-base-arabertv02-twitter, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4741,0.355572,0.831153,0.828904
120,0.3292,0.390318,0.854206,0.850551
180,0.2294,0.352683,0.847352,0.846078
240,0.1513,0.521985,0.838629,0.838165
300,0.1199,0.452485,0.848598,0.846921
360,0.0699,0.578867,0.851713,0.849803
420,0.0543,0.679837,0.85109,0.849882
480,0.0393,0.660356,0.846106,0.844786
540,0.0518,0.74172,0.854206,0.851642
600,0.0316,0.750985,0.838629,0.83766


aubmindlab/bert-base-arabertv02-twitter, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.472,0.355592,0.839875,0.838426
120,0.3273,0.410306,0.85109,0.847388
180,0.2279,0.397928,0.839875,0.839174
240,0.1499,0.54824,0.82243,0.822111
300,0.1226,0.43504,0.850467,0.848985
360,0.0659,0.581865,0.847352,0.846912
420,0.0613,0.683487,0.843614,0.842161
480,0.0441,0.66871,0.85296,0.851311
540,0.0412,0.700836,0.842368,0.839831
600,0.0312,0.766585,0.84486,0.843601


aubmindlab/bert-base-arabertv02-twitter, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at aubmindlab/bert-base-arabertv02-twitter and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.472,0.355592,0.839875,0.838426
120,0.3273,0.410306,0.85109,0.847388
180,0.2279,0.397928,0.839875,0.839174
240,0.1499,0.54824,0.82243,0.822111
300,0.1226,0.43504,0.850467,0.848985
360,0.0659,0.581865,0.847352,0.846912
420,0.0613,0.683487,0.843614,0.842161
480,0.0441,0.66871,0.85296,0.851311
540,0.0412,0.700836,0.842368,0.839831
600,0.0312,0.766585,0.84486,0.843601


CAMeL-Lab/bert-base-arabic-camelbert-da, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4956,0.41161,0.8081,0.806564
120,0.3597,0.476689,0.82866,0.8244
180,0.2424,0.392218,0.82866,0.826277
240,0.1766,0.539481,0.828037,0.825968
300,0.1343,0.593891,0.820561,0.819982
360,0.0929,0.632952,0.827414,0.82458
420,0.0651,0.852692,0.824922,0.822161
480,0.0611,0.876412,0.827414,0.825265
540,0.0471,0.806885,0.832399,0.829914
600,0.0388,0.801617,0.819938,0.818308


CAMeL-Lab/bert-base-arabic-camelbert-da, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4956,0.41161,0.8081,0.806564
120,0.3597,0.476689,0.82866,0.8244
180,0.2424,0.392218,0.82866,0.826277
240,0.1766,0.539481,0.828037,0.825968
300,0.1343,0.593891,0.820561,0.819982
360,0.0929,0.632952,0.827414,0.82458
420,0.0651,0.852692,0.824922,0.822161
480,0.0611,0.876412,0.827414,0.825265
540,0.0471,0.806885,0.832399,0.829914
600,0.0388,0.801617,0.819938,0.818308


CAMeL-Lab/bert-base-arabic-camelbert-da, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4956,0.41161,0.8081,0.806564
120,0.3597,0.476689,0.82866,0.8244
180,0.2424,0.392218,0.82866,0.826277
240,0.1766,0.539481,0.828037,0.825968
300,0.1343,0.593891,0.820561,0.819982
360,0.0929,0.632952,0.827414,0.82458
420,0.0651,0.852692,0.824922,0.822161
480,0.0611,0.876412,0.827414,0.825265
540,0.0471,0.806885,0.832399,0.829914
600,0.0388,0.801617,0.819938,0.818308


qarib/bert-base-qarib, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4687,0.35947,0.832399,0.830922
120,0.3254,0.432746,0.839252,0.837892
180,0.1785,0.439517,0.841745,0.838629
240,0.1358,0.510566,0.842368,0.841481
300,0.0935,0.56104,0.839875,0.839553
360,0.0571,0.616783,0.846106,0.84509
420,0.0481,0.739965,0.843614,0.842161
480,0.0492,0.688089,0.839252,0.836992
540,0.0425,0.88134,0.83676,0.83293
600,0.0259,1.044324,0.834268,0.831519


qarib/bert-base-qarib, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4687,0.35947,0.832399,0.830922
120,0.3254,0.432746,0.839252,0.837892
180,0.1785,0.439517,0.841745,0.838629
240,0.1358,0.510566,0.842368,0.841481
300,0.0935,0.56104,0.839875,0.839553
360,0.0571,0.616783,0.846106,0.84509
420,0.0481,0.739965,0.843614,0.842161
480,0.0492,0.688089,0.839252,0.836992
540,0.0425,0.88134,0.83676,0.83293
600,0.0259,1.044324,0.834268,0.831519


qarib/bert-base-qarib, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4687,0.35947,0.832399,0.830922
120,0.3254,0.432746,0.839252,0.837892
180,0.1785,0.439517,0.841745,0.838629
240,0.1358,0.510566,0.842368,0.841481
300,0.0935,0.56104,0.839875,0.839553
360,0.0571,0.616783,0.846106,0.84509
420,0.0481,0.739965,0.843614,0.842161
480,0.0492,0.688089,0.839252,0.836992
540,0.0425,0.88134,0.83676,0.83293
600,0.0259,1.044324,0.834268,0.831519


UBC-NLP/MARBERT, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4946,0.373621,0.835514,0.831046
120,0.3463,0.482709,0.826168,0.818201
180,0.2016,0.419521,0.842368,0.839179
240,0.1492,0.595966,0.835514,0.832786
300,0.1125,0.556316,0.83676,0.83621
360,0.0589,0.671152,0.834268,0.832788
420,0.0864,0.927175,0.812461,0.812461
480,0.0554,0.863668,0.816199,0.816195
540,0.045,1.043578,0.821184,0.814439
600,0.0472,1.108412,0.818692,0.81817


UBC-NLP/MARBERT, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4946,0.373621,0.835514,0.831046
120,0.3463,0.482709,0.826168,0.818201
180,0.2016,0.419521,0.842368,0.839179
240,0.1492,0.595966,0.835514,0.832786
300,0.1125,0.556316,0.83676,0.83621
360,0.0589,0.671152,0.834268,0.832788
420,0.0864,0.927175,0.812461,0.812461
480,0.0554,0.863668,0.816199,0.816195
540,0.045,1.043578,0.821184,0.814439
600,0.0472,1.108412,0.818692,0.81817


UBC-NLP/MARBERT, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4946,0.373621,0.835514,0.831046
120,0.3463,0.482709,0.826168,0.818201
180,0.2016,0.419521,0.842368,0.839179
240,0.1492,0.595966,0.835514,0.832786
300,0.1125,0.556316,0.83676,0.83621
360,0.0589,0.671152,0.834268,0.832788
420,0.0864,0.927175,0.812461,0.812461
480,0.0554,0.863668,0.816199,0.816195
540,0.045,1.043578,0.821184,0.814439
600,0.0472,1.108412,0.818692,0.81817


UBC-NLP/MARBERTv2, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4845,0.360196,0.841745,0.839471
120,0.3415,0.449397,0.847352,0.845896
180,0.2303,0.401768,0.854206,0.853072
240,0.1518,0.539415,0.831776,0.831606
300,0.1118,0.492254,0.850467,0.84871
360,0.0691,0.621607,0.85296,0.852174
420,0.0817,0.770473,0.833022,0.827018
480,0.0636,0.708961,0.857321,0.856289
540,0.0531,0.646465,0.849221,0.848115
600,0.0454,0.832403,0.846729,0.844527


UBC-NLP/MARBERTv2, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4845,0.360196,0.841745,0.839471
120,0.3415,0.449397,0.847352,0.845896
180,0.2303,0.401768,0.854206,0.853072
240,0.1518,0.539415,0.831776,0.831606
300,0.1118,0.492254,0.850467,0.84871
360,0.0691,0.621607,0.85296,0.852174
420,0.0817,0.770473,0.833022,0.827018
480,0.0636,0.708961,0.857321,0.856289
540,0.0531,0.646465,0.849221,0.848115
600,0.0454,0.832403,0.846729,0.844527


UBC-NLP/MARBERTv2, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERTv2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4845,0.360196,0.841745,0.839471
120,0.3415,0.449397,0.847352,0.845896
180,0.2303,0.401768,0.854206,0.853072
240,0.1518,0.539415,0.831776,0.831606
300,0.1118,0.492254,0.850467,0.84871
360,0.0691,0.621607,0.85296,0.852174
420,0.0817,0.770473,0.833022,0.827018
480,0.0636,0.708961,0.857321,0.856289
540,0.0531,0.646465,0.849221,0.848115
600,0.0454,0.832403,0.846729,0.844527


Unnamed: 0,Model,Accuracy,F1
0,CAMeL-Lab/bert-base-arabic-camelbert-da,0.832399,0.829914
3,UBC-NLP/MARBERT,0.842368,0.839179
6,UBC-NLP/MARBERTv2,0.861682,0.86038
9,aubmindlab/bert-base-arabertv02-twitter,0.85919,0.857723
10,qarib/bert-base-qarib,0.846106,0.84509
