In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)

fname = 'OMCD_3'
log_file = fname + '.txt'

with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


df = pd.read_csv('datasets/OMCD/train.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3
df_test = pd.read_csv('datasets/OMCD/test.csv', encoding='utf-8', engine='python') #, quotechar="'"  , quoting=3


      
display(df.columns)
display(df_test.columns)
display(df[:4])



classes = set(df['off'].values)
display(classes)

c = df['off'].value_counts()
display(c)
c = df_test['off'].value_counts()
display(c)



df['off'] = df['off'].astype('category')
df['label'] = df['off'].cat.codes


df_test['off'] = df_test['off'].astype('category')
df_test['label'] = df_test['off'].cat.codes

df = df[['comment', 'label']]
df_test = df_test[['comment', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))
display(len(df_test))


ds_t = Dataset.from_pandas(df)
ds_v = Dataset.from_pandas(df_test)

display(ds_t)
display(ds_v)

max_sequence_length = 128


models = [ 
      'SI2M-Lab/DarijaBERT-mix',
    'SI2M-Lab/DarijaBERT-arabizi'
            
]


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds_t
        dataset_validation = ds_v                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['comment'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 20
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 64
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 60, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 60
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv(f'{fname}.csv')
display(best_results)



2024-09-18 16:58:15.604964: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-18 16:58:15.628689: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['Unnamed: 0', 'comment', 'off'], dtype='object')

Index(['Unnamed: 0', 'comment', 'off'], dtype='object')

Unnamed: 0.1,Unnamed: 0,comment,off
0,2908,فنانين الكبت والفساد .عقلية جنسية لا غير. العفن وليس الفن. شعب فيه اغلبية مستعدة للجنس وعندها قابلية .,1
1,1055,الدعارة هربت منها في المحمدية و سكنت في بوزنيقة و هي بحال بحال موجودة في كل المدن و السبب ملكنا زامل و الناس فقراء بالزاف,1
2,181,كون غير خريتي و مدرتيش هادشي,1
3,4313,لا حول ولا قوة الا بالله العلي العظيم لا حول ولا قوة الا بالله العلي العظيم. استغفرالله العظيم واتوب اليه. مساكن الوالدين هما اللي في الواجهة. شعب لا يرحم,0


{0, 1}

off
1    3416
0    3003
Name: count, dtype: int64

off
1    888
0    717
Name: count, dtype: int64

2

6419

1605

Dataset({
    features: ['comment', 'label'],
    num_rows: 6419
})

Dataset({
    features: ['comment', 'label'],
    num_rows: 1605
})

SI2M-Lab/DarijaBERT-mix, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4517,0.390812,0.814953,0.814925
120,0.3367,0.360388,0.854829,0.853259
180,0.2682,0.318082,0.866044,0.865075
240,0.1834,0.37372,0.861059,0.8597
300,0.1395,0.423366,0.85919,0.858246
360,0.0782,0.532566,0.857321,0.856227
420,0.0731,0.663134,0.858567,0.858313
480,0.0677,0.740566,0.845483,0.84548
540,0.0518,0.598342,0.868536,0.866111
600,0.0406,0.766944,0.849844,0.849109


SI2M-Lab/DarijaBERT-mix, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4736,0.388314,0.828037,0.827681
120,0.3377,0.387708,0.846729,0.846647
180,0.2682,0.327357,0.862305,0.860177
240,0.1839,0.389516,0.862928,0.862119
300,0.1411,0.376708,0.872897,0.872241
360,0.0811,0.479805,0.864174,0.863178
420,0.0735,0.604715,0.869782,0.869145
480,0.0573,0.670882,0.868536,0.867528
540,0.0401,0.647301,0.862928,0.860655
600,0.0468,0.557204,0.860436,0.858757


SI2M-Lab/DarijaBERT-mix, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-mix and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.4736,0.388314,0.828037,0.827681
120,0.3377,0.387708,0.846729,0.846647
180,0.2682,0.327357,0.862305,0.860177
240,0.1839,0.389516,0.862928,0.862119
300,0.1411,0.376708,0.872897,0.872241
360,0.0811,0.479805,0.864174,0.863178
420,0.0735,0.604715,0.869782,0.869145
480,0.0573,0.670882,0.868536,0.867528
540,0.0401,0.647301,0.862928,0.860655
600,0.0468,0.557204,0.860436,0.858757


SI2M-Lab/DarijaBERT-arabizi, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.497,0.414307,0.803738,0.803034
120,0.367,0.441514,0.819315,0.818477
180,0.2576,0.419557,0.829907,0.827058
240,0.171,0.573303,0.823053,0.821677
300,0.1266,0.550487,0.834268,0.833257
360,0.0832,0.622583,0.813084,0.812607
420,0.0662,0.71075,0.831776,0.829203
480,0.0483,0.909449,0.82866,0.827729
540,0.0506,0.883048,0.83053,0.828537
600,0.0545,0.865695,0.812461,0.812442


SI2M-Lab/DarijaBERT-arabizi, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.497,0.414307,0.803738,0.803034
120,0.367,0.441514,0.819315,0.818477
180,0.2576,0.419557,0.829907,0.827058
240,0.171,0.573303,0.823053,0.821677
300,0.1266,0.550487,0.834268,0.833257
360,0.0832,0.622583,0.813084,0.812607
420,0.0662,0.71075,0.831776,0.829203
480,0.0483,0.909449,0.82866,0.827729
540,0.0506,0.883048,0.83053,0.828537
600,0.0545,0.865695,0.812461,0.812442


SI2M-Lab/DarijaBERT-arabizi, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at SI2M-Lab/DarijaBERT-arabizi and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6419 [00:00<?, ? examples/s]

Map:   0%|          | 0/1605 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
60,0.497,0.414307,0.803738,0.803034
120,0.367,0.441514,0.819315,0.818477
180,0.2576,0.419557,0.829907,0.827058
240,0.171,0.573303,0.823053,0.821677
300,0.1266,0.550487,0.834268,0.833257
360,0.0832,0.622583,0.813084,0.812607
420,0.0662,0.71075,0.831776,0.829203
480,0.0483,0.909449,0.82866,0.827729
540,0.0506,0.883048,0.83053,0.828537
600,0.0545,0.865695,0.812461,0.812442


Unnamed: 0,Model,Accuracy,F1
0,SI2M-Lab/DarijaBERT-arabizi,0.836137,0.833708
3,SI2M-Lab/DarijaBERT-mix,0.872897,0.872241
