In [1]:
# cell-1  
#load and clean the data (removing diacritics and unwanted text)

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'MNADv2.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')

df = pd.read_csv('MNADv2/MNADv2.csv')
df.fillna('', inplace=True)


df['Category'] = df['Category'].astype('category')
# display(dfc['meter'].unique())

df['label'] = df['Category'].cat.codes #assign cat_value for each meter type
# dftrain, dftest = train_test_split(df, test_size=0.20, random_state=42, stratify=df['label'])
# ytrain = dftrain['label'].values.tolist()
# ytest = dftest['label'].values.tolist()



max_sequence_length = 128
train_batch_size = 128
classes_num = len(df['Category'].unique())
df = df[['Body', 'label']]
display(classes_num)
# display(len(df))
# display(len(dftrain))
# display(len(dftest))

# dftrain = dftrain[['Body']]
# dftest = dftest[['Body']]
# df = ''

# display(df[:2])

ds = Dataset.from_pandas(df)
ds = ds.train_test_split(test_size=0.2)



models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['Body'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 7
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 1000, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 1000
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results.to_csv('MNADv2_results.csv')
display(best_results)

2024-02-12 17:12:26.765973: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-12 17:12:26.791231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


20

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.7153,0.607891,0.784299,0.668462
2000,0.5952,0.560654,0.798245,0.69726
3000,0.5632,0.541652,0.803579,0.71005
4000,0.5124,0.53005,0.810087,0.719087
5000,0.4908,0.525943,0.810082,0.725743
6000,0.4892,0.51645,0.812743,0.725677
7000,0.4626,0.533392,0.80897,0.727631
8000,0.4086,0.54568,0.808469,0.725988
9000,0.412,0.532448,0.812476,0.728544
10000,0.4106,0.528056,0.813514,0.734117


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.7044,0.597834,0.788726,0.672371
2000,0.589,0.555061,0.800391,0.701494
3000,0.5579,0.539297,0.803757,0.714249
4000,0.5049,0.530882,0.809778,0.720448
5000,0.4823,0.521116,0.812228,0.727563
6000,0.4844,0.514516,0.813009,0.726605
7000,0.4569,0.526329,0.809802,0.734367
8000,0.4107,0.53752,0.81105,0.728391
9000,0.4137,0.531252,0.811639,0.724944
10000,0.4142,0.521816,0.814276,0.732429


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.7044,0.597834,0.788726,0.672371
2000,0.589,0.555061,0.800391,0.701494
3000,0.5579,0.539297,0.803757,0.714249
4000,0.5049,0.530882,0.809778,0.720448
5000,0.4823,0.521116,0.812228,0.727563
6000,0.4844,0.514516,0.813009,0.726605
7000,0.4569,0.526329,0.809802,0.734367
8000,0.4107,0.53752,0.81105,0.728391
9000,0.4137,0.531252,0.811639,0.724944
10000,0.4142,0.521816,0.814276,0.732429


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.7188,0.614298,0.780779,0.65468
2000,0.6117,0.582437,0.787689,0.67971
3000,0.5806,0.557771,0.797497,0.694344
4000,0.5339,0.543953,0.803374,0.706749
5000,0.5142,0.538505,0.806211,0.713974
6000,0.5115,0.529191,0.807212,0.718118
7000,0.4856,0.525138,0.810092,0.727541
8000,0.4421,0.532561,0.809362,0.728333
9000,0.4443,0.530362,0.810124,0.721733
10000,0.4432,0.523239,0.811663,0.729814


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.7188,0.614298,0.780779,0.65468
2000,0.6117,0.582437,0.787689,0.67971
3000,0.5806,0.557771,0.797497,0.694344
4000,0.5339,0.543953,0.803374,0.706749
5000,0.5142,0.538505,0.806211,0.713974
6000,0.5115,0.529191,0.807212,0.718118
7000,0.4856,0.525138,0.810092,0.727541
8000,0.4421,0.532561,0.809362,0.728333
9000,0.4443,0.530362,0.810124,0.721733
10000,0.4432,0.523239,0.811663,0.729814


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.7188,0.614298,0.780779,0.65468
2000,0.6117,0.582437,0.787689,0.67971
3000,0.5806,0.557771,0.797497,0.694344
4000,0.5339,0.543953,0.803374,0.706749
5000,0.5142,0.538505,0.806211,0.713974
6000,0.5115,0.529191,0.807212,0.718118
7000,0.4856,0.525138,0.810092,0.727541
8000,0.4421,0.532561,0.809362,0.728333
9000,0.4443,0.530362,0.810124,0.721733
10000,0.4432,0.523239,0.811663,0.729814


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.691,0.590281,0.791298,0.682653
2000,0.5792,0.543536,0.804411,0.709508
3000,0.5487,0.527571,0.808077,0.72547
4000,0.4929,0.518878,0.812939,0.732112
5000,0.4702,0.5127,0.815169,0.740451
6000,0.4692,0.505807,0.816478,0.740602
7000,0.4366,0.524987,0.814295,0.741304
8000,0.3786,0.530692,0.814052,0.742296
9000,0.3833,0.528067,0.814659,0.741613
10000,0.3827,0.526882,0.816244,0.7461


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.691,0.590281,0.791298,0.682653
2000,0.5792,0.543536,0.804411,0.709508
3000,0.5487,0.527571,0.808077,0.72547
4000,0.4929,0.518878,0.812939,0.732112
5000,0.4702,0.5127,0.815169,0.740451
6000,0.4692,0.505807,0.816478,0.740602
7000,0.4366,0.524987,0.814295,0.741304
8000,0.3786,0.530692,0.814052,0.742296
9000,0.3833,0.528067,0.814659,0.741613
10000,0.3827,0.526882,0.816244,0.7461


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/855591 [00:00<?, ? examples/s]

Map:   0%|          | 0/213898 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
1000,0.691,0.590281,0.791298,0.682653
2000,0.5792,0.543536,0.804411,0.709508
3000,0.5487,0.527571,0.808077,0.72547
4000,0.4929,0.518878,0.812939,0.732112
5000,0.4702,0.5127,0.815169,0.740451
6000,0.4692,0.505807,0.816478,0.740602
7000,0.4366,0.524987,0.814295,0.741304
8000,0.3786,0.530692,0.814052,0.742296
9000,0.3833,0.528067,0.814659,0.741613
10000,0.3827,0.526882,0.816244,0.7461


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.816244,0.7461
1,faisalq/bert-base-arabic-bbpe,0.816244,0.7461
2,faisalq/bert-base-arabic-bbpe,0.816244,0.7461
3,faisalq/bert-base-arabic-senpiece,0.81069,0.731307
4,faisalq/bert-base-arabic-senpiece,0.81069,0.731307
5,faisalq/bert-base-arabic-senpiece,0.81069,0.731307
6,faisalq/bert-base-arabic-wordpiece,0.809802,0.734367
7,faisalq/bert-base-arabic-wordpiece,0.809802,0.734367
