In [1]:
# cell-1  
#load and clean the data (removing diacritics and unwanted text)

import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'Adult.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



# ds = load_dataset('hard')

with open('Adult/README.txt', 'r') as file:
    headers = file.readline().strip().split('\t')

# display(headers)

df = pd.read_csv('Adult/adult-train.tsv', encoding='utf-8', engine='python', sep='\t', 
                 quoting=3, header=None) #, quotechar="'"

df2 = pd.read_csv('Adult/adult-test.tsv', encoding='utf-8', engine='python', sep='\t', 
                 quoting=3, header=None)

df3 = pd.read_csv('Adult/adult-dev.tsv', encoding='utf-8', engine='python', sep='\t', 
                 quoting=3, header=None)

df.columns = headers
df2.columns = headers
df3.columns = headers
# display(df.columns)



df_test = pd.concat([df2, df3], axis=0, ignore_index=True)


df = df[['text', 'adult']]
df_test = df_test[['text', 'adult']]

classes = set(df['adult'].values)
display(classes)

df['adult'] = df['adult'].astype('category')
df['label'] = df['adult'].cat.codes


df_test['adult'] = df_test['adult'].astype('category')
df_test['label'] = df_test['adult'].cat.codes

df = df[['text', 'label']]
df_test = df_test[['text', 'label']]
classes_num = len(classes)
display(classes_num)
display(len(df))
# display(len(df2))
# display(len(df3))

# display(df[:4])
# display(df_test[:4])

# return
ds_t = Dataset.from_pandas(df)
ds_v = Dataset.from_pandas(df_test)

# ds = ds['train']
# ds = ds.train_test_split(test_size=0.2)
display(ds_t)
display(ds_v)
df = ''

max_sequence_length = 128

# classes_num = 6
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds_t
        dataset_validation = ds_v
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 5
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 50, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 50
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('Adult_results.csv')
display(best_results)



2024-02-20 04:30:40.027509: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-20 04:30:40.053251: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


{'ADULT', 'NOT_ADULT'}

2

35000

Dataset({
    features: ['text', 'label'],
    num_rows: 35000
})

Dataset({
    features: ['text', 'label'],
    num_rows: 15000
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.193,0.126609,0.950867,0.873421
100,0.1346,0.115971,0.956733,0.896095
150,0.1199,0.122195,0.954933,0.89838
200,0.0894,0.1159,0.958067,0.902429
250,0.0895,0.129651,0.956933,0.892471
300,0.0821,0.125011,0.958933,0.902964
350,0.0554,0.130953,0.9594,0.901387
400,0.0612,0.139984,0.959733,0.902943
450,0.0465,0.166225,0.958533,0.899556
500,0.0395,0.167421,0.956733,0.894648


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1882,0.1277,0.9492,0.867232
100,0.1316,0.113184,0.956733,0.895738
150,0.1235,0.116801,0.955733,0.89987
200,0.0885,0.117232,0.958133,0.900261
250,0.088,0.120687,0.959333,0.900756
300,0.0803,0.127299,0.958933,0.902824
350,0.055,0.125583,0.9588,0.901131
400,0.0622,0.134221,0.958467,0.900839
450,0.0465,0.16423,0.9572,0.897541
500,0.0403,0.165828,0.9564,0.892739


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1882,0.1277,0.9492,0.867232
100,0.1316,0.113184,0.956733,0.895738
150,0.1235,0.116801,0.955733,0.89987
200,0.0885,0.117232,0.958133,0.900261
250,0.088,0.120687,0.959333,0.900756
300,0.0803,0.127299,0.958933,0.902824
350,0.055,0.125583,0.9588,0.901131
400,0.0622,0.134221,0.958467,0.900839
450,0.0465,0.16423,0.9572,0.897541
500,0.0403,0.165828,0.9564,0.892739


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1732,0.119858,0.9548,0.893197
100,0.1203,0.113982,0.956667,0.890904
150,0.1138,0.120347,0.9588,0.905134
200,0.0859,0.113664,0.958,0.900806
250,0.0848,0.131916,0.955867,0.88706
300,0.0746,0.152095,0.956133,0.891368
350,0.0506,0.127162,0.959667,0.905308
400,0.057,0.143176,0.959267,0.901891
450,0.0427,0.156519,0.9598,0.902937
500,0.04,0.16885,0.955867,0.889694


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1732,0.119858,0.9548,0.893197
100,0.1203,0.113982,0.956667,0.890904
150,0.1138,0.120347,0.9588,0.905134
200,0.0859,0.113664,0.958,0.900806
250,0.0848,0.131916,0.955867,0.88706
300,0.0746,0.152095,0.956133,0.891368
350,0.0506,0.127162,0.959667,0.905308
400,0.057,0.143176,0.959267,0.901891
450,0.0427,0.156519,0.9598,0.902937
500,0.04,0.16885,0.955867,0.889694


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1732,0.119858,0.9548,0.893197
100,0.1203,0.113982,0.956667,0.890904
150,0.1138,0.120347,0.9588,0.905134
200,0.0859,0.113664,0.958,0.900806
250,0.0848,0.131916,0.955867,0.88706
300,0.0746,0.152095,0.956133,0.891368
350,0.0506,0.127162,0.959667,0.905308
400,0.057,0.143176,0.959267,0.901891
450,0.0427,0.156519,0.9598,0.902937
500,0.04,0.16885,0.955867,0.889694


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1715,0.11982,0.950933,0.876082
100,0.1273,0.111247,0.956667,0.89361
150,0.1131,0.120574,0.957733,0.900272
200,0.0822,0.128293,0.956733,0.891776
250,0.0876,0.12384,0.9554,0.8889
300,0.0786,0.132884,0.9578,0.899443
350,0.0535,0.122987,0.958133,0.900645
400,0.0579,0.140206,0.9574,0.899509
450,0.0424,0.156155,0.9568,0.898985
500,0.04,0.152212,0.956533,0.893923


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1715,0.11982,0.950933,0.876082
100,0.1273,0.111247,0.956667,0.89361
150,0.1131,0.120574,0.957733,0.900272
200,0.0822,0.128293,0.956733,0.891776
250,0.0876,0.12384,0.9554,0.8889
300,0.0786,0.132884,0.9578,0.899443
350,0.0535,0.122987,0.958133,0.900645
400,0.0579,0.140206,0.9574,0.899509
450,0.0424,0.156155,0.9568,0.898985
500,0.04,0.152212,0.956533,0.893923


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
50,0.1715,0.11982,0.950933,0.876082
100,0.1273,0.111247,0.956667,0.89361
150,0.1131,0.120574,0.957733,0.900272
200,0.0822,0.128293,0.956733,0.891776
250,0.0876,0.12384,0.9554,0.8889
300,0.0786,0.132884,0.9578,0.899443
350,0.0535,0.122987,0.958133,0.900645
400,0.0579,0.140206,0.9574,0.899509
450,0.0424,0.156155,0.9568,0.898985
500,0.04,0.152212,0.956533,0.893923


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.958133,0.900645
3,faisalq/bert-base-arabic-senpiece,0.959667,0.905308
6,faisalq/bert-base-arabic-wordpiece,0.958933,0.902964
