In [1]:


import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'ASTD.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



# ds = load_dataset('hard')

df = pd.read_csv('ASTD/Tweets.txt', encoding='utf-8', engine='python', sep='\t', quoting=3,
                names = ['text', 'category'], header=None) #, quotechar='"'
# df = pd.read_csv('HARD/unbalanced-reviews.txt', encoding='utf-16', engine='python', sep='\t') #, quotechar="'"
display(df.columns)

df['category'] = df['category'].astype('category')
df['label'] = df['category'].cat.codes
classes_num = df['label'].unique()
classes_num = len(classes_num)
display(classes_num)
display(len(df))

display(df[:4])

df = df[['text', 'label']]  # a column of type 'category' will cause an issue when converted to a ds

ds = Dataset.from_pandas(df)

# ds = ds['train']
ds = ds.train_test_split(test_size=0.2)
display(ds)
df = ''

# return

max_sequence_length = 128

# classes_num = 6
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 10
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('ASTD_results.csv')
display(best_results)



2024-02-18 06:32:32.600968: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-18 06:32:32.624108: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Index(['text', 'category'], dtype='object')

4

10006

Unnamed: 0,text,category,label
0,بعد استقالة رئيس #المحكمة_الدستورية ننتظر استقالة #رئيس_القضاء #السودان,OBJ,2
1,أهنئ الدكتور أحمد جمال الدين، القيادي بحزب مصر، بمناسبة صدور أولى روايته,POS,3
2,البرادعي يستقوى بامريكا مرةاخرى و يرسل عصام العريان الي واشنطن شئ مقرف,NEG,0
3,#الحرية_والعدالة | شاهد الآن: #ليلة_الاتحادية أول فيلم استقصائي يتناول أسرار و كواليس تعرض لأول مرة حول حقيقة,OBJ,2


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8004
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2002
    })
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.991,0.841394,0.674825,0.215323
20,0.8483,0.749325,0.71978,0.432179
30,0.7434,0.717856,0.731269,0.479447
40,0.6673,0.694445,0.740759,0.486291
50,0.6053,0.72235,0.742757,0.531228
60,0.6394,0.709898,0.73976,0.530932
70,0.543,0.715369,0.742757,0.556818
80,0.4834,0.766504,0.738761,0.554615
90,0.4444,0.771547,0.732268,0.550231
100,0.4786,0.760534,0.735265,0.534131


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.987,0.861659,0.683816,0.237323
20,0.8568,0.769296,0.708791,0.389455
30,0.7442,0.71886,0.725275,0.465404
40,0.6673,0.689588,0.742757,0.486074
50,0.6047,0.719909,0.746753,0.527855
60,0.6486,0.70596,0.745754,0.525821
70,0.5468,0.708095,0.74026,0.546837
80,0.4906,0.748526,0.745255,0.550249
90,0.4498,0.747112,0.739261,0.546425
100,0.4837,0.748474,0.740759,0.539216


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.987,0.861659,0.683816,0.237323
20,0.8568,0.769296,0.708791,0.389455
30,0.7442,0.71886,0.725275,0.465404
40,0.6673,0.689588,0.742757,0.486074
50,0.6047,0.719909,0.746753,0.527855
60,0.6486,0.70596,0.745754,0.525821
70,0.5468,0.708095,0.74026,0.546837
80,0.4906,0.748526,0.745255,0.550249
90,0.4498,0.747112,0.739261,0.546425
100,0.4837,0.748474,0.740759,0.539216


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9888,0.827304,0.683317,0.244241
20,0.8533,0.730856,0.728272,0.414446
30,0.7284,0.701294,0.740759,0.504054
40,0.6447,0.700973,0.75025,0.503611
50,0.6183,0.701538,0.749251,0.543411
60,0.6293,0.687927,0.750749,0.567338
70,0.5396,0.700319,0.754246,0.574064
80,0.4818,0.738506,0.753247,0.559277
90,0.4553,0.741241,0.75025,0.550913
100,0.4626,0.744361,0.744755,0.552963


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9888,0.827304,0.683317,0.244241
20,0.8533,0.730856,0.728272,0.414446
30,0.7284,0.701294,0.740759,0.504054
40,0.6447,0.700973,0.75025,0.503611
50,0.6183,0.701538,0.749251,0.543411
60,0.6293,0.687927,0.750749,0.567338
70,0.5396,0.700319,0.754246,0.574064
80,0.4818,0.738506,0.753247,0.559277
90,0.4553,0.741241,0.75025,0.550913
100,0.4626,0.744361,0.744755,0.552963


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9888,0.827304,0.683317,0.244241
20,0.8533,0.730856,0.728272,0.414446
30,0.7284,0.701294,0.740759,0.504054
40,0.6447,0.700973,0.75025,0.503611
50,0.6183,0.701538,0.749251,0.543411
60,0.6293,0.687927,0.750749,0.567338
70,0.5396,0.700319,0.754246,0.574064
80,0.4818,0.738506,0.753247,0.559277
90,0.4553,0.741241,0.75025,0.550913
100,0.4626,0.744361,0.744755,0.552963


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9729,0.823924,0.694306,0.272521
20,0.8355,0.729494,0.733267,0.429913
30,0.7249,0.708982,0.735265,0.478874
40,0.6426,0.693436,0.746753,0.495441
50,0.6068,0.69707,0.747253,0.546864
60,0.6183,0.693249,0.752248,0.56436
70,0.5289,0.695716,0.755744,0.559493
80,0.471,0.745396,0.752248,0.546294
90,0.4442,0.758396,0.75025,0.550154
100,0.4594,0.745185,0.746254,0.53108


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9729,0.823924,0.694306,0.272521
20,0.8355,0.729494,0.733267,0.429913
30,0.7249,0.708982,0.735265,0.478874
40,0.6426,0.693436,0.746753,0.495441
50,0.6068,0.69707,0.747253,0.546864
60,0.6183,0.693249,0.752248,0.56436
70,0.5289,0.695716,0.755744,0.559493
80,0.471,0.745396,0.752248,0.546294
90,0.4442,0.758396,0.75025,0.550154
100,0.4594,0.745185,0.746254,0.53108


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8004 [00:00<?, ? examples/s]

Map:   0%|          | 0/2002 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.9729,0.823924,0.694306,0.272521
20,0.8355,0.729494,0.733267,0.429913
30,0.7249,0.708982,0.735265,0.478874
40,0.6426,0.693436,0.746753,0.495441
50,0.6068,0.69707,0.747253,0.546864
60,0.6183,0.693249,0.752248,0.56436
70,0.5289,0.695716,0.755744,0.559493
80,0.471,0.745396,0.752248,0.546294
90,0.4442,0.758396,0.75025,0.550154
100,0.4594,0.745185,0.746254,0.53108


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.752248,0.56436
3,faisalq/bert-base-arabic-senpiece,0.754246,0.574064
6,faisalq/bert-base-arabic-wordpiece,0.742757,0.556818
