In [1]:


import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'AnsStance.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')


ds = load_dataset('strombergnlp/ans-stance')  


ds_t = concatenate_datasets([ds['train'], ds['test']])
ds_v = ds['validation']

ds_t = ds_t.rename_column('stance', 'label')
ds_v = ds_v.rename_column('stance', 'label')

display(ds_t)
display(ds_v)

unique_labels = set(ds_t['label'])
classes_num = len(unique_labels)

print(f'Unique Labels: {unique_labels}')
print(f'Number of Classes: {classes_num}')

# return

max_sequence_length = 128



models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds_t
        dataset_validation = ds_v                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['s1'], examples['s2'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 30
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 10, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 10
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('AnsStance_results.csv')
display(best_results)



2024-02-21 11:59:43.391734: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-21 11:59:43.414785: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Found cached dataset ans-stance (/home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5)


  0%|          | 0/3 [00:00<?, ?it/s]

Dataset({
    features: ['id', 's1', 's2', 'label'],
    num_rows: 3031
})

Dataset({
    features: ['id', 's1', 's2', 'label'],
    num_rows: 755
})

Unique Labels: {0, 1, 2}
Number of Classes: 3
faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3031 [00:00<?, ? examples/s]

Map:   0%|          | 0/755 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.814,0.735253,0.623841,0.256117
20,0.7393,0.712776,0.627815,0.281405
30,0.7226,0.680102,0.687417,0.420405
40,0.6389,0.678947,0.67947,0.419138
50,0.565,0.674557,0.683444,0.420977
60,0.4709,0.677647,0.672848,0.475839
70,0.3665,0.758494,0.666225,0.438663
80,0.335,0.707036,0.692715,0.508911
90,0.2538,0.864679,0.67947,0.454244
100,0.2143,0.789894,0.725828,0.564957


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-55adb6ccb8992872.arrow


Map:   0%|          | 0/755 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8126,0.736964,0.623841,0.256117
20,0.7469,0.723942,0.623841,0.256117
30,0.7162,0.713743,0.638411,0.314423
40,0.6352,0.64239,0.662252,0.41596
50,0.558,0.742377,0.639735,0.430124
60,0.4788,0.656888,0.692715,0.494889
70,0.3698,0.691349,0.699338,0.553214
80,0.2763,0.875234,0.675497,0.576274
90,0.2511,0.787417,0.725828,0.557441
100,0.1874,0.96363,0.670199,0.516207


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-55adb6ccb8992872.arrow
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-7a9148e8c5468da5.arrow


Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8126,0.736964,0.623841,0.256117
20,0.7469,0.723942,0.623841,0.256117
30,0.7162,0.713743,0.638411,0.314423
40,0.6352,0.64239,0.662252,0.41596
50,0.558,0.742377,0.639735,0.430124
60,0.4788,0.656888,0.692715,0.494889
70,0.3698,0.691349,0.699338,0.553214
80,0.2763,0.875234,0.675497,0.576274
90,0.2511,0.787417,0.725828,0.557441
100,0.1874,0.96363,0.670199,0.516207


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3031 [00:00<?, ? examples/s]

Map:   0%|          | 0/755 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8074,0.735759,0.62649,0.266017
20,0.7311,0.697965,0.635762,0.301352
30,0.6788,0.697275,0.662252,0.372707
40,0.5942,0.673372,0.682119,0.389857
50,0.5293,0.598852,0.731126,0.468228
60,0.4608,0.63318,0.733775,0.507652
70,0.3886,0.6631,0.717881,0.607371
80,0.3124,0.748658,0.716556,0.563527
90,0.2465,0.681765,0.743046,0.707815
100,0.2453,0.921532,0.713907,0.588654


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-53cc617169d5ea5c.arrow


Map:   0%|          | 0/755 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8074,0.735759,0.62649,0.266017
20,0.7311,0.697965,0.635762,0.301352
30,0.6788,0.697275,0.662252,0.372707
40,0.5942,0.673372,0.682119,0.389857
50,0.5293,0.598852,0.731126,0.468228
60,0.4608,0.63318,0.733775,0.507652
70,0.3886,0.6631,0.717881,0.607371
80,0.3124,0.748658,0.716556,0.563527
90,0.2465,0.681765,0.743046,0.707815
100,0.2453,0.921532,0.713907,0.588654


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-53cc617169d5ea5c.arrow
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-98e1c1d8529a620d.arrow


Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8074,0.735759,0.62649,0.266017
20,0.7311,0.697965,0.635762,0.301352
30,0.6788,0.697275,0.662252,0.372707
40,0.5942,0.673372,0.682119,0.389857
50,0.5293,0.598852,0.731126,0.468228
60,0.4608,0.63318,0.733775,0.507652
70,0.3886,0.6631,0.717881,0.607371
80,0.3124,0.748658,0.716556,0.563527
90,0.2465,0.681765,0.743046,0.707815
100,0.2453,0.921532,0.713907,0.588654


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/3031 [00:00<?, ? examples/s]

Map:   0%|          | 0/755 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7978,0.748564,0.647682,0.373254
20,0.7302,0.70334,0.637086,0.375694
30,0.6717,0.644874,0.696689,0.443538
40,0.534,0.640085,0.695364,0.452458
50,0.3927,0.649534,0.715232,0.513708
60,0.2917,0.729404,0.716556,0.524394
70,0.2133,0.726656,0.750993,0.61233
80,0.1471,0.814779,0.740397,0.676218
90,0.1173,0.781104,0.748344,0.672437
100,0.0888,0.85366,0.757616,0.651408


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-f9b36d17509ff884.arrow


Map:   0%|          | 0/755 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7978,0.748564,0.647682,0.373254
20,0.7302,0.70334,0.637086,0.375694
30,0.6717,0.644874,0.696689,0.443538
40,0.534,0.640085,0.695364,0.452458
50,0.3927,0.649534,0.715232,0.513708
60,0.2917,0.729404,0.716556,0.524394
70,0.2133,0.726656,0.750993,0.61233
80,0.1471,0.814779,0.740397,0.676218
90,0.1173,0.781104,0.748344,0.672437
100,0.0888,0.85366,0.757616,0.651408


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-f9b36d17509ff884.arrow
Loading cached processed dataset at /home/ffq/.cache/huggingface/datasets/strombergnlp___ans-stance/stance/1.0.0/37dfb18c578ac3fd45426c2323c8e4a067f605fc7a08caf5688c6540aa8472c5/cache-c19207dda292f2a1.arrow


Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.7978,0.748564,0.647682,0.373254
20,0.7302,0.70334,0.637086,0.375694
30,0.6717,0.644874,0.696689,0.443538
40,0.534,0.640085,0.695364,0.452458
50,0.3927,0.649534,0.715232,0.513708
60,0.2917,0.729404,0.716556,0.524394
70,0.2133,0.726656,0.750993,0.61233
80,0.1471,0.814779,0.740397,0.676218
90,0.1173,0.781104,0.748344,0.672437
100,0.0888,0.85366,0.757616,0.651408


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.778808,0.734623
3,faisalq/bert-base-arabic-senpiece,0.786755,0.749935
6,faisalq/bert-base-arabic-wordpiece,0.715232,0.654909
