In [1]:


import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

import numpy as np
import tensorflow as tf
import pandas as pd
import pyarabic.araby as araby
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
import torch
from sklearn.metrics import accuracy_score, f1_score
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, BertForSequenceClassification
from datasets import load_dataset, Dataset, concatenate_datasets
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 1000)


log_file = 'Mawqif_stance.txt'
with open(log_file, 'w') as f:
    f.write('Model,Accuracy,F1\n')



ds = load_dataset('NoraAlt/Mawqif_Stance-Detection')
ds = ds['train']
df = ds.to_pandas()


# ['target', 'stance', 'sarcasm']
df = df[['text', 'target', 'stance']]

display(df['target'].unique())

df.fillna('neutral', inplace=True)


df['target'] = df['target'].replace('Women empowerment', 'تمكين المرأة')
df['target'] = df['target'].replace('Covid Vaccine', 'لقاح كوفيد')
df['target'] = df['target'].replace('Digital Transformation', 'التحول الرقمي')
display(df['target'].unique())
display(df['stance'].unique())
df['stance'] = df['stance'].astype('category')

df['label'] = df['stance'].cat.codes
classes = df['stance'].unique()
classes_num = len(classes)
display(classes)
display(classes_num)

df = df[['text', 'target', 'label']]

ds = Dataset.from_pandas(df)

# ds = ds['train']
ds = ds.train_test_split(test_size=0.2)
display(ds)
# df = ''


# return

max_sequence_length = 128

# classes_num = 5
# display(classes_num)
# display(ds)


models = ['faisalq/bert-base-arabic-wordpiece', 'faisalq/bert-base-arabic-senpiece',
          'faisalq/bert-base-arabic-bbpe']


for model_name in models:
    for i in range(3):
        print(f'{model_name}, try:{i}')
              
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = BertForSequenceClassification.from_pretrained(model_name,
                                                              num_labels=classes_num).to('cuda')                                                 
        dataset_train = ds['train']
        dataset_validation = ds['test']                                                     
        
      

        def preprocess_function(examples):
            return tokenizer(examples['text'], examples['target'], truncation=True, padding="max_length",
                            max_length=max_sequence_length)
        
        
        dataset_train = dataset_train.map(preprocess_function, batched=True)
        dataset_validation = dataset_validation.map(preprocess_function, batched=True)
        
       
        
        def compute_metrics(eval_pred):
            logits, labels = eval_pred
            predictions = np.argmax(logits, axis=-1)    
            acc = accuracy_score(labels, predictions)        
            f1 = f1_score(labels, predictions, average='macro')   
            with open(log_file, 'a') as f:
                f.write(f'{model_name},{acc},{f1}\n')
            return {'accuracy': acc, 'f1_score': f1}


        
        
        epochs = 10
        save_steps = 10000 #save checkpoint every 10000 steps
        batch_size = 256
        
        training_args = TrainingArguments(
            output_dir = 'bert/',
            overwrite_output_dir=True,
            num_train_epochs = epochs,
            per_device_train_batch_size = batch_size,
            per_device_eval_batch_size = batch_size,
            save_steps = save_steps,
            save_total_limit = 1, #only save the last 5 checkpoints
            fp16=True,
            learning_rate = 5e-5,  # 5e-5 is the default
            logging_steps = 5, #50_000
            evaluation_strategy = 'steps',
            # evaluate_during_training = True,
            eval_steps = 5
            
        )
        
        trainer = Trainer(
            model = model,
            args = training_args,
            # data_collator=data_collator,
            train_dataset=dataset_train,
            eval_dataset=dataset_validation,
            compute_metrics = compute_metrics
        )
        
        
        # trainer.train(resume_from_checkpoint=True)
        trainer.train()


results = pd.read_csv(log_file)

best_results = results.groupby('Model', as_index=False)['F1'].max()

best_results = pd.merge(best_results, results, on=['Model', 'F1'])
best_results = best_results[['Model', 'Accuracy', 'F1']]
best_results = best_results.drop_duplicates()
best_results.to_csv('Mawqif_stance_results.csv')
display(best_results)



2024-02-19 10:16:28.981887: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-19 10:16:29.007853: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Found cached dataset csv (/home/ffq/.cache/huggingface/datasets/NoraAlt___csv/NoraAlt--Mawqif_Stance-Detection-5af5d638123fb939/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)


  0%|          | 0/1 [00:00<?, ?it/s]

array(['Women empowerment', 'Covid Vaccine', 'Digital Transformation'],
      dtype=object)

array(['تمكين المرأة', 'لقاح كوفيد', 'التحول الرقمي'], dtype=object)

array(['Against', 'neutral', 'Favor'], dtype=object)

['Against', 'neutral', 'Favor']
Categories (3, object): ['Against', 'Favor', 'neutral']

3

DatasetDict({
    train: Dataset({
        features: ['text', 'target', 'label'],
        num_rows: 2801
    })
    test: Dataset({
        features: ['text', 'target', 'label'],
        num_rows: 701
    })
})

faisalq/bert-base-arabic-wordpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.901,0.743914,0.659058,0.422755
10,0.6795,0.651334,0.714693,0.491613
15,0.6156,0.620822,0.736091,0.549778
20,0.554,0.609371,0.723252,0.530997
25,0.4847,0.636791,0.741797,0.563882
30,0.4784,0.580556,0.768902,0.665083
35,0.3791,0.600581,0.767475,0.607507
40,0.3348,0.654747,0.757489,0.655027
45,0.3069,0.695563,0.770328,0.628825
50,0.2411,0.714379,0.773181,0.669366


faisalq/bert-base-arabic-wordpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.8926,0.724681,0.684736,0.461276
10,0.6775,0.674284,0.706134,0.480913
15,0.6405,0.644421,0.731812,0.515308
20,0.5626,0.611533,0.730385,0.509629
25,0.4873,0.651133,0.731812,0.523042
30,0.4905,0.620795,0.746077,0.585354
35,0.3977,0.621668,0.754636,0.602541
40,0.3622,0.641509,0.760342,0.651937
45,0.3183,0.671607,0.764622,0.647809
50,0.2571,0.737218,0.761769,0.650557


faisalq/bert-base-arabic-wordpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-wordpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.8926,0.724681,0.684736,0.461276
10,0.6775,0.674284,0.706134,0.480913
15,0.6405,0.644421,0.731812,0.515308
20,0.5626,0.611533,0.730385,0.509629
25,0.4873,0.651133,0.731812,0.523042
30,0.4905,0.620795,0.746077,0.585354
35,0.3977,0.621668,0.754636,0.602541
40,0.3622,0.641509,0.760342,0.651937
45,0.3183,0.671607,0.764622,0.647809
50,0.2571,0.737218,0.761769,0.650557


faisalq/bert-base-arabic-senpiece, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.8887,0.741033,0.693295,0.474202
10,0.67,0.657962,0.714693,0.49158
15,0.612,0.636109,0.728959,0.506695
20,0.5617,0.633668,0.731812,0.513562
25,0.4836,0.603316,0.738944,0.533304
30,0.4583,0.606501,0.751783,0.619673
35,0.3773,0.665998,0.746077,0.585342
40,0.3491,0.646195,0.773181,0.684598
45,0.2992,0.66936,0.774608,0.661124
50,0.2448,0.742802,0.754636,0.641112


faisalq/bert-base-arabic-senpiece, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.8887,0.741033,0.693295,0.474202
10,0.67,0.657962,0.714693,0.49158
15,0.612,0.636109,0.728959,0.506695
20,0.5617,0.633668,0.731812,0.513562
25,0.4836,0.603316,0.738944,0.533304
30,0.4583,0.606501,0.751783,0.619673
35,0.3773,0.665998,0.746077,0.585342
40,0.3491,0.646195,0.773181,0.684598
45,0.2992,0.66936,0.774608,0.661124
50,0.2448,0.742802,0.754636,0.641112


faisalq/bert-base-arabic-senpiece, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-senpiece and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.8887,0.741033,0.693295,0.474202
10,0.67,0.657962,0.714693,0.49158
15,0.612,0.636109,0.728959,0.506695
20,0.5617,0.633668,0.731812,0.513562
25,0.4836,0.603316,0.738944,0.533304
30,0.4583,0.606501,0.751783,0.619673
35,0.3773,0.665998,0.746077,0.585342
40,0.3491,0.646195,0.773181,0.684598
45,0.2992,0.66936,0.774608,0.661124
50,0.2448,0.742802,0.754636,0.641112


faisalq/bert-base-arabic-bbpe, try:0


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.9054,0.753982,0.67475,0.456541
10,0.684,0.683275,0.708987,0.488947
15,0.63,0.654829,0.714693,0.515886
20,0.5724,0.633591,0.720399,0.52521
25,0.4958,0.621555,0.727532,0.584462
30,0.4718,0.662604,0.724679,0.611437
35,0.3968,0.67206,0.727532,0.607115
40,0.3582,0.66126,0.74893,0.645981
45,0.3331,0.722104,0.734665,0.621028
50,0.2628,0.720915,0.74893,0.643588


faisalq/bert-base-arabic-bbpe, try:1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.9054,0.753982,0.67475,0.456541
10,0.684,0.683275,0.708987,0.488947
15,0.63,0.654829,0.714693,0.515886
20,0.5724,0.633591,0.720399,0.52521
25,0.4958,0.621555,0.727532,0.584462
30,0.4718,0.662604,0.724679,0.611437
35,0.3968,0.67206,0.727532,0.607115
40,0.3582,0.66126,0.74893,0.645981
45,0.3331,0.722104,0.734665,0.621028
50,0.2628,0.720915,0.74893,0.643588


faisalq/bert-base-arabic-bbpe, try:2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at faisalq/bert-base-arabic-bbpe and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/2801 [00:00<?, ? examples/s]

Map:   0%|          | 0/701 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss,Accuracy,F1 Score
5,0.9054,0.753982,0.67475,0.456541
10,0.684,0.683275,0.708987,0.488947
15,0.63,0.654829,0.714693,0.515886
20,0.5724,0.633591,0.720399,0.52521
25,0.4958,0.621555,0.727532,0.584462
30,0.4718,0.662604,0.724679,0.611437
35,0.3968,0.67206,0.727532,0.607115
40,0.3582,0.66126,0.74893,0.645981
45,0.3331,0.722104,0.734665,0.621028
50,0.2628,0.720915,0.74893,0.643588


Unnamed: 0,Model,Accuracy,F1
0,faisalq/bert-base-arabic-bbpe,0.750357,0.658869
3,faisalq/bert-base-arabic-senpiece,0.773181,0.684598
6,faisalq/bert-base-arabic-wordpiece,0.773181,0.669366
