In [None]:
import os, time, torch
from transformers import logging as hfl
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import logging, warnings, tensorflow as tf
from huggingface_hub.utils import disable_progress_bars


# Wall-clock timestamp taken when this cell runs. It is not read by the code
# visible in this part of the notebook -- presumably a later cell reports the
# elapsed time (TODO confirm).
start_time = time.time()

# --- Hugging Face: no download progress bars, errors only -------------------
disable_progress_bars()
hfl.set_verbosity_error()
os.environ['TOKENIZERS_PARALLELISM'] = 'False'

# --- TensorFlow: errors only -------------------------------------------------
# (A former `setLevel('INFO')` call was dropped: it was overridden by the
# ERROR level set immediately after it and therefore had no effect.)
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# --- Python warnings ---------------------------------------------------------
# `Warning` is the base class of every warning category (FutureWarning
# included), so this single filter silences all of them.
warnings.simplefilter(action='ignore', category=Warning)

# Preferred torch device. NOTE(review): the functions visible in this notebook
# never move models or inputs to `device`; confirm whether that is intended.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Directory of the BERT tokenizer shared by both fine-tuned BERT checkpoints.
_BERT_TOKENIZER_DIR = 'models/bert-base-arabertv2'

# (model, dataset) -> (fine-tuned checkpoint directory, number of labels).
_BERT_CHECKPOINTS = {
    ('bert', 'hard'): ('models/BERThard', 4),
    ('bert', 'msda'): ('models/BERTmsda', 3),
}

# (model, dataset) -> (pickled tokenizer file, saved Keras model directory).
_KERAS_ARTIFACTS = {
    ('cnn', 'hard'): ('tokenizers/tokenizerCNNhard.pickle', 'models/2dCNNhard'),
    ('bilstm', 'hard'): ('tokenizers/tokenizerbiLSTMhard.pickle', 'models/biLSTMhard'),
    ('cnn', 'msda'): ('tokenizers/tokenizerCNNmsda.pickle', 'models/2dCNNmsda'),
    ('bilstm', 'msda'): ('tokenizers/tokenizerbiLSTMmsda.pickle', 'models/biLSTMmsda'),
}

# (model, dataset) -> (model, tokenizer) pairs that were already loaded.
# Re-running this cell resets the cache.
_LOADED_MODELS = {}


def load(model, dataset):
    """Load a classifier and its tokenizer from the local artifact folders.

    Loaded pairs are memoised per ``(model, dataset)``, so repeated calls
    (``predict`` calls this once per input text) do not hit the disk again.
    The returned objects are therefore shared between calls; callers should
    treat them as read-only.

    Args:
        model: Architecture name: ``'bert'``, ``'cnn'`` or ``'bilstm'``.
        dataset: Dataset name: ``'hard'`` or ``'msda'``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If the ``(model, dataset)`` combination is not one of the
            six supported ones. (Previously a message was printed and ``None``
            was returned, which made callers fail later with an unrelated
            unpacking error.)
    """
    if not (isinstance(model, str) and isinstance(dataset, str)):
        key = None
    else:
        key = (model, dataset)

    if key in _LOADED_MODELS:
        return _LOADED_MODELS[key]

    if key in _BERT_CHECKPOINTS:
        # Imported lazily so the Keras-only paths do not need these classes.
        from transformers import BertTokenizer, BertForSequenceClassification
        checkpoint_dir, num_labels = _BERT_CHECKPOINTS[key]
        tokenizer = BertTokenizer.from_pretrained(_BERT_TOKENIZER_DIR)
        classifier = BertForSequenceClassification.from_pretrained(
            checkpoint_dir, num_labels=num_labels)

    elif key in _KERAS_ARTIFACTS:
        import pickle
        from huggingface_hub import from_pretrained_keras
        tokenizer_path, model_dir = _KERAS_ARTIFACTS[key]
        # SECURITY: unpickling can execute arbitrary code -- only load
        # tokenizer files that come from a trusted source.
        with open(tokenizer_path, 'rb') as handle:  # closes the file handle
            tokenizer = pickle.load(handle)
        classifier = from_pretrained_keras(model_dir)

    else:
        raise ValueError(
            "load() expects model in {'bert', 'cnn', 'bilstm'} and "
            "dataset in {'hard', 'msda'}; "
            f"got model={model!r}, dataset={dataset!r}")

    _LOADED_MODELS[key] = (classifier, tokenizer)
    return _LOADED_MODELS[key]



# Class-id -> label name, per dataset.
_LABELS_BY_DATASET = {
    'hard': {0: 'Poor', 1: 'Fair', 2: 'Good', 3: 'Excellent'},
    'msda': {0: 'Negative', 1: 'Neutral', 2: 'Positive'},
}

# Victim name -> (architecture, dataset, pad length for the Keras models).
# The pad length is unused (None) for BERT, which has its own tokenizer.
_PREDICT_SPECS = {
    'bert_hard': ('bert', 'hard', None),
    'cnn_hard': ('cnn', 'hard', 512),
    'bilstm_hard': ('bilstm', 'hard', 512),
    'bert_msda': ('bert', 'msda', None),
    'cnn_msda': ('cnn', 'msda', 330),
    'bilstm_msda': ('bilstm', 'msda', 330),
}


def predict(text, model):
    """Classify a single text with one of the six trained victim models.

    Args:
        text: The input text (a single string).
        model: Victim name, ``'<arch>_<dataset>'``: one of ``'bert_hard'``,
            ``'cnn_hard'``, ``'bilstm_hard'``, ``'bert_msda'``, ``'cnn_msda'``
            or ``'bilstm_msda'``.

    Returns:
        ``(label, score)``: the predicted label name and the largest value of
        a softmax taken over the model outputs.

    Raises:
        ValueError: If ``model`` is not one of the supported victim names.
            (Previously a message was printed and ``None`` was returned, so
            callers indexing the result failed with an unrelated TypeError.)
    """
    spec = _PREDICT_SPECS.get(model) if isinstance(model, str) else None
    if spec is None:
        raise ValueError(
            "predict() takes 2 arguments: text={str}, model={one of "
            + ", ".join(_PREDICT_SPECS) + "}; " + f"got model={model!r}")

    import numpy as np
    import torch

    architecture, dataset, pad_length = spec
    id2label = _LABELS_BY_DATASET[dataset]
    classifier, tokenizer = load(architecture, dataset)

    if architecture == 'bert':
        # Truncate to the model's position-embedding limit so over-long texts
        # are classified instead of crashing; shorter texts are unaffected.
        inputs = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True,
            max_length=classifier.config.max_position_embeddings)
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = classifier(**inputs).logits
    else:
        from keras_preprocessing.sequence import pad_sequences
        sequences = tokenizer.texts_to_sequences([text])
        padded = pad_sequences(sequences, maxlen=pad_length)
        outputs = torch.from_numpy(classifier.predict(padded, verbose=0))

    # NOTE(review): for the Keras models this applies softmax to whatever the
    # network outputs. If their final layer is already a softmax, the scores
    # are softmaxed twice and compressed -- confirm against the saved models.
    probabilities = outputs.softmax(dim=-1).tolist()
    predicted_class_id = outputs.argmax(dim=-1).item()
    predicted_score = np.max(probabilities)
    return id2label[predicted_class_id], predicted_score

In [None]:
def _mean_confidence_shift(victim_model, csv_path, n_samples, random_state=None):
    """Return (original mean, adversarial mean, difference) of victim scores, in percent."""
    import pandas as pd

    sampled = pd.read_csv(csv_path).sample(n_samples, random_state=random_state)

    def victim_score(text):
        # predict() returns (label, score); only the score is needed here.
        return predict(text, victim_model)[1]

    original_mean = sampled['example'].apply(victim_score).mean() * 100
    adversarial_mean = sampled['adversarial_example'].apply(victim_score).mean() * 100
    return original_mean, adversarial_mean, original_mean - adversarial_mean


def test_transferability(victim_model, advs_model_1, advs_model_2, n_samples, random_state=None):
    """Measure how adversarial examples crafted on two other models shift a victim's confidence.

    For each CSV, ``n_samples`` rows are drawn at random, the victim scores the
    ``example`` and ``adversarial_example`` columns via ``predict``, and the
    mean scores are compared.

    Args:
        victim_model: Victim name understood by ``predict`` (e.g. ``'bert_hard'``).
        advs_model_1: Path to the first CSV of adversarial examples; must have
            ``example`` and ``adversarial_example`` columns.
        advs_model_2: Path to the second CSV, same columns.
        n_samples: Number of rows to sample from each CSV. pandas raises
            ``ValueError`` if a CSV has fewer rows than this.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            drawn rows are reproducible. ``None`` (default) keeps the
            previous unseeded behaviour.

    Returns:
        Two tuples, one per CSV, each
        ``(mean original score, mean adversarial score, original - adversarial)``
        with all values multiplied by 100.
    """
    return (
        _mean_confidence_shift(victim_model, advs_model_1, n_samples, random_state),
        _mean_confidence_shift(victim_model, advs_model_2, n_samples, random_state),
    )


def _label_agreement_rate(victim_model, csv_path, n_samples, random_state=None):
    """Return the percentage of sampled rows whose predicted label survives the attack."""
    import pandas as pd

    sampled = pd.read_csv(csv_path).sample(n_samples, random_state=random_state)

    def victim_label(text):
        # predict() returns (label, score); only the label is needed here.
        return predict(text, victim_model)[0]

    original_labels = sampled['example'].apply(victim_label)
    adversarial_labels = sampled['adversarial_example'].apply(victim_label)
    # Mean of the element-wise match flags == fraction of unchanged labels.
    return (original_labels == adversarial_labels).mean() * 100


def test_transferability1(victim_model, advs_model_1, advs_model_2, n_samples, random_state=None):
    """Measure how often transferred adversarial examples change the victim's predicted label.

    For each CSV, ``n_samples`` rows are drawn at random and the victim labels
    the ``example`` and ``adversarial_example`` columns via ``predict``. A row
    counts as "accurate" when both labels are equal. Note that the comparison
    is against the victim's own prediction on the original text, not against a
    ground-truth label.

    Args:
        victim_model: Victim name understood by ``predict`` (e.g. ``'cnn_msda'``).
        advs_model_1: Path to the first CSV of adversarial examples; must have
            ``example`` and ``adversarial_example`` columns.
        advs_model_2: Path to the second CSV, same columns.
        n_samples: Number of rows to sample from each CSV. pandas raises
            ``ValueError`` if a CSV has fewer rows than this.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            drawn rows are reproducible. ``None`` (default) keeps the
            previous unseeded behaviour.

    Returns:
        Two tuples, one per CSV, each
        ``(percent of rows with unchanged label, 100 - that percentage)``.
    """
    model_1_accuracy = _label_agreement_rate(victim_model, advs_model_1, n_samples, random_state)
    model_2_accuracy = _label_agreement_rate(victim_model, advs_model_2, n_samples, random_state)
    return (model_1_accuracy, 100 - model_1_accuracy), (model_2_accuracy, 100 - model_2_accuracy)

In [None]:
# BERT as the victim, attacked with adversarial examples crafted on the CNN
# and biLSTM models. First the score-based report, then the label-based one;
# each is run on the 'hard' and then the 'msda' dataset.
for _evaluate, _header in (
    (test_transferability, "\t\t\t\t\t\t adv_CNN_{0} \t\t\t\t\t\t adv_biLSTM_{0}"),
    (test_transferability1, "\t\t\tadv_CNN_{0} \t adv_biLSTM_{0}"),
):
    for _dataset in ("hard", "msda"):
        print(_header.format(_dataset))
        _outcome = _evaluate(
            f"bert_{_dataset}",
            f"transferability/cnn_{_dataset}.csv",
            f"transferability/bilstm_{_dataset}.csv",
            245,
        )
        print(f"BERT_{_dataset} \t << \t", _outcome, "\n")

In [None]:
# CNN as the victim, attacked with adversarial examples crafted on the BERT
# and biLSTM models. First the score-based report, then the label-based one;
# each is run on the 'hard' and then the 'msda' dataset.
for _evaluate, _header in (
    (test_transferability, "\t\t\t\t\t\t adv_BERT_{0} \t\t\t\t\t\t adv_biLSTM_{0}"),
    (test_transferability1, "\t\t\tadv_BERT_{0} \t adv_biLSTM_{0}"),
):
    for _dataset in ("hard", "msda"):
        print(_header.format(_dataset))
        _outcome = _evaluate(
            f"cnn_{_dataset}",
            f"transferability/bert_{_dataset}.csv",
            f"transferability/bilstm_{_dataset}.csv",
            245,
        )
        print(f"CNN_{_dataset} \t << \t", _outcome, "\n")

In [None]:
# biLSTM as the victim, attacked with adversarial examples crafted on the BERT
# and CNN models. First the score-based report, then the label-based one;
# each is run on the 'hard' and then the 'msda' dataset.
#
# Fix: the score-based msda header used to label its second column
# "adv_biLSTM_msda" although that column is computed from cnn_msda.csv; the
# headers are now derived from the same names as the CSV paths.
for _evaluate, _header in (
    (test_transferability, "\t\t\t\t\t\t adv_BERT_{0} \t\t\t\t\t\t adv_CNN_{0}"),
    (test_transferability1, "\t\t\tadv_BERT_{0} \t adv_CNN_{0}"),
):
    for _dataset in ("hard", "msda"):
        print(_header.format(_dataset))
        _outcome = _evaluate(
            f"bilstm_{_dataset}",
            f"transferability/bert_{_dataset}.csv",
            f"transferability/cnn_{_dataset}.csv",
            245,
        )
        print(f"biLSTM_{_dataset} \t << \t", _outcome, "\n")