# Adversarial Attack Evaluation
After normal training and adversarial training we evaluate all of our models on their robustness. We test them on four different attacks, namely

- a custom attack
- BAE attack from textattack
- Textfooler attack from textattack
- BERT-Attack from textattack



The following naming is used below:
- <strong>Pre-Trained Model:</strong> This is the [RoBERTa model](https://huggingface.co/docs/transformers/model_doc/roberta) from Huggingface
- <strong>Initial Hate Speech Model:</strong> This is our RoBERTa model, which we trained on the Hate speech data set.
- <strong>Trained Hate Speech Model:</strong> RoBERTa model, which was trained using adversarial training
    - <strong>Custom Trained:</strong> RoBERTa model, which was trained using the custom attack
    - <strong>Custom Finetuned:</strong> Initial Hate Speech Model, which was finetuned using the custom attack
    - <strong>BAE Trained:</strong> RoBERTa model, which was trained using the BAE attack


## Install

In [None]:
# The kernel needs to be restarted after the pip installs
# It appears that textattack has dependency conflicts if not run on an ARM chip.
!pip3 install transformers
!pip3 install textattack
!pip3 install --force-reinstall textattack # force is often needed due to dependency conflicts
!pip3 install --upgrade tensorflow
!pip3 install sentence_transformers

## Import

In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import string
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.wsd import lesk
nltk.download('stopwords')
nltk.download('punkt')

# textattack packages
import textattack
from textattack.constraints.pre_transformation import RepeatModification, StopwordModification
from textattack.constraints.semantics import WordEmbeddingDistance

# transformers packages
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaConfig
from transformers import RobertaTokenizer, RobertaForSequenceClassification

from torch.utils.data import DataLoader, TensorDataset

from tqdm import tqdm

from trainer import Trainer


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/simonzollhoefer/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/simonzollhoefer/nltk_data...
[nltk_data]   Package punkt is already up-to-date!




#### Data cleaning
Since the data needs to be cleaned for the attack, we defined the following function.

In [None]:
# This is copied from https://www.kaggle.com/code/soumyakushwaha/ethicalcommunicationai
# ----------------------------------------
stopword = set(stopwords.words('english'))

# Patterns whose matches clean_text deletes, applied in exactly this order.
# They are written as raw strings so that sequences such as "\[" or "\S" reach
# the regex engine unchanged instead of being invalid Python string escapes,
# and they are compiled once here rather than on every call.
_REMOVAL_PATTERNS = (
    re.compile(r'\[.*?\]'),                # text enclosed in square brackets
    re.compile(r'https?://\S+|www\.\S+'),  # URLs
    re.compile(r'<.*?>+'),                 # HTML-like tags
    # NOTE(review): "\@w+" matches "@" followed by literal "w" characters, not
    # a whole @mention (that would be "\@\w+"). Kept as-is so the cleaning does
    # not change; presumably the models were trained on text cleaned this way
    # -- confirm before fixing.
    re.compile(r"\@w+|\#"),
    re.compile(r"[^\w\s]"),                # everything that is neither a word character nor whitespace
    re.compile('[%s]' % re.escape(string.punctuation)),  # ASCII punctuation still left, i.e. "_"
    re.compile(r'\n'),                     # newlines
    re.compile(r'\w*\d\w*'),               # tokens that contain a digit
)


def clean_text(text):
    """Normalise a tweet for the attack pipeline.

    The input is converted to a lower-case string, every match of the
    patterns in ``_REMOVAL_PATTERNS`` is deleted in order, the remainder is
    tokenised with ``word_tokenize`` and English stopwords are dropped.

    Args:
        text: The raw text; any object is accepted because it is passed
            through ``str()`` first.

    Returns:
        The remaining tokens joined by single spaces.
    """
    text = str(text).lower()
    for pattern in _REMOVAL_PATTERNS:
        text = pattern.sub('', text)
    tweet_tokens = word_tokenize(text)
    # removing stopwords
    filtered_tweets = [w for w in tweet_tokens if w not in stopword]
    return " ".join(filtered_tweets)
#--------------------------------------------------------------------------------------

#### Load Dataset


In [13]:
# Constants
# SEED drives every random draw in this cell; BATCH_SIZE, LEARNING_RATE,
# EPOCHS and MODEL_PATH are only defined here and not used within this cell.
SEED = 42
BATCH_SIZE = 32
LEARNING_RATE = 1e-5
MAX_TEXT_LENGTH = 512
EPOCHS = 10
MODEL_PATH = 'roberta_model.bin'
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Set seeds (NumPy, PyTorch CPU and, when available, all CUDA devices)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)


labeled_data = pd.read_csv('./datasets/hate_speech_data.csv')
# Hate Speech and Offensive Language Data (the class counts below sum to 24,783 entries).
# - Class 0: 1,430 entries (hate speech)
# - Class 1: 19,190 entries (offensive language)
# - Class 2: 4,163 entries (neither)

# Processing labeled hate speech dataset
# Keeps only classes 0 and 1 and maps both to category 1.
# NOTE(review): hate_offensive_data is not used again in this cell.
hate_offensive_data = labeled_data[labeled_data['class'] != 2].copy()
hate_offensive_data.loc[:, 'category'] = hate_offensive_data['class'].replace([0, 1], 1)
hate_offensive_data = hate_offensive_data.rename(columns={'tweet': 'text'})

# Test 1 ---
# Select data for each class
hate_speech_data = labeled_data[labeled_data['class'] == 0].copy()
offensive_data = labeled_data[labeled_data['class'] == 1].copy()
neither_data = labeled_data[labeled_data['class'] == 2].copy()
# Down-sample the two other classes to the number of hate speech rows so that
# all three classes contribute equally many rows.
sample_size = len(hate_speech_data)
offensive_sample = offensive_data.sample(n=sample_size, random_state=SEED)
neither_sample = neither_data.sample(n=sample_size, random_state=SEED)
# Binary target: hate speech and offensive language -> 1, neither -> 0.
hate_speech_data['category'] = 1
offensive_sample['category'] = 1
neither_sample['category'] = 0
# Keep only the tweet and its binary target, renamed to 'text' and 'label'.
sampled_data = pd.concat([hate_speech_data, offensive_sample, neither_sample], ignore_index=True)[['tweet', 'category']]
sampled_data.rename(columns={'tweet': 'text', 'category': 'label'}, inplace=True)
sampled_data['text'] = sampled_data['text'].apply(clean_text)  # clean_text is defined in the data cleaning cell
# 70 % train; the remaining 30 % is split in half into validation and test.
train_data, intermediate_data = train_test_split(sampled_data, test_size=0.3, random_state=SEED)
validation_data, test_data = train_test_split(intermediate_data, test_size=0.5, random_state=SEED)
# Tokenize each split with padding, truncation to MAX_TEXT_LENGTH and PyTorch tensors as output.
train_tokens = tokenizer(train_data['text'].tolist(), padding=True, truncation=True, max_length=MAX_TEXT_LENGTH, return_tensors='pt')
validation_tokens = tokenizer(validation_data['text'].tolist(), padding=True, truncation=True, max_length=MAX_TEXT_LENGTH, return_tensors='pt')
test_tokens = tokenizer(test_data['text'].tolist(), padding=True, truncation=True, max_length=MAX_TEXT_LENGTH, return_tensors='pt')
print(f"New Train data shape: {train_data.shape}")
print(f"New Validation data shape: {validation_data.shape}")
print(f"New Test data shape: {test_data.shape}")


New Train data shape: (3003, 2)
New Validation data shape: (643, 2)
New Test data shape: (644, 2)


#### Method to load models

In [14]:
def load_model(file_name):
    """Rebuild a two-label RoBERTa classifier on the CPU from saved weights.

    A default ``RobertaConfig`` is created, set to two labels and then
    overwritten with the roberta-base settings listed below. The state dict
    stored in ``file_name`` is loaded into a freshly constructed
    ``RobertaForSequenceClassification`` that is switched to evaluation mode.

    Args:
        file_name: Path of the state dict file read with ``torch.load``.

    Returns:
        A ``(model, tokenizer)`` tuple; the tokenizer is the pre-trained
        "roberta-base" ``RobertaTokenizer``.
    """
    base_settings = {
        "architectures": [
            "RobertaForMaskedLM"
        ],
        "attention_probs_dropout_prob": 0.1,
        "bos_token_id": 0,
        "eos_token_id": 2,
        "hidden_act": "gelu",
        "hidden_dropout_prob": 0.1,
        "hidden_size": 768,
        "initializer_range": 0.02,
        "intermediate_size": 3072,
        "layer_norm_eps": 1e-05,
        "max_position_embeddings": 514,
        "model_type": "roberta",
        "num_attention_heads": 12,
        "num_hidden_layers": 12,
        "pad_token_id": 1,
        "type_vocab_size": 1,
        "vocab_size": 50265
    }

    config = RobertaConfig()
    config.num_labels = 2
    for setting, value in base_settings.items():
        setattr(config, setting, value)

    cpu = torch.device('cpu')
    classifier = RobertaForSequenceClassification(config)
    # Weights are mapped to the CPU regardless of the device they were saved from.
    saved_weights = torch.load(file_name, map_location=cpu)
    classifier.load_state_dict(saved_weights)
    roberta_tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    classifier.eval()
    classifier.to(cpu)
    return classifier, roberta_tokenizer

## Attack Setup
After loading the initial models, we can attack them. To do so we try different attacks:

- a custom attack
- BAE attack from textattack
- TextFooler attack from textattack



### Custom Attacker

In [None]:
ATTACK_SEED = 71  # not referenced by create_custom_attacker below


def create_custom_attacker(model_wrapper, dataset, num_examples=20, nr=0):
    """Build a textattack Attacker that runs a hand-assembled word-swap attack.

    The attack follows https://textattack.readthedocs.io/en/latest/api/attack.html
    and combines an untargeted classification goal, three constraints, an
    embedding-based word swap and a greedy word-importance search.

    Args:
        model_wrapper: The wrapped model handed to the goal function.
        dataset: The dataset handed to the Attacker.
        num_examples: Value passed to ``AttackArgs`` as ``num_examples``.
        nr: Number used in the CSV log file name ``log_{nr}.csv``.

    Returns:
        The configured ``textattack.Attacker``; nothing is attacked yet.
    """
    # Untargeted classification: tries to minimize the score of the correct
    # label until it is no longer the predicted label.
    goal = textattack.goal_functions.UntargetedClassification(model_wrapper)

    restrictions = [
        # prevents the same word from being modified multiple times
        RepeatModification(),
        # controls the modification of stopwords (e.g., "the," "is," "and")
        StopwordModification(),
        # requires a cosine similarity of at least 0.9 between word embeddings
        # so that the replacement word is semantically similar
        WordEmbeddingDistance(min_cos_sim=0.9),
    ]

    # 50 candidates is the default
    word_swap = textattack.transformations.word_swaps.word_swap_embedding.WordSwapEmbedding(max_candidates=50)
    greedy_search = textattack.search_methods.GreedyWordSwapWIR(wir_method="delete")
    # Only assembles the attack; it is run later through the returned Attacker.
    attack = textattack.Attack(goal, restrictions, word_swap, greedy_search)

    # CSV logging per run number, checkpoint every 5 attacks, no stdout logging.
    run_args = textattack.AttackArgs(
        num_examples=num_examples,
        log_to_csv=f"log_{nr}.csv",
        checkpoint_interval=5,
        checkpoint_dir="checkpoints",
        disable_stdout=True,
    )
    return textattack.Attacker(attack, dataset, run_args)


### BAE Attacker

In [None]:
def create_bae_attacker(model_wrapper, dataset, num_examples=20, nr=0):
    """Build a textattack Attacker that runs the BAE (Garg 2019) recipe.

    Args:
        model_wrapper: The wrapped model the recipe is built for.
        dataset: The dataset handed to the Attacker.
        num_examples: Value passed to ``AttackArgs`` as ``num_examples``.
        nr: Number used in the CSV log file name ``log_{nr}.csv``.

    Returns:
        The configured ``textattack.Attacker``; nothing is attacked yet.
    """
    recipe = textattack.attack_recipes.bae_garg_2019.BAEGarg2019.build(model_wrapper)

    # CSV logging per run number, checkpoint every 5 attacks, no stdout logging.
    run_args = textattack.AttackArgs(
        num_examples=num_examples,
        log_to_csv=f"log_{nr}.csv",
        checkpoint_interval=5,
        checkpoint_dir="checkpoints",
        disable_stdout=True,
    )
    return textattack.Attacker(recipe, dataset, run_args)


### Textfooler Attacker

In [None]:
def create_textfooler_attacker(model_wrapper, dataset, num_examples=20, nr=0):
    """Build a textattack Attacker that runs the TextFooler (Jin 2019) recipe.

    Args:
        model_wrapper: The wrapped model the recipe is built for.
        dataset: The dataset handed to the Attacker.
        num_examples: Value passed to ``AttackArgs`` as ``num_examples``.
        nr: Number used in the CSV log file name ``log_{nr}.csv``.

    Returns:
        The configured ``textattack.Attacker``; nothing is attacked yet.
    """
    recipe = textattack.attack_recipes.textfooler_jin_2019.TextFoolerJin2019.build(model_wrapper)

    # CSV logging per run number, checkpoint every 5 attacks, no stdout logging.
    run_args = textattack.AttackArgs(
        num_examples=num_examples,
        log_to_csv=f"log_{nr}.csv",
        checkpoint_interval=5,
        checkpoint_dir="checkpoints",
        disable_stdout=True,
    )
    return textattack.Attacker(recipe, dataset, run_args)

### BERT Attacker

In [None]:
def create_bert_attacker(model_wrapper, dataset, num_examples=20, nr=0):
    """Build a textattack Attacker that runs the BERT-Attack (Li 2020) recipe.

    Unlike the other attacker factories in this notebook, the attack
    arguments additionally set ``query_budget=100`` and
    ``num_workers_per_device=2``.

    Args:
        model_wrapper: The wrapped model the recipe is built for.
        dataset: The dataset handed to the Attacker.
        num_examples: Value passed to ``AttackArgs`` as ``num_examples``.
        nr: Number used in the CSV log file name ``log_{nr}.csv``.

    Returns:
        The configured ``textattack.Attacker``; nothing is attacked yet.
    """
    recipe = textattack.attack_recipes.bert_attack_li_2020.BERTAttackLi2020.build(model_wrapper)

    # CSV logging per run number, checkpoint every 5 attacks, no stdout logging.
    run_args = textattack.AttackArgs(
        num_examples=num_examples,
        log_to_csv=f"log_{nr}.csv",
        checkpoint_interval=5,
        checkpoint_dir="checkpoints",
        disable_stdout=True,
        query_budget=100,
        num_workers_per_device=2,
    )
    return textattack.Attacker(recipe, dataset, run_args)

## Evaluate the Models on Adversarial Attacks


In [None]:
# global variables
import math  # needed for math.ceil below; the notebook's import cell does not import it

# Number of validation examples every attacker is run on
num_examples = 300

# Draw the same number of examples from each label so the attacked set is balanced
sample_size = math.ceil(num_examples / 2)
balanced_dataset = validation_data.groupby("label").sample(n=sample_size, random_state=SEED)
# Shuffle with a fixed seed so the example order does not depend on the global
# NumPy random state (and therefore on which cells were executed before)
balanced_dataset = balanced_dataset.sample(frac=1, random_state=SEED).reset_index(drop=True)

# One plain tuple per row, in column order, wrapped as a textattack dataset
temp = list(balanced_dataset.itertuples(index=False, name=None))
dataset = textattack.datasets.Dataset(temp)

#### Initial Hate Speech Model

As the first step we want to get a baseline of the accuracy under attack of our Initial Hate Speech Model (the training of this model is done in notebook inital_hate_speech_model_training.ipynb).

In [None]:
# Load the saved weights of the Initial Hate Speech Model (this also rebinds the
# global `tokenizer`) and wrap model and tokenizer so textattack can query them.
initial_hate_speech_model, tokenizer = load_model('initial_hate_speech_model.bin')
initial_hate_speech_model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(initial_hate_speech_model, tokenizer)

In [None]:
# Custom attack on Initial Hate Speech Model
# nr=0 -> results are logged to log_0.csv
custom_attacker = create_custom_attacker(initial_hate_speech_model_wrapper, dataset, num_examples=num_examples, nr=0)
# Runs the attack; the returned list of attack results is shown as the cell output.
custom_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_0.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.9
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): RepeatModification
    (2): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 0 / 5 / 0 / 5:   2%|▏         | 5/300 [00:11<11:30,  2.34s/it]textattack: Saving checkpoint under "checkpoints/1698240425872.ta.chkpt" at 2023-10-25 21:27:05 after 5 attacks.







[Succeeded / Failed / Skipped / Total] 0 / 9 / 1 / 10:   3%|▎         | 10/300 [00:26<12:45,  2.64s/it]textattack: Saving checkpoint under "checkpoints/1698240440547.ta.chkpt" at 2023-10-25 21:27:20 after 10 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 10 / 3 / 15:   5%|▌         | 15/300 [00:31<10:03,  2.12s/it]textattack: Saving checkpoint under "checkpoints/1698240445926.ta.chkpt" at 2023-10-25 21:27:25 after 15 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 15 / 3 / 20:   7%|▋         | 20/300 [00:53<12:23,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698240467251.ta.chkpt" at 2023-10-25 21:27:47 after 20 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 20 / 3 / 25:   8%|▊         | 25/300 [01:12<13:13,  2.88s/it]textattack: Saving checkpoint under "checkpoints/1698240486255.ta.chkpt" at 2023-10-25 21:28:06 after 25 attacks.
[Succeeded / Failed / Skipped / Total] 2 / 20 / 4 / 26:   9%|▊         | 26/300 [01:12<12:41,  2.78s/it]






[Succeeded / Failed / Skipped / Total] 2 / 24 / 4 / 30:  10%|█         | 30/300 [01:21<12:12,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698240495513.ta.chkpt" at 2023-10-25 21:28:15 after 30 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 27 / 5 / 35:  12%|█▏        | 35/300 [01:31<11:33,  2.62s/it]textattack: Saving checkpoint under "checkpoints/1698240505796.ta.chkpt" at 2023-10-25 21:28:25 after 35 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 29 / 7 / 40:  13%|█▎        | 40/300 [01:42<11:03,  2.55s/it]textattack: Saving checkpoint under "checkpoints/1698240516272.ta.chkpt" at 2023-10-25 21:28:36 after 40 attacks.







[Succeeded / Failed / Skipped / Total] 5 / 33 / 7 / 45:  15%|█▌        | 45/300 [01:57<11:05,  2.61s/it]textattack: Saving checkpoint under "checkpoints/1698240531685.ta.chkpt" at 2023-10-25 21:28:51 after 45 attacks.







[Succeeded / Failed / Skipped / Total] 6 / 37 / 7 / 50:  17%|█▋        | 50/300 [02:14<11:11,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698240548505.ta.chkpt" at 2023-10-25 21:29:08 after 50 attacks.







[Succeeded / Failed / Skipped / Total] 6 / 42 / 7 / 55:  18%|█▊        | 55/300 [02:28<11:00,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698240562397.ta.chkpt" at 2023-10-25 21:29:22 after 55 attacks.







[Succeeded / Failed / Skipped / Total] 7 / 46 / 7 / 60:  20%|██        | 60/300 [02:38<10:34,  2.64s/it]textattack: Saving checkpoint under "checkpoints/1698240572820.ta.chkpt" at 2023-10-25 21:29:32 after 60 attacks.







[Succeeded / Failed / Skipped / Total] 7 / 50 / 8 / 65:  22%|██▏       | 65/300 [02:47<10:06,  2.58s/it]textattack: Saving checkpoint under "checkpoints/1698240581830.ta.chkpt" at 2023-10-25 21:29:41 after 65 attacks.
[Succeeded / Failed / Skipped / Total] 7 / 50 / 9 / 66:  22%|██▏       | 66/300 [02:47<09:55,  2.54s/it]






[Succeeded / Failed / Skipped / Total] 8 / 51 / 11 / 70:  23%|██▎       | 70/300 [02:57<09:44,  2.54s/it]textattack: Saving checkpoint under "checkpoints/1698240592127.ta.chkpt" at 2023-10-25 21:29:52 after 70 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 55 / 12 / 75:  25%|██▌       | 75/300 [03:11<09:35,  2.56s/it]textattack: Saving checkpoint under "checkpoints/1698240606137.ta.chkpt" at 2023-10-25 21:30:06 after 75 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 58 / 14 / 80:  27%|██▋       | 80/300 [03:24<09:21,  2.55s/it]textattack: Saving checkpoint under "checkpoints/1698240618172.ta.chkpt" at 2023-10-25 21:30:18 after 80 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 63 / 14 / 85:  28%|██▊       | 85/300 [03:43<09:25,  2.63s/it]textattack: Saving checkpoint under "checkpoints/1698240637803.ta.chkpt" at 2023-10-25 21:30:37 after 85 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 67 / 15 / 90:  30%|███       | 90/300 [03:54<09:07,  2.61s/it]textattack: Saving checkpoint under "checkpoints/1698240649005.ta.chkpt" at 2023-10-25 21:30:49 after 90 attacks.







[Succeeded / Failed / Skipped / Total] 9 / 71 / 15 / 95:  32%|███▏      | 95/300 [04:15<09:11,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698240669808.ta.chkpt" at 2023-10-25 21:31:09 after 95 attacks.







[Succeeded / Failed / Skipped / Total] 10 / 75 / 15 / 100:  33%|███▎      | 100/300 [04:29<08:58,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698240683178.ta.chkpt" at 2023-10-25 21:31:23 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 79 / 15 / 105:  35%|███▌      | 105/300 [04:50<09:00,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698240705112.ta.chkpt" at 2023-10-25 21:31:45 after 105 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 81 / 18 / 110:  37%|███▋      | 110/300 [04:59<08:38,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698240714109.ta.chkpt" at 2023-10-25 21:31:54 after 110 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 85 / 19 / 115:  38%|███▊      | 115/300 [05:10<08:19,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698240724895.ta.chkpt" at 2023-10-25 21:32:04 after 115 attacks.







[Succeeded / Failed / Skipped / Total] 12 / 88 / 20 / 120:  40%|████      | 120/300 [05:24<08:07,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698240738940.ta.chkpt" at 2023-10-25 21:32:18 after 120 attacks.







[Succeeded / Failed / Skipped / Total] 12 / 93 / 20 / 125:  42%|████▏     | 125/300 [05:38<07:54,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698240753109.ta.chkpt" at 2023-10-25 21:32:33 after 125 attacks.







[Succeeded / Failed / Skipped / Total] 14 / 96 / 20 / 130:  43%|████▎     | 130/300 [05:55<07:44,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698240769289.ta.chkpt" at 2023-10-25 21:32:49 after 130 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 100 / 20 / 135:  45%|████▌     | 135/300 [06:08<07:30,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698240782766.ta.chkpt" at 2023-10-25 21:33:02 after 135 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 104 / 20 / 140:  47%|████▋     | 140/300 [06:14<07:08,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698240788894.ta.chkpt" at 2023-10-25 21:33:08 after 140 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 108 / 20 / 145:  48%|████▊     | 145/300 [06:26<06:53,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698240800672.ta.chkpt" at 2023-10-25 21:33:20 after 145 attacks.







[Succeeded / Failed / Skipped / Total] 18 / 111 / 21 / 150:  50%|█████     | 150/300 [06:42<06:42,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698240816479.ta.chkpt" at 2023-10-25 21:33:36 after 150 attacks.







[Succeeded / Failed / Skipped / Total] 20 / 114 / 21 / 155:  52%|█████▏    | 155/300 [06:55<06:29,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698240830142.ta.chkpt" at 2023-10-25 21:33:50 after 155 attacks.







[Succeeded / Failed / Skipped / Total] 20 / 119 / 21 / 160:  53%|█████▎    | 160/300 [07:14<06:20,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698240848796.ta.chkpt" at 2023-10-25 21:34:08 after 160 attacks.







[Succeeded / Failed / Skipped / Total] 21 / 122 / 22 / 165:  55%|█████▌    | 165/300 [07:24<06:03,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698240858865.ta.chkpt" at 2023-10-25 21:34:18 after 165 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 126 / 22 / 170:  57%|█████▋    | 170/300 [07:41<05:53,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698240875847.ta.chkpt" at 2023-10-25 21:34:35 after 170 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 130 / 23 / 175:  58%|█████▊    | 175/300 [07:56<05:40,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698240890977.ta.chkpt" at 2023-10-25 21:34:50 after 175 attacks.







[Succeeded / Failed / Skipped / Total] 23 / 134 / 23 / 180:  60%|██████    | 180/300 [08:04<05:22,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698240898522.ta.chkpt" at 2023-10-25 21:34:58 after 180 attacks.







[Succeeded / Failed / Skipped / Total] 25 / 136 / 24 / 185:  62%|██████▏   | 185/300 [08:14<05:07,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698240908226.ta.chkpt" at 2023-10-25 21:35:08 after 185 attacks.
[Succeeded / Failed / Skipped / Total] 25 / 136 / 25 / 186:  62%|██████▏   | 186/300 [08:14<05:02,  2.66s/it]






[Succeeded / Failed / Skipped / Total] 26 / 138 / 26 / 190:  63%|██████▎   | 190/300 [08:24<04:52,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698240918653.ta.chkpt" at 2023-10-25 21:35:18 after 190 attacks.







[Succeeded / Failed / Skipped / Total] 28 / 141 / 26 / 195:  65%|██████▌   | 195/300 [08:36<04:37,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698240930442.ta.chkpt" at 2023-10-25 21:35:30 after 195 attacks.







[Succeeded / Failed / Skipped / Total] 30 / 144 / 26 / 200:  67%|██████▋   | 200/300 [08:51<04:25,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698240945543.ta.chkpt" at 2023-10-25 21:35:45 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 148 / 26 / 205:  68%|██████▊   | 205/300 [09:05<04:12,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698240959200.ta.chkpt" at 2023-10-25 21:35:59 after 205 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 152 / 27 / 210:  70%|███████   | 210/300 [09:11<03:56,  2.63s/it]textattack: Saving checkpoint under "checkpoints/1698240965518.ta.chkpt" at 2023-10-25 21:36:05 after 210 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 157 / 27 / 215:  72%|███████▏  | 215/300 [09:27<03:44,  2.64s/it]textattack: Saving checkpoint under "checkpoints/1698240981869.ta.chkpt" at 2023-10-25 21:36:21 after 215 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 162 / 27 / 220:  73%|███████▎  | 220/300 [09:45<03:32,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698240999802.ta.chkpt" at 2023-10-25 21:36:39 after 220 attacks.







[Succeeded / Failed / Skipped / Total] 32 / 166 / 27 / 225:  75%|███████▌  | 225/300 [10:03<03:21,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698241018079.ta.chkpt" at 2023-10-25 21:36:58 after 225 attacks.







[Succeeded / Failed / Skipped / Total] 32 / 171 / 27 / 230:  77%|███████▋  | 230/300 [10:19<03:08,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698241033703.ta.chkpt" at 2023-10-25 21:37:13 after 230 attacks.







[Succeeded / Failed / Skipped / Total] 32 / 175 / 28 / 235:  78%|███████▊  | 235/300 [10:29<02:54,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698241043696.ta.chkpt" at 2023-10-25 21:37:23 after 235 attacks.
[Succeeded / Failed / Skipped / Total] 32 / 175 / 29 / 236:  79%|███████▊  | 236/300 [10:29<02:50,  2.67s/it]






[Succeeded / Failed / Skipped / Total] 32 / 179 / 29 / 240:  80%|████████  | 240/300 [10:35<02:38,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698241049846.ta.chkpt" at 2023-10-25 21:37:29 after 240 attacks.







[Succeeded / Failed / Skipped / Total] 32 / 184 / 29 / 245:  82%|████████▏ | 245/300 [10:49<02:25,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698241063351.ta.chkpt" at 2023-10-25 21:37:43 after 245 attacks.







[Succeeded / Failed / Skipped / Total] 33 / 188 / 29 / 250:  83%|████████▎ | 250/300 [11:08<02:13,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698241082425.ta.chkpt" at 2023-10-25 21:38:02 after 250 attacks.







[Succeeded / Failed / Skipped / Total] 33 / 193 / 29 / 255:  85%|████████▌ | 255/300 [11:21<02:00,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698241095472.ta.chkpt" at 2023-10-25 21:38:15 after 255 attacks.







[Succeeded / Failed / Skipped / Total] 34 / 197 / 29 / 260:  87%|████████▋ | 260/300 [11:36<01:47,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698241110405.ta.chkpt" at 2023-10-25 21:38:30 after 260 attacks.







[Succeeded / Failed / Skipped / Total] 35 / 199 / 31 / 265:  88%|████████▊ | 265/300 [11:43<01:32,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698241117684.ta.chkpt" at 2023-10-25 21:38:37 after 265 attacks.







[Succeeded / Failed / Skipped / Total] 36 / 202 / 32 / 270:  90%|█████████ | 270/300 [11:58<01:19,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698241132706.ta.chkpt" at 2023-10-25 21:38:52 after 270 attacks.







[Succeeded / Failed / Skipped / Total] 37 / 205 / 33 / 275:  92%|█████████▏| 275/300 [12:11<01:06,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698241145244.ta.chkpt" at 2023-10-25 21:39:05 after 275 attacks.







[Succeeded / Failed / Skipped / Total] 38 / 209 / 33 / 280:  93%|█████████▎| 280/300 [12:22<00:53,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698241157047.ta.chkpt" at 2023-10-25 21:39:17 after 280 attacks.







[Succeeded / Failed / Skipped / Total] 38 / 214 / 33 / 285:  95%|█████████▌| 285/300 [12:41<00:40,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698241175786.ta.chkpt" at 2023-10-25 21:39:35 after 285 attacks.







[Succeeded / Failed / Skipped / Total] 39 / 218 / 33 / 290:  97%|█████████▋| 290/300 [12:57<00:26,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698241191438.ta.chkpt" at 2023-10-25 21:39:51 after 290 attacks.







[Succeeded / Failed / Skipped / Total] 39 / 223 / 33 / 295:  98%|█████████▊| 295/300 [13:15<00:13,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698241209637.ta.chkpt" at 2023-10-25 21:40:09 after 295 attacks.







[Succeeded / Failed / Skipped / Total] 39 / 227 / 34 / 300: 100%|██████████| 300/300 [13:24<00:00,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698241218206.ta.chkpt" at 2023-10-25 21:40:18 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 39 / 227 / 34 / 300: 100%|██████████| 300/300 [13:24<00:00,  2.68s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 39     |
| Number of failed attacks:     | 227    |
| Number of skipped attacks:    | 34     |
| Original accuracy:            | 88.67% |
| Accuracy under attack:        | 75.67% |
| Attack success rate:          | 14.66% |
| Average perturbed word %:     | 21.59% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 18.17  |
+-------------------------------+--------+





[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bab29250>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x35aebd150>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb2f8150>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb2e01d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb63b4d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb2f8350>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x35aefe250>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bac87b90>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb63bf50>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x2bb109150>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x35af1fb10>,
 <textattack.attack_results.successful_attack_result.Su

In [None]:
# BAE attack on Initial Hate Speech Model
# nr=1 -> results are logged to log_1.csv
bae_attacker = create_bae_attacker(initial_hate_speech_model_wrapper, dataset, num_examples=num_examples, nr=1)
# Runs the attack; the returned list of attack results is shown as the cell output.
bae_attacker.attack_dataset()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_1.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapMaskedLM(
    (method):  bae
    (masked_lm_name):  BertForMaskedLM
    (max_length):  512
    (max_candidates):  50
    (min_confidence):  0.0
  )
  (constraints): 
    (0): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (1): UniversalSentenceEncoder(
        (metric):  cosine
        (threshold):  0.936338023
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  True
      )
    (2): RepeatModification
    (3): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 78 / 7 / 15 / 100:  33%|███▎      | 100/300 [15:20<30:41,  9.21s/it]textattack: Saving checkpoint under "checkpoints/1698244773254.ta.chkpt" at 2023-10-25 22:39:33 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 160 / 14 / 26 / 200:  67%|██████▋   | 200/300 [30:28<15:14,  9.14s/it]textattack: Saving checkpoint under "checkpoints/1698245681460.ta.chkpt" at 2023-10-25 22:54:41 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 244 / 22 / 34 / 300: 100%|██████████| 300/300 [46:41<00:00,  9.34s/it]textattack: Saving checkpoint under "checkpoints/1698246654551.ta.chkpt" at 2023-10-25 23:10:54 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 244 / 22 / 34 / 300: 100%|██████████| 300/300 [46:41<00:00,  9.34s/it]






+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 244    |
| Number of failed attacks:     | 22     |
| Number of skipped attacks:    | 34     |
| Original accuracy:            | 88.67% |
| Accuracy under attack:        | 7.33%  |
| Attack success rate:          | 91.73% |
| Average perturbed word %:     | 31.7%  |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 66.03  |
+-------------------------------+--------+


[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2bc1a8790>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x321378f50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3a60cbdd0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x29aeb3c50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x321374d50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x320572750>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2bc1a1fd0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3213792d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x320573290>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x320553910>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult 

In [None]:
# Textfooler attack against the Initial Hate Speech Model.
# With nr=2 the run output reports logging to log_2.csv.
textfooler_attacker = create_textfooler_attacker(
    initial_hate_speech_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=2,
)
# Kept as the cell's final expression so the notebook displays the returned list of attack results.
textfooler_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_2.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.5
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (2): UniversalSentenceEncoder(
        (metric):  angular
        (threshold):  0.840845057
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  False
      )
    (3): RepeatModification
    (4): StopwordModification
    (5): InputColumnModification(
        (matching_column_labels):  ['premise', 'hypothesis']
       

[Succeeded / Failed / Skipped / Total] 75 / 10 / 15 / 100:  33%|███▎      | 100/300 [17:48<35:36, 10.68s/it]textattack: Saving checkpoint under "checkpoints/1698247725528.ta.chkpt" at 2023-10-25 23:28:45 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 157 / 17 / 26 / 200:  67%|██████▋   | 200/300 [35:24<17:42, 10.62s/it]textattack: Saving checkpoint under "checkpoints/1698248782208.ta.chkpt" at 2023-10-25 23:46:22 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 243 / 23 / 34 / 300: 100%|██████████| 300/300 [51:20<00:00, 10.27s/it]textattack: Saving checkpoint under "checkpoints/1698249738265.ta.chkpt" at 2023-10-26 00:02:18 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 243 / 23 / 34 / 300: 100%|██████████| 300/300 [51:20<00:00, 10.27s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 243    |
| Number of failed attacks:     | 23     |
| Number of skipped attacks:    | 34     |
| Original accuracy:            | 88.67% |
| Accuracy under attack:        | 7.67%  |
| Attack success rate:          | 91.35% |
| Average perturbed word %:     | 31.06% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 81.98  |
+-------------------------------+--------+





[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x447e183d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2bc4cd990>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3efb393d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x447bce710>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x434119050>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x4341934d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x4341d08d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3213cd3d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x321f48550>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x321888fd0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x434

#### Custom Trained Model

Now we test our adversarially trained models and check their new accuracy under attack. The training of these models is done in the notebook `adversarial_training.ipynb`.

In [None]:
# Load the Custom Trained model from its saved weights file and wrap it,
# together with its tokenizer, in the textattack HuggingFace wrapper used by the attackers.
custom_trained_model, tokenizer = load_model('custom_trained_model.bin')
custom_trained_model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(
    custom_trained_model,
    tokenizer,
)

In [None]:
# Custom attack against the Custom Trained Model.
# With nr=4 the run output reports logging to log_4.csv.
custom_attacker = create_custom_attacker(
    custom_trained_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=4,
)
# Kept as the cell's final expression so the notebook displays the returned list of attack results.
custom_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_4.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.9
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): RepeatModification
    (2): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 0 / 5 / 0 / 5:   2%|▏         | 5/300 [00:10<10:46,  2.19s/it]textattack: Saving checkpoint under "checkpoints/1698241231949.ta.chkpt" at 2023-10-25 21:40:31 after 5 attacks.







[Succeeded / Failed / Skipped / Total] 0 / 9 / 1 / 10:   3%|▎         | 10/300 [00:25<12:12,  2.53s/it]textattack: Saving checkpoint under "checkpoints/1698241246242.ta.chkpt" at 2023-10-25 21:40:46 after 10 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 11 / 2 / 15:   5%|▌         | 15/300 [00:32<10:22,  2.18s/it]textattack: Saving checkpoint under "checkpoints/1698241253731.ta.chkpt" at 2023-10-25 21:40:53 after 15 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 16 / 2 / 20:   7%|▋         | 20/300 [00:53<12:28,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698241274416.ta.chkpt" at 2023-10-25 21:41:14 after 20 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 21 / 2 / 25:   8%|▊         | 25/300 [01:12<13:15,  2.89s/it]textattack: Saving checkpoint under "checkpoints/1698241293294.ta.chkpt" at 2023-10-25 21:41:33 after 25 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 26 / 2 / 30:  10%|█         | 30/300 [01:21<12:12,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698241302385.ta.chkpt" at 2023-10-25 21:41:42 after 30 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 29 / 3 / 35:  12%|█▏        | 35/300 [01:32<11:37,  2.63s/it]textattack: Saving checkpoint under "checkpoints/1698241313124.ta.chkpt" at 2023-10-25 21:41:53 after 35 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 32 / 5 / 40:  13%|█▎        | 40/300 [01:43<11:11,  2.58s/it]textattack: Saving checkpoint under "checkpoints/1698241324312.ta.chkpt" at 2023-10-25 21:42:04 after 40 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 36 / 5 / 45:  15%|█▌        | 45/300 [01:59<11:19,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698241340940.ta.chkpt" at 2023-10-25 21:42:20 after 45 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 41 / 5 / 50:  17%|█▋        | 50/300 [02:19<11:37,  2.79s/it]textattack: Saving checkpoint under "checkpoints/1698241360549.ta.chkpt" at 2023-10-25 21:42:40 after 50 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 46 / 5 / 55:  18%|█▊        | 55/300 [02:33<11:25,  2.80s/it]textattack: Saving checkpoint under "checkpoints/1698241374777.ta.chkpt" at 2023-10-25 21:42:54 after 55 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 51 / 5 / 60:  20%|██        | 60/300 [02:45<11:01,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698241386390.ta.chkpt" at 2023-10-25 21:43:06 after 60 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 55 / 6 / 65:  22%|██▏       | 65/300 [02:54<10:30,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698241395458.ta.chkpt" at 2023-10-25 21:43:15 after 65 attacks.
[Succeeded / Failed / Skipped / Total] 4 / 55 / 7 / 66:  22%|██▏       | 66/300 [02:54<10:19,  2.65s/it]






[Succeeded / Failed / Skipped / Total] 4 / 57 / 9 / 70:  23%|██▎       | 70/300 [03:06<10:13,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698241407804.ta.chkpt" at 2023-10-25 21:43:27 after 70 attacks.







[Succeeded / Failed / Skipped / Total] 5 / 61 / 9 / 75:  25%|██▌       | 75/300 [03:23<10:11,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241424773.ta.chkpt" at 2023-10-25 21:43:44 after 75 attacks.







[Succeeded / Failed / Skipped / Total] 5 / 64 / 11 / 80:  27%|██▋       | 80/300 [03:36<09:54,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698241437194.ta.chkpt" at 2023-10-25 21:43:57 after 80 attacks.







[Succeeded / Failed / Skipped / Total] 5 / 69 / 11 / 85:  28%|██▊       | 85/300 [03:55<09:56,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698241456828.ta.chkpt" at 2023-10-25 21:44:16 after 85 attacks.







[Succeeded / Failed / Skipped / Total] 5 / 72 / 13 / 90:  30%|███       | 90/300 [04:04<09:31,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241465891.ta.chkpt" at 2023-10-25 21:44:25 after 90 attacks.







[Succeeded / Failed / Skipped / Total] 7 / 75 / 13 / 95:  32%|███▏      | 95/300 [04:23<09:29,  2.78s/it]textattack: Saving checkpoint under "checkpoints/1698241484926.ta.chkpt" at 2023-10-25 21:44:44 after 95 attacks.







[Succeeded / Failed / Skipped / Total] 7 / 80 / 13 / 100:  33%|███▎      | 100/300 [04:38<09:16,  2.78s/it]textattack: Saving checkpoint under "checkpoints/1698241499030.ta.chkpt" at 2023-10-25 21:44:59 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 7 / 84 / 14 / 105:  35%|███▌      | 105/300 [04:54<09:07,  2.81s/it]textattack: Saving checkpoint under "checkpoints/1698241515736.ta.chkpt" at 2023-10-25 21:45:15 after 105 attacks.







[Succeeded / Failed / Skipped / Total] 7 / 86 / 17 / 110:  37%|███▋      | 110/300 [05:03<08:44,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698241524480.ta.chkpt" at 2023-10-25 21:45:24 after 110 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 89 / 18 / 115:  38%|███▊      | 115/300 [05:14<08:25,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241535249.ta.chkpt" at 2023-10-25 21:45:35 after 115 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 93 / 19 / 120:  40%|████      | 120/300 [05:27<08:10,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241548218.ta.chkpt" at 2023-10-25 21:45:48 after 120 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 98 / 19 / 125:  42%|████▏     | 125/300 [05:41<07:58,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241562441.ta.chkpt" at 2023-10-25 21:46:02 after 125 attacks.







[Succeeded / Failed / Skipped / Total] 10 / 101 / 19 / 130:  43%|████▎     | 130/300 [05:58<07:49,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698241579754.ta.chkpt" at 2023-10-25 21:46:19 after 130 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 105 / 19 / 135:  45%|████▌     | 135/300 [06:12<07:35,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698241593394.ta.chkpt" at 2023-10-25 21:46:33 after 135 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 110 / 19 / 140:  47%|████▋     | 140/300 [06:20<07:14,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241601159.ta.chkpt" at 2023-10-25 21:46:41 after 140 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 115 / 19 / 145:  48%|████▊     | 145/300 [06:32<06:59,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698241613046.ta.chkpt" at 2023-10-25 21:46:53 after 145 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 119 / 20 / 150:  50%|█████     | 150/300 [06:47<06:47,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241628812.ta.chkpt" at 2023-10-25 21:47:08 after 150 attacks.







[Succeeded / Failed / Skipped / Total] 13 / 122 / 20 / 155:  52%|█████▏    | 155/300 [07:01<06:34,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241642237.ta.chkpt" at 2023-10-25 21:47:22 after 155 attacks.







[Succeeded / Failed / Skipped / Total] 13 / 127 / 20 / 160:  53%|█████▎    | 160/300 [07:19<06:24,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698241660956.ta.chkpt" at 2023-10-25 21:47:40 after 160 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 129 / 21 / 165:  55%|█████▌    | 165/300 [07:29<06:08,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241670810.ta.chkpt" at 2023-10-25 21:47:50 after 165 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 133 / 21 / 170:  57%|█████▋    | 170/300 [07:45<05:56,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698241686972.ta.chkpt" at 2023-10-25 21:48:06 after 170 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 137 / 22 / 175:  58%|█████▊    | 175/300 [08:01<05:43,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698241702426.ta.chkpt" at 2023-10-25 21:48:22 after 175 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 141 / 22 / 180:  60%|██████    | 180/300 [08:09<05:26,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241710096.ta.chkpt" at 2023-10-25 21:48:30 after 180 attacks.







[Succeeded / Failed / Skipped / Total] 19 / 144 / 22 / 185:  62%|██████▏   | 185/300 [08:22<05:12,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698241723216.ta.chkpt" at 2023-10-25 21:48:43 after 185 attacks.







[Succeeded / Failed / Skipped / Total] 20 / 146 / 24 / 190:  63%|██████▎   | 190/300 [08:33<04:57,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698241734346.ta.chkpt" at 2023-10-25 21:48:54 after 190 attacks.







[Succeeded / Failed / Skipped / Total] 21 / 150 / 24 / 195:  65%|██████▌   | 195/300 [08:46<04:43,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698241747257.ta.chkpt" at 2023-10-25 21:49:07 after 195 attacks.
[Succeeded / Failed / Skipped / Total] 21 / 150 / 25 / 196:  65%|██████▌   | 196/300 [08:46<04:39,  2.69s/it]






[Succeeded / Failed / Skipped / Total] 22 / 153 / 25 / 200:  67%|██████▋   | 200/300 [09:01<04:30,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698241762477.ta.chkpt" at 2023-10-25 21:49:22 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 158 / 25 / 205:  68%|██████▊   | 205/300 [09:15<04:17,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698241776144.ta.chkpt" at 2023-10-25 21:49:36 after 205 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 162 / 26 / 210:  70%|███████   | 210/300 [09:21<04:00,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698241782126.ta.chkpt" at 2023-10-25 21:49:42 after 210 attacks.







[Succeeded / Failed / Skipped / Total] 23 / 166 / 26 / 215:  72%|███████▏  | 215/300 [09:37<03:48,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698241798329.ta.chkpt" at 2023-10-25 21:49:58 after 215 attacks.







[Succeeded / Failed / Skipped / Total] 24 / 170 / 26 / 220:  73%|███████▎  | 220/300 [09:55<03:36,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698241816359.ta.chkpt" at 2023-10-25 21:50:16 after 220 attacks.







[Succeeded / Failed / Skipped / Total] 25 / 174 / 26 / 225:  75%|███████▌  | 225/300 [10:14<03:24,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241835328.ta.chkpt" at 2023-10-25 21:50:35 after 225 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 178 / 26 / 230:  77%|███████▋  | 230/300 [10:29<03:11,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698241850673.ta.chkpt" at 2023-10-25 21:50:50 after 230 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 182 / 27 / 235:  78%|███████▊  | 235/300 [10:39<02:56,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241860659.ta.chkpt" at 2023-10-25 21:51:00 after 235 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 187 / 27 / 240:  80%|████████  | 240/300 [10:47<02:41,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698241868120.ta.chkpt" at 2023-10-25 21:51:08 after 240 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 192 / 27 / 245:  82%|████████▏ | 245/300 [11:00<02:28,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698241881658.ta.chkpt" at 2023-10-25 21:51:21 after 245 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 196 / 27 / 250:  83%|████████▎ | 250/300 [11:21<02:16,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241902076.ta.chkpt" at 2023-10-25 21:51:42 after 250 attacks.







[Succeeded / Failed / Skipped / Total] 28 / 200 / 27 / 255:  85%|████████▌ | 255/300 [11:34<02:02,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698241915113.ta.chkpt" at 2023-10-25 21:51:55 after 255 attacks.







[Succeeded / Failed / Skipped / Total] 29 / 204 / 27 / 260:  87%|████████▋ | 260/300 [11:49<01:49,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241930268.ta.chkpt" at 2023-10-25 21:52:10 after 260 attacks.







[Succeeded / Failed / Skipped / Total] 29 / 208 / 28 / 265:  88%|████████▊ | 265/300 [11:58<01:34,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698241939856.ta.chkpt" at 2023-10-25 21:52:19 after 265 attacks.







[Succeeded / Failed / Skipped / Total] 29 / 212 / 29 / 270:  90%|█████████ | 270/300 [12:18<01:22,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241959210.ta.chkpt" at 2023-10-25 21:52:39 after 270 attacks.







[Succeeded / Failed / Skipped / Total] 29 / 216 / 30 / 275:  92%|█████████▏| 275/300 [12:31<01:08,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241972476.ta.chkpt" at 2023-10-25 21:52:52 after 275 attacks.







[Succeeded / Failed / Skipped / Total] 29 / 221 / 30 / 280:  93%|█████████▎| 280/300 [12:43<00:54,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698241984208.ta.chkpt" at 2023-10-25 21:53:04 after 280 attacks.







[Succeeded / Failed / Skipped / Total] 30 / 225 / 30 / 285:  95%|█████████▌| 285/300 [13:01<00:41,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242002160.ta.chkpt" at 2023-10-25 21:53:22 after 285 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 229 / 30 / 290:  97%|█████████▋| 290/300 [13:17<00:27,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698242018703.ta.chkpt" at 2023-10-25 21:53:38 after 290 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 234 / 30 / 295:  98%|█████████▊| 295/300 [13:36<00:13,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698242037289.ta.chkpt" at 2023-10-25 21:53:57 after 295 attacks.







[Succeeded / Failed / Skipped / Total] 32 / 237 / 31 / 300: 100%|██████████| 300/300 [13:45<00:00,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698242046077.ta.chkpt" at 2023-10-25 21:54:06 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 32 / 237 / 31 / 300: 100%|██████████| 300/300 [13:45<00:00,  2.75s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 32     |
| Number of failed attacks:     | 237    |
| Number of skipped attacks:    | 31     |
| Original accuracy:            | 89.67% |
| Accuracy under attack:        | 79.0%  |
| Attack success rate:          | 11.9%  |
| Average perturbed word %:     | 24.11% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 18.38  |
+-------------------------------+--------+





[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb61ad90>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2ba97fd50>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2ba5f4410>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb163a90>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x35aeed950>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x31879c390>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x3188ff350>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x3187bb410>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x3188e6e50>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x3188e6450>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3188b7910>,
 <textattack.attack_results.failed_attack_result.Failed

In [None]:
# BAE attack against the Custom Trained Model.
# With nr=5 the run output reports logging to log_5.csv.
bae_attacker = create_bae_attacker(
    custom_trained_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=5,
)
# Kept as the cell's final expression so the notebook displays the returned list of attack results.
bae_attacker.attack_dataset()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_5.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapMaskedLM(
    (method):  bae
    (masked_lm_name):  BertForMaskedLM
    (max_length):  512
    (max_candidates):  50
    (min_confidence):  0.0
  )
  (constraints): 
    (0): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (1): UniversalSentenceEncoder(
        (metric):  cosine
        (threshold):  0.936338023
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  True
      )
    (2): RepeatModification
    (3): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 79 / 8 / 13 / 100:  33%|███▎      | 100/300 [15:35<31:10,  9.35s/it]textattack: Saving checkpoint under "checkpoints/1698250678254.ta.chkpt" at 2023-10-26 00:17:58 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 160 / 15 / 25 / 200:  67%|██████▋   | 200/300 [30:07<15:03,  9.04s/it]textattack: Saving checkpoint under "checkpoints/1698251550509.ta.chkpt" at 2023-10-26 00:32:30 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 245 / 24 / 31 / 300: 100%|██████████| 300/300 [44:55<00:00,  8.98s/it]textattack: Saving checkpoint under "checkpoints/1698252438179.ta.chkpt" at 2023-10-26 00:47:18 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 245 / 24 / 31 / 300: 100%|██████████| 300/300 [44:55<00:00,  8.98s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 245    |
| Number of failed attacks:     | 24     |
| Number of skipped attacks:    | 31     |
| Original accuracy:            | 89.67% |
| Accuracy under attack:        | 8.0%   |
| Attack success rate:          | 91.08% |
| Average perturbed word %:     | 31.11% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 65.36  |
+-------------------------------+--------+





[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2bb641910>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2bb87f9d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x447db8490>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x35cee6090>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x35cc3fb50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x321b4aa90>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x321b5b050>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3219c01d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3213c99d0>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x35cf33750>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult 

In [None]:
# Textfooler attack against the Custom Trained Model.
# With nr=6 the run output reports logging to log_6.csv.
textfooler_attacker = create_textfooler_attacker(
    custom_trained_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=6,
)
# Kept as the cell's final expression so the notebook displays the returned list of attack results.
textfooler_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_6.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.5
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (2): UniversalSentenceEncoder(
        (metric):  angular
        (threshold):  0.840845057
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  False
      )
    (3): RepeatModification
    (4): StopwordModification
    (5): InputColumnModification(
        (matching_column_labels):  ['premise', 'hypothesis']
       

[Succeeded / Failed / Skipped / Total] 76 / 11 / 13 / 100:  33%|███▎      | 100/300 [18:29<36:58, 11.09s/it]textattack: Saving checkpoint under "checkpoints/1698253547906.ta.chkpt" at 2023-10-26 01:05:47 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 156 / 19 / 25 / 200:  67%|██████▋   | 200/300 [35:06<17:33, 10.53s/it]textattack: Saving checkpoint under "checkpoints/1698254545032.ta.chkpt" at 2023-10-26 01:22:25 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 241 / 28 / 31 / 300: 100%|██████████| 300/300 [52:46<00:00, 10.55s/it]textattack: Saving checkpoint under "checkpoints/1698255604472.ta.chkpt" at 2023-10-26 01:40:04 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 241 / 28 / 31 / 300: 100%|██████████| 300/300 [52:46<00:00, 10.55s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 241    |
| Number of failed attacks:     | 28     |
| Number of skipped attacks:    | 31     |
| Original accuracy:            | 89.67% |
| Accuracy under attack:        | 9.33%  |
| Attack success rate:          | 89.59% |
| Average perturbed word %:     | 31.85% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 82.65  |
+-------------------------------+--------+





[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x38f05c190>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x321a9b1d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3a60d81d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3219a55d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x3a60d5390>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x35b0f8b90>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3219f4150>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3f8451c90>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x38f02a250>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x35cc05310>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x38f079a50>,

#### Custom Finetuned Model

In [None]:
# Load the Custom Finetuned model from its saved weights file and wrap it,
# together with its tokenizer, in the textattack HuggingFace wrapper used by the attackers.
custom_finetuned_model, tokenizer = load_model('custom_finetuned_model.bin')
custom_finetuned_model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(
    custom_finetuned_model,
    tokenizer,
)

In [None]:
# Custom attack against the Custom Finetuned Model.
# With nr=8 the run output reports logging to log_8.csv.
custom_attacker = create_custom_attacker(
    custom_finetuned_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=8,
)
# Kept as the cell's final expression so the notebook displays the returned list of attack results.
custom_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_8.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.9
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): RepeatModification
    (2): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 0 / 5 / 0 / 5:   2%|▏         | 5/300 [00:11<11:13,  2.28s/it]textattack: Saving checkpoint under "checkpoints/1698242060508.ta.chkpt" at 2023-10-25 21:54:20 after 5 attacks.







[Succeeded / Failed / Skipped / Total] 0 / 9 / 1 / 10:   3%|▎         | 10/300 [00:25<12:26,  2.57s/it]textattack: Saving checkpoint under "checkpoints/1698242074810.ta.chkpt" at 2023-10-25 21:54:34 after 10 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 11 / 2 / 15:   5%|▌         | 15/300 [00:33<10:29,  2.21s/it]textattack: Saving checkpoint under "checkpoints/1698242082191.ta.chkpt" at 2023-10-25 21:54:42 after 15 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 16 / 2 / 20:   7%|▋         | 20/300 [00:53<12:33,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698242102874.ta.chkpt" at 2023-10-25 21:55:02 after 20 attacks.







[Succeeded / Failed / Skipped / Total] 2 / 21 / 2 / 25:   8%|▊         | 25/300 [01:12<13:19,  2.91s/it]textattack: Saving checkpoint under "checkpoints/1698242121731.ta.chkpt" at 2023-10-25 21:55:21 after 25 attacks.
[Succeeded / Failed / Skipped / Total] 2 / 21 / 3 / 26:   9%|▊         | 26/300 [01:12<12:47,  2.80s/it]






[Succeeded / Failed / Skipped / Total] 2 / 25 / 3 / 30:  10%|█         | 30/300 [01:21<12:14,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698242130668.ta.chkpt" at 2023-10-25 21:55:30 after 30 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 29 / 3 / 35:  12%|█▏        | 35/300 [01:33<11:51,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698242143082.ta.chkpt" at 2023-10-25 21:55:43 after 35 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 32 / 5 / 40:  13%|█▎        | 40/300 [01:44<11:20,  2.62s/it]textattack: Saving checkpoint under "checkpoints/1698242153758.ta.chkpt" at 2023-10-25 21:55:53 after 40 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 37 / 5 / 45:  15%|█▌        | 45/300 [02:01<11:29,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698242170812.ta.chkpt" at 2023-10-25 21:56:10 after 45 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 42 / 5 / 50:  17%|█▋        | 50/300 [02:20<11:43,  2.81s/it]textattack: Saving checkpoint under "checkpoints/1698242189708.ta.chkpt" at 2023-10-25 21:56:29 after 50 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 46 / 5 / 55:  18%|█▊        | 55/300 [02:33<11:25,  2.80s/it]textattack: Saving checkpoint under "checkpoints/1698242203035.ta.chkpt" at 2023-10-25 21:56:43 after 55 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 51 / 5 / 60:  20%|██        | 60/300 [02:45<11:00,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698242214285.ta.chkpt" at 2023-10-25 21:56:54 after 60 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 55 / 6 / 65:  22%|██▏       | 65/300 [02:54<10:29,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698242223143.ta.chkpt" at 2023-10-25 21:57:03 after 65 attacks.
[Succeeded / Failed / Skipped / Total] 4 / 55 / 7 / 66:  22%|██▏       | 66/300 [02:54<10:17,  2.64s/it]






[Succeeded / Failed / Skipped / Total] 5 / 57 / 8 / 70:  23%|██▎       | 70/300 [03:10<10:25,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698242239598.ta.chkpt" at 2023-10-25 21:57:19 after 70 attacks.
[Succeeded / Failed / Skipped / Total] 5 / 57 / 9 / 71:  24%|██▎       | 71/300 [03:10<10:15,  2.69s/it]






[Succeeded / Failed / Skipped / Total] 5 / 61 / 9 / 75:  25%|██▌       | 75/300 [03:24<10:14,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242253767.ta.chkpt" at 2023-10-25 21:57:33 after 75 attacks.







[Succeeded / Failed / Skipped / Total] 5 / 64 / 11 / 80:  27%|██▋       | 80/300 [03:36<09:55,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698242265608.ta.chkpt" at 2023-10-25 21:57:45 after 80 attacks.







[Succeeded / Failed / Skipped / Total] 6 / 68 / 11 / 85:  28%|██▊       | 85/300 [03:54<09:52,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698242283506.ta.chkpt" at 2023-10-25 21:58:03 after 85 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 71 / 11 / 90:  30%|███       | 90/300 [04:09<09:41,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698242298487.ta.chkpt" at 2023-10-25 21:58:18 after 90 attacks.







[Succeeded / Failed / Skipped / Total] 9 / 74 / 12 / 95:  32%|███▏      | 95/300 [04:25<09:33,  2.80s/it]textattack: Saving checkpoint under "checkpoints/1698242314891.ta.chkpt" at 2023-10-25 21:58:34 after 95 attacks.







[Succeeded / Failed / Skipped / Total] 10 / 78 / 12 / 100:  33%|███▎      | 100/300 [04:38<09:17,  2.79s/it]textattack: Saving checkpoint under "checkpoints/1698242327630.ta.chkpt" at 2023-10-25 21:58:47 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 82 / 12 / 105:  35%|███▌      | 105/300 [05:00<09:18,  2.86s/it]textattack: Saving checkpoint under "checkpoints/1698242349808.ta.chkpt" at 2023-10-25 21:59:09 after 105 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 84 / 15 / 110:  37%|███▋      | 110/300 [05:09<08:54,  2.81s/it]textattack: Saving checkpoint under "checkpoints/1698242358425.ta.chkpt" at 2023-10-25 21:59:18 after 110 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 88 / 16 / 115:  38%|███▊      | 115/300 [05:19<08:34,  2.78s/it]textattack: Saving checkpoint under "checkpoints/1698242368874.ta.chkpt" at 2023-10-25 21:59:28 after 115 attacks.







[Succeeded / Failed / Skipped / Total] 12 / 91 / 17 / 120:  40%|████      | 120/300 [05:32<08:18,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698242381504.ta.chkpt" at 2023-10-25 21:59:41 after 120 attacks.







[Succeeded / Failed / Skipped / Total] 13 / 95 / 17 / 125:  42%|████▏     | 125/300 [05:44<08:02,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698242394024.ta.chkpt" at 2023-10-25 21:59:54 after 125 attacks.







[Succeeded / Failed / Skipped / Total] 14 / 99 / 17 / 130:  43%|████▎     | 130/300 [06:01<07:52,  2.78s/it]textattack: Saving checkpoint under "checkpoints/1698242410721.ta.chkpt" at 2023-10-25 22:00:10 after 130 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 103 / 17 / 135:  45%|████▌     | 135/300 [06:15<07:38,  2.78s/it]textattack: Saving checkpoint under "checkpoints/1698242424128.ta.chkpt" at 2023-10-25 22:00:24 after 135 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 108 / 17 / 140:  47%|████▋     | 140/300 [06:22<07:16,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242431421.ta.chkpt" at 2023-10-25 22:00:31 after 140 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 113 / 17 / 145:  48%|████▊     | 145/300 [06:33<07:01,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698242443073.ta.chkpt" at 2023-10-25 22:00:43 after 145 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 117 / 18 / 150:  50%|█████     | 150/300 [06:49<06:49,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242458715.ta.chkpt" at 2023-10-25 22:00:58 after 150 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 121 / 18 / 155:  52%|█████▏    | 155/300 [07:04<06:37,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242473531.ta.chkpt" at 2023-10-25 22:01:13 after 155 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 126 / 18 / 160:  53%|█████▎    | 160/300 [07:22<06:27,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698242491854.ta.chkpt" at 2023-10-25 22:01:31 after 160 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 129 / 19 / 165:  55%|█████▌    | 165/300 [07:32<06:10,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242501836.ta.chkpt" at 2023-10-25 22:01:41 after 165 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 134 / 19 / 170:  57%|█████▋    | 170/300 [07:49<05:58,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698242518300.ta.chkpt" at 2023-10-25 22:01:58 after 170 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 139 / 19 / 175:  58%|█████▊    | 175/300 [08:05<05:46,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698242534172.ta.chkpt" at 2023-10-25 22:02:14 after 175 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 144 / 19 / 180:  60%|██████    | 180/300 [08:12<05:28,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242541924.ta.chkpt" at 2023-10-25 22:02:21 after 180 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 148 / 20 / 185:  62%|██████▏   | 185/300 [08:23<05:12,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698242552256.ta.chkpt" at 2023-10-25 22:02:32 after 185 attacks.
[Succeeded / Failed / Skipped / Total] 17 / 148 / 21 / 186:  62%|██████▏   | 186/300 [08:23<05:08,  2.71s/it]






[Succeeded / Failed / Skipped / Total] 19 / 149 / 22 / 190:  63%|██████▎   | 190/300 [08:33<04:57,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698242562941.ta.chkpt" at 2023-10-25 22:02:42 after 190 attacks.







[Succeeded / Failed / Skipped / Total] 20 / 153 / 22 / 195:  65%|██████▌   | 195/300 [08:48<04:44,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698242577953.ta.chkpt" at 2023-10-25 22:02:57 after 195 attacks.







[Succeeded / Failed / Skipped / Total] 21 / 157 / 22 / 200:  67%|██████▋   | 200/300 [09:05<04:32,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242594230.ta.chkpt" at 2023-10-25 22:03:14 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 21 / 162 / 22 / 205:  68%|██████▊   | 205/300 [09:18<04:18,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242607735.ta.chkpt" at 2023-10-25 22:03:27 after 205 attacks.







[Succeeded / Failed / Skipped / Total] 21 / 166 / 23 / 210:  70%|███████   | 210/300 [09:24<04:01,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698242613507.ta.chkpt" at 2023-10-25 22:03:33 after 210 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 170 / 23 / 215:  72%|███████▏  | 215/300 [09:40<03:49,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698242629430.ta.chkpt" at 2023-10-25 22:03:49 after 215 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 175 / 23 / 220:  73%|███████▎  | 220/300 [09:57<03:37,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698242647072.ta.chkpt" at 2023-10-25 22:04:07 after 220 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 180 / 23 / 225:  75%|███████▌  | 225/300 [10:16<03:25,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242665603.ta.chkpt" at 2023-10-25 22:04:25 after 225 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 185 / 23 / 230:  77%|███████▋  | 230/300 [10:31<03:12,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698242680936.ta.chkpt" at 2023-10-25 22:04:40 after 230 attacks.







[Succeeded / Failed / Skipped / Total] 22 / 189 / 24 / 235:  78%|███████▊  | 235/300 [10:41<02:57,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242690690.ta.chkpt" at 2023-10-25 22:04:50 after 235 attacks.
[Succeeded / Failed / Skipped / Total] 22 / 189 / 25 / 236:  79%|███████▊  | 236/300 [10:41<02:54,  2.72s/it]






[Succeeded / Failed / Skipped / Total] 22 / 193 / 25 / 240:  80%|████████  | 240/300 [10:47<02:41,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698242696827.ta.chkpt" at 2023-10-25 22:04:56 after 240 attacks.







[Succeeded / Failed / Skipped / Total] 23 / 197 / 25 / 245:  82%|████████▏ | 245/300 [11:00<02:28,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698242709994.ta.chkpt" at 2023-10-25 22:05:09 after 245 attacks.







[Succeeded / Failed / Skipped / Total] 23 / 202 / 25 / 250:  83%|████████▎ | 250/300 [11:22<02:16,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242731170.ta.chkpt" at 2023-10-25 22:05:31 after 250 attacks.







[Succeeded / Failed / Skipped / Total] 24 / 206 / 25 / 255:  85%|████████▌ | 255/300 [11:34<02:02,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698242743557.ta.chkpt" at 2023-10-25 22:05:43 after 255 attacks.







[Succeeded / Failed / Skipped / Total] 24 / 211 / 25 / 260:  87%|████████▋ | 260/300 [11:50<01:49,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242759395.ta.chkpt" at 2023-10-25 22:05:59 after 260 attacks.
[Succeeded / Failed / Skipped / Total] 24 / 211 / 26 / 261:  87%|████████▋ | 261/300 [11:50<01:46,  2.72s/it]






[Succeeded / Failed / Skipped / Total] 24 / 214 / 27 / 265:  88%|████████▊ | 265/300 [11:57<01:34,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698242766873.ta.chkpt" at 2023-10-25 22:06:06 after 265 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 217 / 27 / 270:  90%|█████████ | 270/300 [12:19<01:22,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242789022.ta.chkpt" at 2023-10-25 22:06:29 after 270 attacks.
[Succeeded / Failed / Skipped / Total] 26 / 217 / 28 / 271:  90%|█████████ | 271/300 [12:20<01:19,  2.73s/it]






[Succeeded / Failed / Skipped / Total] 26 / 221 / 28 / 275:  92%|█████████▏| 275/300 [12:32<01:08,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242801826.ta.chkpt" at 2023-10-25 22:06:41 after 275 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 226 / 28 / 280:  93%|█████████▎| 280/300 [12:44<00:54,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242813124.ta.chkpt" at 2023-10-25 22:06:53 after 280 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 230 / 28 / 285:  95%|█████████▌| 285/300 [13:02<00:41,  2.74s/it]textattack: Saving checkpoint under "checkpoints/1698242831335.ta.chkpt" at 2023-10-25 22:07:11 after 285 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 235 / 28 / 290:  97%|█████████▋| 290/300 [13:18<00:27,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698242847457.ta.chkpt" at 2023-10-25 22:07:27 after 290 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 240 / 28 / 295:  98%|█████████▊| 295/300 [13:36<00:13,  2.77s/it]textattack: Saving checkpoint under "checkpoints/1698242865622.ta.chkpt" at 2023-10-25 22:07:45 after 295 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 244 / 29 / 300: 100%|██████████| 300/300 [13:44<00:00,  2.75s/it]textattack: Saving checkpoint under "checkpoints/1698242874006.ta.chkpt" at 2023-10-25 22:07:54 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 27 / 244 / 29 / 300: 100%|██████████| 300/300 [13:44<00:00,  2.75s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 27     |
| Number of failed attacks:     | 244    |
| Number of skipped attacks:    | 29     |
| Original accuracy:            | 90.33% |
| Accuracy under attack:        | 81.33% |
| Attack success rate:          | 9.96%  |
| Average perturbed word %:     | 22.75% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 18.66  |
+-------------------------------+--------+





[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2cd45d450>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x31888b590>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x307ec31d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb2d59d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2cd506910>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x318edc390>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x318ecaf10>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x31874fcd0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x318ead690>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x2bb2d4310>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x318e7e210>,
 <textattack.attack_results.failed_attack_result.Failed

In [None]:
# BAE attack on Custom Finetuned Model
# nr=9 tags this run; the output below shows the CSV log going to log_9.csv.
bae_attacker = create_bae_attacker(
    custom_finetuned_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=9,
)
# Kept as the cell's last expression so the notebook also displays the
# returned list of attack results.
bae_attacker.attack_dataset()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_9.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapMaskedLM(
    (method):  bae
    (masked_lm_name):  BertForMaskedLM
    (max_length):  512
    (max_candidates):  50
    (min_confidence):  0.0
  )
  (constraints): 
    (0): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (1): UniversalSentenceEncoder(
        (metric):  cosine
        (threshold):  0.936338023
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  True
      )
    (2): RepeatModification
    (3): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 83 / 5 / 12 / 100:  33%|███▎      | 100/300 [15:24<30:48,  9.24s/it]textattack: Saving checkpoint under "checkpoints/1698256613208.ta.chkpt" at 2023-10-26 01:56:53 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 168 / 10 / 22 / 200:  67%|██████▋   | 200/300 [30:09<15:04,  9.05s/it]textattack: Saving checkpoint under "checkpoints/1698257499039.ta.chkpt" at 2023-10-26 02:11:39 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 254 / 17 / 29 / 300: 100%|██████████| 300/300 [45:04<00:00,  9.02s/it]textattack: Saving checkpoint under "checkpoints/1698258393632.ta.chkpt" at 2023-10-26 02:26:33 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 254 / 17 / 29 / 300: 100%|██████████| 300/300 [45:04<00:00,  9.02s/it]






+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 254    |
| Number of failed attacks:     | 17     |
| Number of skipped attacks:    | 29     |
| Original accuracy:            | 90.33% |
| Accuracy under attack:        | 5.67%  |
| Attack success rate:          | 93.73% |
| Average perturbed word %:     | 30.56% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 61.2   |
+-------------------------------+--------+


[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3254f8bd0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b3c38b90>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b97d5e90>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b71bae10>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x104e87490>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b805ecd0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3a1036c90>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x324ad8990>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b80e20d0>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x2b746a490>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult 

In [None]:
# Textfooler attack on Custom Finetuned Model
# nr=10 tags this run; the output below shows the CSV log going to log_10.csv.
textfooler_attacker = create_textfooler_attacker(
    custom_finetuned_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=10,
)
# Kept as the cell's last expression so the notebook also displays the
# returned list of attack results.
textfooler_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_10.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.5
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (2): UniversalSentenceEncoder(
        (metric):  angular
        (threshold):  0.840845057
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  False
      )
    (3): RepeatModification
    (4): StopwordModification
    (5): InputColumnModification(
        (matching_column_labels):  ['premise', 'hypothesis']
       

[Succeeded / Failed / Skipped / Total] 81 / 7 / 12 / 100:  33%|███▎      | 100/300 [17:58<35:57, 10.79s/it]textattack: Saving checkpoint under "checkpoints/1698259474766.ta.chkpt" at 2023-10-26 02:44:34 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 165 / 13 / 22 / 200:  67%|██████▋   | 200/300 [34:51<17:25, 10.46s/it]textattack: Saving checkpoint under "checkpoints/1698260487298.ta.chkpt" at 2023-10-26 03:01:27 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 249 / 22 / 29 / 300: 100%|██████████| 300/300 [52:34<00:00, 10.52s/it]textattack: Saving checkpoint under "checkpoints/1698261550736.ta.chkpt" at 2023-10-26 03:19:10 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 249 / 22 / 29 / 300: 100%|██████████| 300/300 [52:34<00:00, 10.52s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 249    |
| Number of failed attacks:     | 22     |
| Number of skipped attacks:    | 29     |
| Original accuracy:            | 90.33% |
| Accuracy under attack:        | 7.33%  |
| Attack success rate:          | 91.88% |
| Average perturbed word %:     | 29.4%  |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 79.11  |
+-------------------------------+--------+





[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x3a0dca790>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2968b0410>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3254f1850>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3a0dc8410>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x4383b7990>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3e9955f90>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3e9957590>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x4381ed750>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3936d5a50>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x3e9961410>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x438162e50>,

#### Textfooler Trained Model

In [None]:
# Load the Textfooler Trained model together with its tokenizer
# (note: this rebinds the notebook-level `tokenizer` name).
textfooler_trained_model, tokenizer = load_model(
    'textfooler_trained_model.bin'
)

# Wrap model + tokenizer so the textattack attackers below can use them.
textfooler_trained_model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(
    textfooler_trained_model,
    tokenizer,
)

In [None]:
# Custom attack on Textfooler Trained Model
# nr=12 tags this run; the output below shows the CSV log going to log_12.csv.
custom_attacker = create_custom_attacker(
    textfooler_trained_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=12,
)
# Kept as the cell's last expression so the notebook also displays the
# returned list of attack results.
custom_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_12.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.9
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): RepeatModification
    (2): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 0 / 5 / 0 / 5:   2%|▏         | 5/300 [00:11<10:58,  2.23s/it]textattack: Saving checkpoint under "checkpoints/1698242888113.ta.chkpt" at 2023-10-25 22:08:08 after 5 attacks.







[Succeeded / Failed / Skipped / Total] 0 / 9 / 1 / 10:   3%|▎         | 10/300 [00:25<12:18,  2.55s/it]textattack: Saving checkpoint under "checkpoints/1698242902404.ta.chkpt" at 2023-10-25 22:08:22 after 10 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 10 / 2 / 15:   5%|▌         | 15/300 [00:32<10:10,  2.14s/it]textattack: Saving checkpoint under "checkpoints/1698242909081.ta.chkpt" at 2023-10-25 22:08:29 after 15 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 15 / 2 / 20:   7%|▋         | 20/300 [00:53<12:26,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698242930270.ta.chkpt" at 2023-10-25 22:08:50 after 20 attacks.







[Succeeded / Failed / Skipped / Total] 3 / 20 / 2 / 25:   8%|▊         | 25/300 [01:12<13:18,  2.90s/it]textattack: Saving checkpoint under "checkpoints/1698242949505.ta.chkpt" at 2023-10-25 22:09:09 after 25 attacks.
[Succeeded / Failed / Skipped / Total] 3 / 20 / 3 / 26:   9%|▊         | 26/300 [01:12<12:46,  2.80s/it]






[Succeeded / Failed / Skipped / Total] 3 / 24 / 3 / 30:  10%|█         | 30/300 [01:21<12:16,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698242958753.ta.chkpt" at 2023-10-25 22:09:18 after 30 attacks.







[Succeeded / Failed / Skipped / Total] 4 / 28 / 3 / 35:  12%|█▏        | 35/300 [01:33<11:50,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698242970801.ta.chkpt" at 2023-10-25 22:09:30 after 35 attacks.







[Succeeded / Failed / Skipped / Total] 5 / 30 / 5 / 40:  13%|█▎        | 40/300 [01:44<11:18,  2.61s/it]textattack: Saving checkpoint under "checkpoints/1698242981298.ta.chkpt" at 2023-10-25 22:09:41 after 40 attacks.







[Succeeded / Failed / Skipped / Total] 7 / 33 / 5 / 45:  15%|█▌        | 45/300 [01:58<11:13,  2.64s/it]textattack: Saving checkpoint under "checkpoints/1698242995758.ta.chkpt" at 2023-10-25 22:09:55 after 45 attacks.







[Succeeded / Failed / Skipped / Total] 8 / 37 / 5 / 50:  17%|█▋        | 50/300 [02:15<11:19,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698243012928.ta.chkpt" at 2023-10-25 22:10:12 after 50 attacks.







[Succeeded / Failed / Skipped / Total] 9 / 41 / 5 / 55:  18%|█▊        | 55/300 [02:30<11:08,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698243027077.ta.chkpt" at 2023-10-25 22:10:27 after 55 attacks.







[Succeeded / Failed / Skipped / Total] 9 / 46 / 5 / 60:  20%|██        | 60/300 [02:42<10:48,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698243038986.ta.chkpt" at 2023-10-25 22:10:38 after 60 attacks.







[Succeeded / Failed / Skipped / Total] 9 / 50 / 6 / 65:  22%|██▏       | 65/300 [02:51<10:18,  2.63s/it]textattack: Saving checkpoint under "checkpoints/1698243048079.ta.chkpt" at 2023-10-25 22:10:48 after 65 attacks.
[Succeeded / Failed / Skipped / Total] 9 / 50 / 6 / 65:  22%|██▏       | 66/300 [02:51<10:07,  2.60s/it]






[Succeeded / Failed / Skipped / Total] 9 / 52 / 9 / 70:  23%|██▎       | 70/300 [03:03<10:04,  2.63s/it]textattack: Saving checkpoint under "checkpoints/1698243060825.ta.chkpt" at 2023-10-25 22:11:00 after 70 attacks.
[Succeeded / Failed / Skipped / Total] 9 / 52 / 10 / 71:  24%|██▎       | 71/300 [03:04<09:53,  2.59s/it]






[Succeeded / Failed / Skipped / Total] 10 / 55 / 10 / 75:  25%|██▌       | 75/300 [03:18<09:54,  2.64s/it]textattack: Saving checkpoint under "checkpoints/1698243075173.ta.chkpt" at 2023-10-25 22:11:15 after 75 attacks.







[Succeeded / Failed / Skipped / Total] 10 / 58 / 12 / 80:  27%|██▋       | 80/300 [03:30<09:38,  2.63s/it]textattack: Saving checkpoint under "checkpoints/1698243087462.ta.chkpt" at 2023-10-25 22:11:27 after 80 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 62 / 12 / 85:  28%|██▊       | 85/300 [03:50<09:43,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698243107664.ta.chkpt" at 2023-10-25 22:11:47 after 85 attacks.







[Succeeded / Failed / Skipped / Total] 11 / 66 / 13 / 90:  30%|███       | 90/300 [04:02<09:25,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698243119170.ta.chkpt" at 2023-10-25 22:11:59 after 90 attacks.







[Succeeded / Failed / Skipped / Total] 12 / 70 / 13 / 95:  32%|███▏      | 95/300 [04:22<09:25,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698243138982.ta.chkpt" at 2023-10-25 22:12:18 after 95 attacks.







[Succeeded / Failed / Skipped / Total] 13 / 74 / 13 / 100:  33%|███▎      | 100/300 [04:36<09:12,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698243153270.ta.chkpt" at 2023-10-25 22:12:33 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 14 / 77 / 14 / 105:  35%|███▌      | 105/300 [04:49<08:57,  2.76s/it]textattack: Saving checkpoint under "checkpoints/1698243166608.ta.chkpt" at 2023-10-25 22:12:46 after 105 attacks.







[Succeeded / Failed / Skipped / Total] 14 / 79 / 17 / 110:  37%|███▋      | 110/300 [04:58<08:35,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698243175599.ta.chkpt" at 2023-10-25 22:12:55 after 110 attacks.







[Succeeded / Failed / Skipped / Total] 14 / 83 / 18 / 115:  38%|███▊      | 115/300 [05:09<08:18,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698243186561.ta.chkpt" at 2023-10-25 22:13:06 after 115 attacks.
[Succeeded / Failed / Skipped / Total] 14 / 83 / 19 / 116:  39%|███▊      | 116/300 [05:09<08:11,  2.67s/it]






[Succeeded / Failed / Skipped / Total] 14 / 86 / 20 / 120:  40%|████      | 120/300 [05:21<08:02,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698243198399.ta.chkpt" at 2023-10-25 22:13:18 after 120 attacks.







[Succeeded / Failed / Skipped / Total] 14 / 91 / 20 / 125:  42%|████▏     | 125/300 [05:36<07:50,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698243213110.ta.chkpt" at 2023-10-25 22:13:33 after 125 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 95 / 20 / 130:  43%|████▎     | 130/300 [05:52<07:41,  2.72s/it]textattack: Saving checkpoint under "checkpoints/1698243229947.ta.chkpt" at 2023-10-25 22:13:49 after 130 attacks.







[Succeeded / Failed / Skipped / Total] 15 / 99 / 21 / 135:  45%|████▌     | 135/300 [06:04<07:25,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698243241293.ta.chkpt" at 2023-10-25 22:14:01 after 135 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 103 / 21 / 140:  47%|████▋     | 140/300 [06:10<07:03,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698243247506.ta.chkpt" at 2023-10-25 22:14:07 after 140 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 108 / 21 / 145:  48%|████▊     | 145/300 [06:22<06:49,  2.64s/it]textattack: Saving checkpoint under "checkpoints/1698243259638.ta.chkpt" at 2023-10-25 22:14:19 after 145 attacks.







[Succeeded / Failed / Skipped / Total] 16 / 112 / 22 / 150:  50%|█████     | 150/300 [06:38<06:38,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698243275702.ta.chkpt" at 2023-10-25 22:14:35 after 150 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 116 / 22 / 155:  52%|█████▏    | 155/300 [06:54<06:27,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698243291277.ta.chkpt" at 2023-10-25 22:14:51 after 155 attacks.







[Succeeded / Failed / Skipped / Total] 17 / 121 / 22 / 160:  53%|█████▎    | 160/300 [07:13<06:19,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698243310286.ta.chkpt" at 2023-10-25 22:15:10 after 160 attacks.







[Succeeded / Failed / Skipped / Total] 19 / 123 / 23 / 165:  55%|█████▌    | 165/300 [07:23<06:02,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698243320319.ta.chkpt" at 2023-10-25 22:15:20 after 165 attacks.
[Succeeded / Failed / Skipped / Total] 19 / 123 / 24 / 166:  55%|█████▌    | 166/300 [07:23<05:58,  2.67s/it]






[Succeeded / Failed / Skipped / Total] 19 / 127 / 24 / 170:  57%|█████▋    | 170/300 [07:36<05:48,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698243333305.ta.chkpt" at 2023-10-25 22:15:33 after 170 attacks.







[Succeeded / Failed / Skipped / Total] 19 / 132 / 24 / 175:  58%|█████▊    | 175/300 [07:52<05:37,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698243349805.ta.chkpt" at 2023-10-25 22:15:49 after 175 attacks.







[Succeeded / Failed / Skipped / Total] 19 / 137 / 24 / 180:  60%|██████    | 180/300 [08:00<05:20,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698243357859.ta.chkpt" at 2023-10-25 22:15:57 after 180 attacks.







[Succeeded / Failed / Skipped / Total] 20 / 141 / 24 / 185:  62%|██████▏   | 185/300 [08:13<05:07,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698243370939.ta.chkpt" at 2023-10-25 22:16:10 after 185 attacks.
[Succeeded / Failed / Skipped / Total] 20 / 141 / 25 / 186:  62%|██████▏   | 186/300 [08:14<05:02,  2.66s/it]






[Succeeded / Failed / Skipped / Total] 21 / 143 / 26 / 190:  63%|██████▎   | 190/300 [08:25<04:52,  2.66s/it]textattack: Saving checkpoint under "checkpoints/1698243382604.ta.chkpt" at 2023-10-25 22:16:22 after 190 attacks.







[Succeeded / Failed / Skipped / Total] 23 / 146 / 26 / 195:  65%|██████▌   | 195/300 [08:40<04:40,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698243396981.ta.chkpt" at 2023-10-25 22:16:36 after 195 attacks.
[Succeeded / Failed / Skipped / Total] 23 / 146 / 27 / 196:  65%|██████▌   | 196/300 [08:40<04:36,  2.65s/it]






[Succeeded / Failed / Skipped / Total] 24 / 149 / 27 / 200:  67%|██████▋   | 200/300 [08:54<04:27,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698243411269.ta.chkpt" at 2023-10-25 22:16:51 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 152 / 27 / 205:  68%|██████▊   | 205/300 [09:07<04:13,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698243424071.ta.chkpt" at 2023-10-25 22:17:04 after 205 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 156 / 28 / 210:  70%|███████   | 210/300 [09:13<03:57,  2.63s/it]textattack: Saving checkpoint under "checkpoints/1698243430198.ta.chkpt" at 2023-10-25 22:17:10 after 210 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 161 / 28 / 215:  72%|███████▏  | 215/300 [09:30<03:45,  2.65s/it]textattack: Saving checkpoint under "checkpoints/1698243447101.ta.chkpt" at 2023-10-25 22:17:27 after 215 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 166 / 28 / 220:  73%|███████▎  | 220/300 [09:48<03:34,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698243465875.ta.chkpt" at 2023-10-25 22:17:45 after 220 attacks.







[Succeeded / Failed / Skipped / Total] 26 / 171 / 28 / 225:  75%|███████▌  | 225/300 [10:08<03:22,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698243485661.ta.chkpt" at 2023-10-25 22:18:05 after 225 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 175 / 28 / 230:  77%|███████▋  | 230/300 [10:23<03:09,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698243500935.ta.chkpt" at 2023-10-25 22:18:20 after 230 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 180 / 28 / 235:  78%|███████▊  | 235/300 [10:35<02:55,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698243512099.ta.chkpt" at 2023-10-25 22:18:32 after 235 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 184 / 29 / 240:  80%|████████  | 240/300 [10:41<02:40,  2.67s/it]textattack: Saving checkpoint under "checkpoints/1698243518419.ta.chkpt" at 2023-10-25 22:18:38 after 240 attacks.







[Succeeded / Failed / Skipped / Total] 27 / 189 / 29 / 245:  82%|████████▏ | 245/300 [10:55<02:27,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698243532732.ta.chkpt" at 2023-10-25 22:18:52 after 245 attacks.







[Succeeded / Failed / Skipped / Total] 30 / 191 / 29 / 250:  83%|████████▎ | 250/300 [11:14<02:14,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698243551277.ta.chkpt" at 2023-10-25 22:19:11 after 250 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 195 / 29 / 255:  85%|████████▌ | 255/300 [11:27<02:01,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698243564760.ta.chkpt" at 2023-10-25 22:19:24 after 255 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 199 / 30 / 260:  87%|████████▋ | 260/300 [11:40<01:47,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698243577476.ta.chkpt" at 2023-10-25 22:19:37 after 260 attacks.







[Succeeded / Failed / Skipped / Total] 31 / 202 / 32 / 265:  88%|████████▊ | 265/300 [11:48<01:33,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698243585935.ta.chkpt" at 2023-10-25 22:19:45 after 265 attacks.







[Succeeded / Failed / Skipped / Total] 33 / 205 / 32 / 270:  90%|█████████ | 270/300 [12:07<01:20,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698243604209.ta.chkpt" at 2023-10-25 22:20:04 after 270 attacks.







[Succeeded / Failed / Skipped / Total] 34 / 208 / 33 / 275:  92%|█████████▏| 275/300 [12:19<01:07,  2.69s/it]textattack: Saving checkpoint under "checkpoints/1698243616891.ta.chkpt" at 2023-10-25 22:20:16 after 275 attacks.







[Succeeded / Failed / Skipped / Total] 35 / 212 / 33 / 280:  93%|█████████▎| 280/300 [12:31<00:53,  2.68s/it]textattack: Saving checkpoint under "checkpoints/1698243628701.ta.chkpt" at 2023-10-25 22:20:28 after 280 attacks.







[Succeeded / Failed / Skipped / Total] 36 / 216 / 33 / 285:  95%|█████████▌| 285/300 [12:49<00:40,  2.70s/it]textattack: Saving checkpoint under "checkpoints/1698243646915.ta.chkpt" at 2023-10-25 22:20:46 after 285 attacks.







[Succeeded / Failed / Skipped / Total] 37 / 220 / 33 / 290:  97%|█████████▋| 290/300 [13:06<00:27,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698243663807.ta.chkpt" at 2023-10-25 22:21:03 after 290 attacks.







[Succeeded / Failed / Skipped / Total] 37 / 225 / 33 / 295:  98%|█████████▊| 295/300 [13:25<00:13,  2.73s/it]textattack: Saving checkpoint under "checkpoints/1698243682670.ta.chkpt" at 2023-10-25 22:21:22 after 295 attacks.







[Succeeded / Failed / Skipped / Total] 37 / 229 / 34 / 300: 100%|██████████| 300/300 [13:34<00:00,  2.71s/it]textattack: Saving checkpoint under "checkpoints/1698243691419.ta.chkpt" at 2023-10-25 22:21:31 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 37 / 229 / 34 / 300: 100%|██████████| 300/300 [13:34<00:00,  2.71s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 37     |
| Number of failed attacks:     | 229    |
| Number of skipped attacks:    | 34     |
| Original accuracy:            | 88.67% |
| Accuracy under attack:        | 76.33% |
| Attack success rate:          | 13.91% |
| Average perturbed word %:     | 23.73% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 18.01  |
+-------------------------------+--------+





[<textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb2f8990>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x318927750>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x3188897d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x3190942d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2bb15a8d0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x307ef3810>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x31b0a9710>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x30775ccd0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x2cd16e290>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x2cd16d990>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2cd16c850>,
 <textattack.attack_results.successful_attack_result.Su

In [None]:
# Robustness check: run the BAE attack against the Textfooler Trained Model (run nr. 13)
bae_attacker = create_bae_attacker(
    textfooler_trained_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=13,
)
# Kept as the last bare expression so the notebook still echoes the list of attack results
bae_attacker.attack_dataset()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_13.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapMaskedLM(
    (method):  bae
    (masked_lm_name):  BertForMaskedLM
    (max_length):  512
    (max_candidates):  50
    (min_confidence):  0.0
  )
  (constraints): 
    (0): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (1): UniversalSentenceEncoder(
        (metric):  cosine
        (threshold):  0.936338023
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  True
      )
    (2): RepeatModification
    (3): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 80 / 7 / 13 / 100:  33%|███▎      | 100/300 [14:51<29:43,  8.92s/it]textattack: Saving checkpoint under "checkpoints/1698262447390.ta.chkpt" at 2023-10-26 03:34:07 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 162 / 11 / 27 / 200:  67%|██████▋   | 200/300 [29:42<14:51,  8.91s/it]textattack: Saving checkpoint under "checkpoints/1698263337837.ta.chkpt" at 2023-10-26 03:48:57 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 250 / 16 / 34 / 300: 100%|██████████| 300/300 [44:58<00:00,  9.00s/it]textattack: Saving checkpoint under "checkpoints/1698264254363.ta.chkpt" at 2023-10-26 04:04:14 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 250 / 16 / 34 / 300: 100%|██████████| 300/300 [44:58<00:00,  9.00s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 250    |
| Number of failed attacks:     | 16     |
| Number of skipped attacks:    | 34     |
| Original accuracy:            | 88.67% |
| Accuracy under attack:        | 5.33%  |
| Attack success rate:          | 93.98% |
| Average perturbed word %:     | 33.31% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 64.07  |
+-------------------------------+--------+





[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b81f8f50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3e9960310>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3936d7f50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3936bb610>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3e975f490>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b80f1a50>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b8512150>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x2b86223d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x4390dd990>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x3936a7ed0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult 

In [None]:
# Robustness check: run the Textfooler attack against the Textfooler Trained Model (run nr. 14)
textfooler_attacker = create_textfooler_attacker(
    textfooler_trained_model_wrapper,
    dataset,
    num_examples=num_examples,
    nr=14,
)
# Kept as the last bare expression so the notebook still echoes the list of attack results
textfooler_attacker.attack_dataset()

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_14.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.5
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (2): UniversalSentenceEncoder(
        (metric):  angular
        (threshold):  0.840845057
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  False
      )
    (3): RepeatModification
    (4): StopwordModification
    (5): InputColumnModification(
        (matching_column_labels):  ['premise', 'hypothesis']
       

[Succeeded / Failed / Skipped / Total] 75 / 13 / 12 / 100:  33%|███▎      | 100/300 [20:22<40:45, 12.23s/it]textattack: Saving checkpoint under "checkpoints/1698294559042.ta.chkpt" at 2023-10-26 12:29:19 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 152 / 23 / 25 / 200:  67%|██████▋   | 200/300 [37:09<18:34, 11.15s/it]textattack: Saving checkpoint under "checkpoints/1698295565712.ta.chkpt" at 2023-10-26 12:46:05 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 235 / 31 / 34 / 300: 100%|██████████| 300/300 [54:51<00:00, 10.97s/it]textattack: Saving checkpoint under "checkpoints/1698296628062.ta.chkpt" at 2023-10-26 13:03:48 after 300 attacks.
[Succeeded / Failed / Skipped / Total] 235 / 31 / 34 / 300: 100%|██████████| 300/300 [54:51<00:00, 10.97s/it]





+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 235    |
| Number of failed attacks:     | 31     |
| Number of skipped attacks:    | 34     |
| Original accuracy:            | 88.67% |
| Accuracy under attack:        | 10.33% |
| Attack success rate:          | 88.35% |
| Average perturbed word %:     | 28.58% |
| Average num. words per input: | 8.53   |
| Avg num queries:              | 78.24  |
+-------------------------------+--------+





[<textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x31717e550>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x325cc7f10>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x3259e8190>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x325be2450>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x317134990>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x325f01990>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x325bda0d0>,
 <textattack.attack_results.skipped_attack_result.SkippedAttackResult at 0x325aae690>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x325f98190>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x325bfdf10>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x325bfe410>,
 <tex

## Standard Evaluation of Models

In [21]:
def test_model_on_data(model, data_loader, data_frame, batch_size, device, show_misclassified=False):
    """Evaluate a classifier on a data loader and print its accuracy.

    Args:
        model: Model that is called as ``model(input_ids, attention_mask=...)``
            and whose first output element holds the class logits.
        data_loader: Iterable yielding ``(input_ids, attention_mask, labels)``
            batches. It has to iterate in the same order as the rows of
            ``data_frame`` (no shuffling), otherwise misclassified sentences
            cannot be mapped back to their text.
        data_frame: DataFrame with a ``text`` and a ``label`` column describing
            the samples served by ``data_loader``.
        batch_size: Kept for backward compatibility only. The position of each
            sample is tracked with a running offset, so the value is no longer
            needed and a mismatch with the loader cannot corrupt the indices.
        device: Torch device the batch tensors are moved to before the forward
            pass.
        show_misclassified: If True, additionally print index, sentence, true
            label and predicted label of every misclassified sample.

    Returns:
        None. The accuracy (and optionally the misclassified samples) is
        printed.
    """
    model.eval()  # Set the model to evaluation mode
    correct_predictions = 0
    total_predictions = 0
    misclassified_entries = []

    # Sentences and labels are only needed to report misclassified samples
    test_sentences = data_frame['text'].tolist()
    test_labels = data_frame['label'].tolist()

    with torch.no_grad():
        for batch in tqdm(data_loader, desc='Testing', dynamic_ncols=True):
            input_ids, attention_mask, labels = [item.to(device) for item in batch]
            logits = model(input_ids, attention_mask=attention_mask)[0]
            predicted = torch.argmax(logits, dim=1)

            # Number of samples seen so far == global index of this batch's
            # first sample. This stays correct even if the last batch is
            # smaller or the loader uses a different batch size than expected.
            batch_offset = total_predictions
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

            if not show_misclassified:
                continue

            # Identify misclassified sentences
            wrong_positions = (predicted != labels).nonzero(as_tuple=True)[0].tolist()
            for idx in wrong_positions:
                global_index = batch_offset + idx
                misclassified_entries.append({
                    "index": global_index,
                    "sentence": test_sentences[global_index],
                    "true_label": 'offensive/hatespeech' if test_labels[global_index] == 1 else 'not offensive/hatespeech',
                    "predicted_label": 'offensive/hatespeech' if predicted[idx].item() == 1 else 'not offensive/hatespeech'
                })

    # An empty loader would otherwise end in a ZeroDivisionError
    if total_predictions == 0:
        print('Test Accuracy: n/a (no samples were evaluated)')
        return

    # Display test accuracy
    accuracy = 100 * correct_predictions / total_predictions
    print(f'Test Accuracy: {accuracy:.2f}%')

    # Display misclassified sentences
    if show_misclassified:
        print("\nMisclassified Sentences:")
        for entry in misclassified_entries:
            print(f"Index: {entry['index']}, Sentence: {entry['sentence']}, True Label: {entry['true_label']}, Predicted Label: {entry['predicted_label']}")



In [22]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training split: label tensor -> tensor dataset -> shuffled loader
train_labels_tensor = torch.tensor(train_data['label'].values, dtype=torch.long)
train_dataset = TensorDataset(
    train_tokens['input_ids'],
    train_tokens['attention_mask'],
    train_labels_tensor,
)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Validation split: order is kept (no shuffling)
validation_labels_tensor = torch.tensor(validation_data['label'].values, dtype=torch.long)
validation_dataset = TensorDataset(
    validation_tokens['input_ids'],
    validation_tokens['attention_mask'],
    validation_labels_tensor,
)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Test split: order is kept so batch positions line up with the rows of test_data
test_labels_tensor = torch.tensor(test_data['label'].values, dtype=torch.long)
test_dataset = TensorDataset(
    test_tokens['input_ids'],
    test_tokens['attention_mask'],
    test_labels_tensor,
)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [23]:
# Load all saved checkpoints; `tokenizer` is rebound on every call, so it ends
# up being the one returned together with the last model
initial_hate_speech_model, tokenizer = load_model('initial_hate_speech_model.bin')
custom_trained_model, tokenizer = load_model('custom_trained_model.bin')
custom_finetuned_model, tokenizer = load_model('custom_finetuned_model.bin')
textfooler_trained_model, tokenizer = load_model('textfooler_trained_model.bin')

# Report the clean test accuracy of each model, in the order they were loaded
for model_under_test in (
    initial_hate_speech_model,
    custom_trained_model,
    custom_finetuned_model,
    textfooler_trained_model,
):
    test_model_on_data(model_under_test, test_dataloader, test_data, BATCH_SIZE, device)

Testing: 100%|██████████| 21/21 [00:04<00:00,  4.62it/s]


Test Accuracy: 91.30%


Testing: 100%|██████████| 21/21 [00:04<00:00,  4.58it/s]


Test Accuracy: 91.46%


Testing: 100%|██████████| 21/21 [00:04<00:00,  4.83it/s]


Test Accuracy: 91.15%


Testing: 100%|██████████| 21/21 [00:04<00:00,  4.81it/s]

Test Accuracy: 90.68%



