Skip to content

Commit

Permalink
Merge pull request #88 from QData/kuleshov
Browse files Browse the repository at this point in the history
add Kuleshov recipe
  • Loading branch information
jxmorris12 committed May 8, 2020
2 parents 983498b + dd4eda3 commit 53a5c3a
Show file tree
Hide file tree
Showing 60 changed files with 396 additions and 92 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ The first are for classification and entailment attacks:
- **alz-adjusted**: Alzantot's attack adjusted to follow the same constraints as tf-adjusted such that the only difference is the search method.
- **deepwordbug**: Replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers"](https://arxiv.org/abs/1801.04354)).
- **hotflip**: Beam search and gradient-based word swap (["HotFlip: White-Box Adversarial Examples for Text Classification"](https://arxiv.org/abs/1712.06751)).
- **kuleshov**: Greedy search and counterfitted embedding swap (["Adversarial Examples for Natural Language Classification Problems"](https://openreview.net/pdf?id=r1QZ3zbAZ)).

The final is for translation attacks:
- **seq2sick**: Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples"](https://arxiv.org/abs/1803.01128)).
Expand Down
3 changes: 2 additions & 1 deletion textattack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

from . import attack_recipes
from . import attack_results
from . import attack_methods
from . import augmentation

from . import constraints
from . import datasets
from . import goal_functions
from . import goal_function_results
from . import loggers
from . import models
from . import search_methods
from . import shared
from . import tokenizers
from . import transformations
1 change: 1 addition & 0 deletions textattack/attack_recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .alzantot_2018_adjusted import Alzantot2018Adjusted
from .deepwordbug_gao_2018 import DeepWordBugGao2018
from .hotflip_ebrahimi_2017 import HotFlipEbrahimi2017
from .kuleshov_2017 import Kuleshov2017
from .seq2sick_cheng_2018_blackbox import Seq2SickCheng2018BlackBox
from .textfooler_jin_2019 import TextFoolerJin2019
from .textfooler_jin_2019_adjusted import TextFoolerJin2019Adjusted
10 changes: 5 additions & 5 deletions textattack/attack_recipes/alzantot_2018.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
ArXiv, abs/1801.00554.
"""

from textattack.attack_methods import GeneticAlgorithm
from textattack.constraints.overlap import WordsPerturbed
from textattack.constraints.grammaticality.language_models import Google1BillionWordsLanguageModel
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.constraints.semantics.language_models import GoogleLanguageModel
from textattack.transformations.black_box import WordSwapEmbedding
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GeneticAlgorithm
from textattack.transformations import WordSwapEmbedding

def Alzantot2018(model):
#
Expand All @@ -30,7 +30,7 @@ def Alzantot2018(model):
# Maximum words perturbed percentage of 20%
#
constraints.append(
WordsPerturbed(max_percent=20)
WordsPerturbed(max_percent=0.2)
)
#
# Maximum word embedding euclidean distance of 0.5.
Expand All @@ -42,7 +42,7 @@ def Alzantot2018(model):
# Language Model
#
constraints.append(
GoogleLanguageModel(top_n_per_index=4)
Google1BillionWordsLanguageModel(top_n_per_index=4)
)
#
# Goal is untargeted classification
Expand Down
6 changes: 3 additions & 3 deletions textattack/attack_recipes/alzantot_2018_adjusted.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
ArXiv, abs/1801.00554.
"""

from textattack.attack_methods import GeneticAlgorithm
from textattack.constraints.grammaticality import PartOfSpeech, LanguageTool
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder, BERT
from textattack.constraints.syntax import PartOfSpeech, LanguageTool
from textattack.transformations.black_box import WordSwapEmbedding
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GeneticAlgorithm
from textattack.transformations import WordSwapEmbedding

def Alzantot2018Adjusted(model, SE_thresh=0.98, sentence_encoder='bert'):
#
Expand Down
4 changes: 2 additions & 2 deletions textattack/attack_recipes/deepwordbug_gao_2018.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
"""

from textattack.attack_methods import GreedyWordSwapWIR
from textattack.constraints.overlap import LevenshteinEditDistance
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import CompositeTransformation
from textattack.transformations.black_box import \
from textattack.transformations import \
WordSwapNeighboringCharacterSwap, \
WordSwapRandomCharacterDeletion, WordSwapRandomCharacterInsertion, \
WordSwapRandomCharacterSubstitution, WordSwapNeighboringCharacterSwap
Expand Down
8 changes: 4 additions & 4 deletions textattack/attack_recipes/hotflip_ebrahimi_2017.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
paper).
"""

from textattack.attack_methods import BeamSearch
from textattack.goal_functions import UntargetedClassification
from textattack.constraints.grammaticality import PartOfSpeech
from textattack.constraints.overlap import WordsPerturbed
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.constraints.syntax import PartOfSpeech
from textattack.transformations.white_box import GradientBasedWordSwap
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import BeamSearch
from textattack.transformations import GradientBasedWordSwap

def HotFlipEbrahimi2017(model):
#
Expand Down
61 changes: 61 additions & 0 deletions textattack/attack_recipes/kuleshov_2017.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Kuleshov, V. et al.
Adversarial Examples for Natural Language Classification Problems.
https://openreview.net/pdf?id=r1QZ3zbAZ.
"""

from textattack.constraints.overlap import WordsPerturbed
from textattack.constraints.grammaticality.language_models import GPT2
from textattack.constraints.semantics.sentence_encoders import ThoughtVector
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwap
from textattack.transformations import WordSwapEmbedding

def Kuleshov2017(model):
    """ Builds the Kuleshov et al. (2017) attack for `model`.

    Hyperparameters follow the paper:
        "Specifically, in all experiments, we used a target of τ = 0.7,
        a neighborhood size of N = 15, and parameters λ_1 = 0.2 and δ = 0.5;
        we set the syntactic bound to λ_2 = 2 nats for sentiment analysis"

    Returns:
        A `GreedyWordSwap` search configured with the goal function,
        constraints and transformation described below.
    """
    # Candidate replacements: the N = 15 nearest counter-fitted embedding
    # neighbors of each word.
    word_swap = WordSwapEmbedding(max_candidates=15)

    recipe_constraints = [
        # At most 50% of the words may be perturbed (δ in the paper).
        WordsPerturbed(max_percent=0.5),
        # Thought vector Euclidean distance of at most λ_1 = 0.2. (eq. 4)
        ThoughtVector(embedding_type='paragramcf', threshold=0.2, metric='max_euclidean'),
        # Language model log-probability difference of at most λ_2 = 2. (eq. 5)
        GPT2(max_log_prob_diff=2.0),
    ]

    # Untargeted classification: push the original class score below
    # τ = 0.7 (Algorithm 1).
    objective = UntargetedClassification(model, target_max_score=0.7)

    # Word substitution is driven by a greedy search.
    return GreedyWordSwap(objective, constraints=recipe_constraints,
        transformation=word_swap)


# GPT2(max_log_prob_diff=2)
4 changes: 2 additions & 2 deletions textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
"""

from textattack.attack_methods import GreedyWordSwapWIR
from textattack.constraints.overlap import LevenshteinEditDistance
from textattack.goal_functions import NonOverlappingOutput
from textattack.transformations.black_box import WordSwapEmbedding
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import WordSwapEmbedding

def Seq2SickCheng2018BlackBox(model, goal_function='non_overlapping'):
#
Expand Down
8 changes: 4 additions & 4 deletions textattack/attack_recipes/textfooler_jin_2019.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
"""

from textattack.attack_methods import GreedyWordSwapWIR
from textattack.goal_functions import UntargetedClassification
from textattack.constraints.grammaticality import PartOfSpeech
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.constraints.syntax import PartOfSpeech
from textattack.transformations.black_box import WordSwapEmbedding
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import WordSwapEmbedding

def TextFoolerJin2019(model):
#
Expand Down
6 changes: 3 additions & 3 deletions textattack/attack_recipes/textfooler_jin_2019_adjusted.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
"""

from textattack.attack_methods import GreedyWordSwapWIR
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder, BERT
from textattack.constraints.syntax import PartOfSpeech, LanguageTool
from textattack.transformations.black_box import WordSwapEmbedding
from textattack.constraints.grammaticality import PartOfSpeech, LanguageTool
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import WordSwapEmbedding

def TextFoolerJin2019Adjusted(model, SE_thresh=0.98, sentence_encoder='bert'):
#
Expand Down
6 changes: 3 additions & 3 deletions textattack/augmentation/recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
class WordNetAugmenter(Augmenter):
""" Augments text by replacing with synonyms from the WordNet thesaurus. """
def __init__(self):
from textattack.transformations.black_box import WordSwapWordNet
from textattack.transformations import WordSwapWordNet
transformation = WordSwapWordNet()
super().__init__(transformation, constraints=[])


class EmbeddingAugmenter(Augmenter):
""" Augments text by transforming words with their embeddings. """
def __init__(self):
from textattack.transformations.black_box import WordSwapEmbedding
from textattack.transformations import WordSwapEmbedding
transformation = WordSwapEmbedding(
max_candidates=50, embedding_type='paragramcf'
)
Expand All @@ -28,7 +28,7 @@ class CharSwapAugmenter(Augmenter):
""" Augments words by swapping characters out for other characters. """
def __init__(self):
from textattack.transformations import CompositeTransformation
from textattack.transformations.black_box import \
from textattack.transformations import \
WordSwapNeighboringCharacterSwap, \
WordSwapRandomCharacterDeletion, WordSwapRandomCharacterInsertion, \
WordSwapRandomCharacterSubstitution, WordSwapNeighboringCharacterSwap
Expand Down
2 changes: 1 addition & 1 deletion textattack/constraints/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .constraint import Constraint

from . import grammaticality
from . import semantics
from . import syntax
4 changes: 4 additions & 0 deletions textattack/constraints/grammaticality/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from . import language_models

from .language_tool import LanguageTool
from .part_of_speech import PartOfSpeech
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .google_language_model import Google1BillionWordsLanguageModel
from .gpt2 import GPT2
from .language_model_constraint import LanguageModelConstraint
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .google_language_model import GoogleLanguageModel as Google1BillionWordsLanguageModel
51 changes: 51 additions & 0 deletions textattack/constraints/grammaticality/language_models/gpt2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import torch
from textattack.shared import utils
from transformers import GPT2Tokenizer, GPT2LMHeadModel

from .language_model_constraint import LanguageModelConstraint

class GPT2(LanguageModelConstraint):
    """ A constraint based on the GPT-2 language model.
        from "Better Language Models and Their Implications"
        (openai.com/blog/better-language-models/)

        Keyword arguments are forwarded unchanged to
        `LanguageModelConstraint.__init__`.
    """
    def __init__(self, **kwargs):
        # Load the pretrained weights and tokenizer before handing the
        # remaining keyword arguments to the parent constraint.
        self.model = GPT2LMHeadModel.from_pretrained('gpt2')
        self.model.to(utils.get_device())
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        super().__init__(**kwargs)

    def get_log_probs_at_index(self, tokenized_text_list, word_index):
        """ Scores the word at `word_index` of every text in
            `tokenized_text_list` according to GPT-2.

            Only the first text's prefix is fed to the model, so all items
            are assumed to share the same prefix up until `word_index`.

            Returns a zero tensor (one entry per text) when the prefix holds
            no letters; otherwise a list with one scalar tensor per text.

            NOTE(review): the scores are read straight from the first model
            output without normalisation (presumably the LM-head logits --
            confirm against the transformers docs). Because every text shares
            one prefix, the difference between two such scores equals the
            difference of the corresponding log-probabilities.
        """
        context = tokenized_text_list[0].text_until_word_index(word_index)

        # This language model perplexity is not defined with respect to
        # a word without a prefix. If the prefix is null, just return the
        # log-probability 0.0.
        if not utils.has_letter(context):
            return torch.zeros(len(tokenized_text_list), dtype=torch.float)

        context_ids = torch.tensor([self.tokenizer.encode(context)])
        context_ids = context_ids.to(utils.get_device())

        with torch.no_grad():
            scores = self.model(context_ids)[0]

        # Scores for the token that would follow the shared prefix.
        next_token_scores = scores[0, -1]

        # Each candidate word is scored by its first sub-word token only.
        return [
            next_token_scores[self.tokenizer.encode(text.words[word_index])[0]]
            for text in tokenized_text_list
        ]



Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import math
import torch

from textattack.constraints import Constraint

class LanguageModelConstraint(Constraint):
    """
    Determines if two sentences have a swapped word that has a similar
    probability according to a language model.

    Subclasses provide the language model by implementing
    `get_log_probs_at_index`.

    Args:
        max_log_prob_diff (float): the maximum difference in log-probability
            between x and x_adv

    Raises:
        ValueError: if `max_log_prob_diff` is not set.
    """

    def __init__(self, max_log_prob_diff=None):
        if max_log_prob_diff is None:
            raise ValueError('Must set max_log_prob_diff')
        self.max_log_prob_diff = max_log_prob_diff

    def get_log_probs_at_index(self, text_list, word_index):
        """ Gets the log-probability of items in `text_list` at index
            `word_index` according to a language model.
        """
        raise NotImplementedError()

    def __call__(self, x, x_adv, original_text=None):
        """ Returns whether the swapped word in `x_adv` is about as likely as
            the word it replaced in `x`.

            The position compared is `x_adv.attack_attrs['modified_word_index']`.
            `original_text` is accepted but not used here.

            Raises:
                AttributeError: if `x_adv` has no `attack_attrs`.
                KeyError: if `attack_attrs` has no `modified_word_index`.
                ValueError: if the language model does not return exactly
                    one value per input text.
        """
        missing_index_msg = 'Cannot apply language model constraint without `modified_word_index`'
        try:
            i = x_adv.attack_attrs['modified_word_index']
        except AttributeError as err:
            raise AttributeError(missing_index_msg) from err
        except KeyError as err:
            # A missing dictionary key raises KeyError, not AttributeError;
            # keep the exception type but attach the explanatory message.
            raise KeyError(missing_index_msg) from err

        probs = self.get_log_probs_at_index((x, x_adv), i)
        if len(probs) != 2:
            raise ValueError(f'Error: get_log_probs_at_index returned {len(probs)} values for 2 inputs')
        x_prob, x_adv_prob = probs
        # `__init__` guarantees `max_log_prob_diff` is set, so the values can
        # be compared directly.
        return abs(x_prob - x_adv_prob) <= self.max_log_prob_diff

    def extra_repr_keys(self):
        """ Names of the attributes reported as this constraint's parameters. """
        return ['max_log_prob_diff']
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import nltk

from textattack.constraints import Constraint
from textattack.shared.tokenized_text import TokenizedText
from textattack.shared import TokenizedText

class PartOfSpeech(Constraint):
""" Constraints word swaps to only swap words with the same part of speech.
Expand Down
5 changes: 4 additions & 1 deletion textattack/constraints/overlap/words_perturbed.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
from textattack.constraints import Constraint

class WordsPerturbed(Constraint):
Expand All @@ -6,6 +7,8 @@ class WordsPerturbed(Constraint):
def __init__(self, max_num_words=None, max_percent=None):
if (max_num_words is None) and (max_percent is None):
raise ValueError('must set either max perc or max num words')
if max_percent and not (0 <= max_percent <= 1):
raise ValueError('max perc must be between 0 and 1')
self.max_num_words = max_num_words
self.max_percent = max_percent

Expand All @@ -16,7 +19,7 @@ def __call__(self, x, x_adv, original_text=None):
num_words_diff = len(x_adv.all_words_diff(original_text))
if self.max_percent:
min_num_words = min(len(x_adv.words), len(original_text.words))
max_words_perturbed = round(min_num_words * (self.max_percent / 100))
max_words_perturbed = math.ceil(min_num_words * (self.max_percent))
max_percent_met = num_words_diff <= max_words_perturbed
else:
max_percent_met = True
Expand Down

0 comments on commit 53a5c3a

Please sign in to comment.