Merge pull request #88 from QData/kuleshov

add Kuleshov recipe
QData · May 8, 2020 · 53a5c3a · 53a5c3a
2 parents 983498b + dd4eda3
commit 53a5c3a
Show file tree

Hide file tree

Showing 60 changed files with 396 additions and 92 deletions.
diff --git a/README.md b/README.md
@@ -67,6 +67,7 @@ The first are for classification and entailment attacks:
 - **alz-adjusted**: Alzantot's attack adjusted to follow the same constraints as tf-adjusted such that the only difference is the search method.
 - **deepwordbug**: Replace-1 scoring and multi-transformation character-swap attack (["Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers"](https://arxiv.org/abs/1801.04354)).
 - **hotflip**: Beam search and gradient-based word swap (["HotFlip: White-Box Adversarial Examples for Text Classification"](https://arxiv.org/abs/1712.06751)
+- **kuleshov**: Greedy search and counterfitted embedding swap (["Adversarial Examples for Natural Language Classification Problems"](https://openreview.net/pdf?id=r1QZ3zbAZ)).
 
 The final is for translation attacks:
 - **seq2sick**: Greedy attack with goal of changing every word in the output translation. Currently implemented as black-box with plans to change to white-box as done in paper (["Seq2Sick: Evaluating the Robustness of Sequence-to-Sequence Models with Adversarial Examples"](https://arxiv.org/abs/1803.01128)).

diff --git a/textattack/__init__.py b/textattack/__init__.py
@@ -2,14 +2,15 @@
 
 from . import attack_recipes
 from . import attack_results
-from . import attack_methods
 from . import augmentation
+
 from . import constraints
 from . import datasets
 from . import goal_functions
 from . import goal_function_results
 from . import loggers
 from . import models
+from . import search_methods
 from . import shared 
 from . import tokenizers
 from . import transformations
diff --git a/textattack/attack_recipes/__init__.py b/textattack/attack_recipes/__init__.py
@@ -2,6 +2,7 @@
 from .alzantot_2018_adjusted import Alzantot2018Adjusted
 from .deepwordbug_gao_2018 import DeepWordBugGao2018
 from .hotflip_ebrahimi_2017 import HotFlipEbrahimi2017
+from .kuleshov_2017 import Kuleshov2017
 from .seq2sick_cheng_2018_blackbox import Seq2SickCheng2018BlackBox
 from .textfooler_jin_2019 import TextFoolerJin2019
 from .textfooler_jin_2019_adjusted import TextFoolerJin2019Adjusted
diff --git a/textattack/attack_recipes/alzantot_2018.py b/textattack/attack_recipes/alzantot_2018.py
@@ -9,12 +9,12 @@
     ArXiv, abs/1801.00554.
 """
 
-from textattack.attack_methods import GeneticAlgorithm
 from textattack.constraints.overlap import WordsPerturbed
+from textattack.constraints.grammaticality.language_models import Google1BillionWordsLanguageModel
 from textattack.constraints.semantics import WordEmbeddingDistance
-from textattack.constraints.semantics.language_models import GoogleLanguageModel
-from textattack.transformations.black_box import WordSwapEmbedding
 from textattack.goal_functions import UntargetedClassification
+from textattack.search_methods import GeneticAlgorithm
+from textattack.transformations import WordSwapEmbedding
 
 def Alzantot2018(model):
     #
@@ -30,7 +30,7 @@ def Alzantot2018(model):
     # Maximum words perturbed percentage of 20%
     #
     constraints.append(
-            WordsPerturbed(max_percent=20)
+            WordsPerturbed(max_percent=0.2)
     )
     #
     # Maximum word embedding euclidean distance of 0.5.
@@ -42,7 +42,7 @@ def Alzantot2018(model):
     # Language Model
     #
     constraints.append(
-            GoogleLanguageModel(top_n_per_index=4)
+            Google1BillionWordsLanguageModel(top_n_per_index=4)
     )
     #
     # Goal is untargeted classification

diff --git a/textattack/attack_recipes/alzantot_2018_adjusted.py b/textattack/attack_recipes/alzantot_2018_adjusted.py
@@ -9,12 +9,12 @@
     ArXiv, abs/1801.00554.
 """
 
-from textattack.attack_methods import GeneticAlgorithm
+from textattack.constraints.grammaticality import PartOfSpeech, LanguageTool
 from textattack.constraints.semantics import WordEmbeddingDistance
 from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder, BERT
-from textattack.constraints.syntax import PartOfSpeech, LanguageTool
-from textattack.transformations.black_box import WordSwapEmbedding
 from textattack.goal_functions import UntargetedClassification
+from textattack.search_methods import GeneticAlgorithm
+from textattack.transformations import WordSwapEmbedding
 
 def Alzantot2018Adjusted(model, SE_thresh=0.98, sentence_encoder='bert'):
     #

diff --git a/textattack/attack_recipes/deepwordbug_gao_2018.py b/textattack/attack_recipes/deepwordbug_gao_2018.py
@@ -8,11 +8,11 @@
     
 """
 
-from textattack.attack_methods import GreedyWordSwapWIR
 from textattack.constraints.overlap import LevenshteinEditDistance
 from textattack.goal_functions import UntargetedClassification
+from textattack.search_methods import GreedyWordSwapWIR
 from textattack.transformations import CompositeTransformation
-from textattack.transformations.black_box import \
+from textattack.transformations import \
     WordSwapNeighboringCharacterSwap, \
     WordSwapRandomCharacterDeletion, WordSwapRandomCharacterInsertion, \
     WordSwapRandomCharacterSubstitution, WordSwapNeighboringCharacterSwap

diff --git a/textattack/attack_recipes/hotflip_ebrahimi_2017.py b/textattack/attack_recipes/hotflip_ebrahimi_2017.py
@@ -11,12 +11,12 @@
     paper).
 """
 
-from textattack.attack_methods import BeamSearch
+from textattack.goal_functions import UntargetedClassification
+from textattack.constraints.grammaticality import PartOfSpeech
 from textattack.constraints.overlap import WordsPerturbed
 from textattack.constraints.semantics import WordEmbeddingDistance
-from textattack.constraints.syntax import PartOfSpeech
-from textattack.transformations.white_box import GradientBasedWordSwap
-from textattack.goal_functions import UntargetedClassification
+from textattack.search_methods import BeamSearch
+from textattack.transformations import GradientBasedWordSwap
 
 def HotFlipEbrahimi2017(model):
     #

diff --git a/textattack/attack_recipes/kuleshov_2017.py b/textattack/attack_recipes/kuleshov_2017.py
@@ -0,0 +1,61 @@
+"""
+    Kuleshov, V. et al. 
+    
+    Adversarial Examples for Natural Language Classification Problems. 
+    
+    
+    https://openreview.net/pdf?id=r1QZ3zbAZ.
+"""
+
+from textattack.constraints.overlap import WordsPerturbed
+from textattack.constraints.grammaticality.language_models import GPT2
+from textattack.constraints.semantics.sentence_encoders import ThoughtVector
+from textattack.goal_functions import UntargetedClassification
+from textattack.search_methods import GreedyWordSwap
+from textattack.transformations import WordSwapEmbedding
+
+def Kuleshov2017(model):
+    """ Builds the Kuleshov et al. (2017) attack recipe for `model`.
+
+        Hyperparameters follow the paper:
+
+        "Specifically, in all experiments, we used a target of τ = 0.7,
+        a neighborhood size of N = 15, and parameters λ_1 = 0.2 and δ = 0.5; we set
+        the syntactic bound to λ_2 = 2 nats for sentiment analysis"
+
+        Args:
+            model: the model handed to the `UntargetedClassification` goal
+                function.
+
+        Returns:
+            A `GreedyWordSwap` search configured with the goal function,
+            constraints and transformation below.
+    """
+    # Candidate generation: swap a word with one of its top-15 (N)
+    # counter-fitted embedding neighbors.
+    transformation = WordSwapEmbedding(max_candidates=15)
+
+    constraints = [
+        # At most 50% of the words may be perturbed (δ in the paper).
+        WordsPerturbed(max_percent=0.5),
+        # Thought vector Euclidean distance of at most λ_1 = 0.2. (eq. 4)
+        ThoughtVector(embedding_type='paragramcf', threshold=0.2, metric='max_euclidean'),
+        # Language model log-probability difference of at most λ_2 = 2. (eq. 5)
+        GPT2(max_log_prob_diff=2.0),
+    ]
+
+    # Untargeted classification: push the original label's probability score
+    # below τ = 0.7 (Algorithm 1).
+    goal_function = UntargetedClassification(model, target_max_score=0.7)
+
+    # Word substitutions are searched greedily.
+    return GreedyWordSwap(
+        goal_function,
+        constraints=constraints,
+        transformation=transformation,
+    )
+
+
+        # GPT2(max_log_prob_diff=2)
diff --git a/textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py b/textattack/attack_recipes/seq2sick_cheng_2018_blackbox.py
@@ -12,10 +12,10 @@
     
 """
 
-from textattack.attack_methods import GreedyWordSwapWIR
 from textattack.constraints.overlap import LevenshteinEditDistance
 from textattack.goal_functions import NonOverlappingOutput
-from textattack.transformations.black_box import WordSwapEmbedding
+from textattack.search_methods import GreedyWordSwapWIR
+from textattack.transformations import WordSwapEmbedding
 
 def Seq2SickCheng2018BlackBox(model, goal_function='non_overlapping'):
     #

diff --git a/textattack/attack_recipes/textfooler_jin_2019.py b/textattack/attack_recipes/textfooler_jin_2019.py
@@ -8,12 +8,12 @@
     
 """
 
-from textattack.attack_methods import GreedyWordSwapWIR
+from textattack.goal_functions import UntargetedClassification
+from textattack.constraints.grammaticality import PartOfSpeech
 from textattack.constraints.semantics import WordEmbeddingDistance
 from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
-from textattack.constraints.syntax import PartOfSpeech
-from textattack.transformations.black_box import WordSwapEmbedding
-from textattack.goal_functions import UntargetedClassification
+from textattack.search_methods import GreedyWordSwapWIR
+from textattack.transformations import WordSwapEmbedding
 
 def TextFoolerJin2019(model):
     #

diff --git a/textattack/attack_recipes/textfooler_jin_2019_adjusted.py b/textattack/attack_recipes/textfooler_jin_2019_adjusted.py
@@ -8,12 +8,12 @@
     
 """
 
-from textattack.attack_methods import GreedyWordSwapWIR
 from textattack.constraints.semantics import WordEmbeddingDistance
 from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder, BERT
-from textattack.constraints.syntax import PartOfSpeech, LanguageTool
-from textattack.transformations.black_box import WordSwapEmbedding
+from textattack.constraints.grammaticality import PartOfSpeech, LanguageTool
 from textattack.goal_functions import UntargetedClassification
+from textattack.search_methods import GreedyWordSwapWIR
+from textattack.transformations import WordSwapEmbedding
 
 def TextFoolerJin2019Adjusted(model, SE_thresh=0.98, sentence_encoder='bert'):
     #

diff --git a/textattack/augmentation/recipes.py b/textattack/augmentation/recipes.py
@@ -5,15 +5,15 @@
 class WordNetAugmenter(Augmenter):
     """ Augments text by replacing with synonyms from the WordNet thesaurus. """
     def __init__(self):
-        from textattack.transformations.black_box import WordSwapWordNet
+        from textattack.transformations import WordSwapWordNet
         transformation = WordSwapWordNet()
         super().__init__(transformation, constraints=[])
 
 
 class EmbeddingAugmenter(Augmenter):
     """ Augments text by transforming words with their embeddings. """
     def __init__(self):
-        from textattack.transformations.black_box import WordSwapEmbedding
+        from textattack.transformations import WordSwapEmbedding
         transformation = WordSwapEmbedding(
             max_candidates=50, embedding_type='paragramcf'
         )
@@ -28,7 +28,7 @@ class CharSwapAugmenter(Augmenter):
     """ Augments words by swapping characters out for other characters. """
     def __init__(self):
         from textattack.transformations import CompositeTransformation
-        from textattack.transformations.black_box import \
+        from textattack.transformations import \
             WordSwapNeighboringCharacterSwap, \
             WordSwapRandomCharacterDeletion, WordSwapRandomCharacterInsertion, \
             WordSwapRandomCharacterSubstitution, WordSwapNeighboringCharacterSwap

diff --git a/textattack/constraints/__init__.py b/textattack/constraints/__init__.py
@@ -1,4 +1,4 @@
 from .constraint import Constraint
 
+from . import grammaticality
 from . import semantics
-from . import syntax
diff --git a/textattack/constraints/grammaticality/__init__.py b/textattack/constraints/grammaticality/__init__.py
@@ -0,0 +1,4 @@
+from . import language_models
+
+from .language_tool import LanguageTool
+from .part_of_speech import PartOfSpeech
diff --git a/textattack/constraints/grammaticality/language_models/__init__.py b/textattack/constraints/grammaticality/language_models/__init__.py
@@ -0,0 +1,3 @@
+from .google_language_model import Google1BillionWordsLanguageModel
+from .gpt2 import GPT2
+from .language_model_constraint import LanguageModelConstraint
diff --git a/textattack/constraints/grammaticality/language_models/google_language_model/__init__.py b/textattack/constraints/grammaticality/language_models/google_language_model/__init__.py
@@ -0,0 +1 @@
+from .google_language_model import GoogleLanguageModel as Google1BillionWordsLanguageModel
diff --git a/...google_language_model/alzantot_goog_lm.py → ...google_language_model/alzantot_goog_lm.py b/...google_language_model/alzantot_goog_lm.py → ...google_language_model/alzantot_goog_lm.py
diff --git a/...e_language_model/google_language_model.py → ...e_language_model/google_language_model.py b/...e_language_model/google_language_model.py → ...e_language_model/google_language_model.py
diff --git a/...ls/google_language_model/lm_data_utils.py → ...ls/google_language_model/lm_data_utils.py b/...ls/google_language_model/lm_data_utils.py → ...ls/google_language_model/lm_data_utils.py
diff --git a/..._models/google_language_model/lm_utils.py → ..._models/google_language_model/lm_utils.py b/..._models/google_language_model/lm_utils.py → ..._models/google_language_model/lm_utils.py
diff --git a/textattack/constraints/grammaticality/language_models/gpt2.py b/textattack/constraints/grammaticality/language_models/gpt2.py
@@ -0,0 +1,51 @@
+import torch
+from textattack.shared import utils
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
+
+from .language_model_constraint import LanguageModelConstraint
+
+class GPT2(LanguageModelConstraint):
+    """ A constraint based on the GPT-2 language model. 
+        
+        from "Better Language Models and Their Implications" 
+            (openai.com/blog/better-language-models/)
+
+        Args:
+            **kwargs: forwarded unchanged to `LanguageModelConstraint.__init__`.
+    """
+    def __init__(self, **kwargs):
+        # Load the pretrained 'gpt2' weights and tokenizer, and move the model
+        # to the device chosen by `utils.get_device()`.
+        self.model = GPT2LMHeadModel.from_pretrained('gpt2')
+        self.model.to(utils.get_device())
+        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        super().__init__(**kwargs)
+
+    def get_log_probs_at_index(self, tokenized_text_list, word_index):
+        """ Gets the log-probability of the word at index `word_index`
+            according to GPT-2. Assumes that all items in `tokenized_text_list`
+            have the same prefix up until `word_index`.
+
+            Args:
+                tokenized_text_list: texts to score; only the first item is
+                    used to build the shared prefix.
+                word_index (int): index of the word to score in each text.
+
+            Returns:
+                One log-probability per item of `tokenized_text_list`: a
+                zero tensor when the prefix contains no letters, otherwise a
+                list of scalar tensors.
+        """
+        prefix = tokenized_text_list[0].text_until_word_index(word_index)
+
+        if not utils.has_letter(prefix):
+            # This language model perplexity is not defined with respect to
+            # a word without a prefix. If the prefix is null, just return the
+            # log-probability 0.0.
+            return torch.zeros(len(tokenized_text_list), dtype=torch.float)
+
+        token_ids = self.tokenizer.encode(prefix)
+        tokens_tensor = torch.tensor([token_ids])
+        tokens_tensor = tokens_tensor.to(utils.get_device())
+
+        with torch.no_grad():
+            outputs = self.model(tokens_tensor)
+        # `outputs[0]` holds the unnormalized next-token scores (logits).
+        # Normalize the scores that follow the last prefix token once, so the
+        # values returned really are log-probabilities.
+        next_token_log_probs = torch.nn.functional.log_softmax(
+            outputs[0][0, -1], dim=-1
+        )
+
+        log_probs = []
+        for tokenized_text in tokenized_text_list:
+            next_word_ids = self.tokenizer.encode(tokenized_text.words[word_index])
+            # NOTE(review): only the first sub-word token of the word is
+            # scored, and the word is encoded without a leading space --
+            # confirm this matches the intended GPT-2 tokenization.
+            log_probs.append(next_token_log_probs[next_word_ids[0]])
+
+        return log_probs
+
+
+
diff --git a/textattack/constraints/grammaticality/language_models/language_model_constraint.py b/textattack/constraints/grammaticality/language_models/language_model_constraint.py
@@ -0,0 +1,42 @@
+import math
+import torch
+
+from textattack.constraints import Constraint
+
+class LanguageModelConstraint(Constraint):
+    """ 
+        Determines if two sentences have a swapped word that has a similar
+            probability according to a language model.
+        
+        Args:
+            max_log_prob_diff (float): the maximum difference in log-probability
+                between x and x_adv
+
+        Raises:
+            ValueError: if `max_log_prob_diff` is not provided.
+    """
+
+    def __init__(self, max_log_prob_diff=None):
+        if max_log_prob_diff is None:
+            raise ValueError('Must set max_log_prob_diff')
+        self.max_log_prob_diff = max_log_prob_diff
+
+    def get_log_probs_at_index(self, text_list, word_index):
+        """ Gets the log-probability of items in `text_list` at index 
+            `word_index` according to a language model.
+
+            Subclasses must override this method.
+        """
+        raise NotImplementedError()
+
+    def __call__(self, x, x_adv, original_text=None):
+        """ Returns whether the log-probabilities of the word at the modified
+            index in `x` and `x_adv` differ by at most `max_log_prob_diff`.
+
+            Args:
+                x: the text before the swap.
+                x_adv: the text after the swap; must carry
+                    `attack_attrs['modified_word_index']`.
+                original_text: unused by this constraint.
+
+            Raises:
+                AttributeError: if `x_adv` has no `attack_attrs`.
+                KeyError: if `attack_attrs` has no `modified_word_index`.
+                ValueError: if the language model does not return exactly
+                    two values for the two inputs.
+        """
+        try:
+            i = x_adv.attack_attrs['modified_word_index']
+        except (AttributeError, KeyError) as err:
+            # A missing dictionary key raises KeyError, not AttributeError, so
+            # both are caught; the original exception type is kept so callers
+            # see the same class, now with an explanatory message.
+            raise type(err)(
+                'Cannot apply language model constraint without `modified_word_index`'
+            ) from err
+
+        probs = self.get_log_probs_at_index((x, x_adv), i)
+        if len(probs) != 2:
+            raise ValueError(f'Error: get_log_probs_at_index returned {len(probs)} values for 2 inputs')
+        x_prob, x_adv_prob = probs
+        return abs(x_prob - x_adv_prob) <= self.max_log_prob_diff
+
+    def extra_repr_keys(self):
+        """ Names of the attributes to include in this constraint's repr. """
+        return ['max_log_prob_diff']
diff --git a/...ttack/constraints/syntax/language_tool.py → ...nstraints/grammaticality/language_tool.py b/...ttack/constraints/syntax/language_tool.py → ...nstraints/grammaticality/language_tool.py
diff --git a/...tack/constraints/syntax/part_of_speech.py → ...straints/grammaticality/part_of_speech.py b/...tack/constraints/syntax/part_of_speech.py → ...straints/grammaticality/part_of_speech.py
@@ -2,7 +2,7 @@
 import nltk
 
 from textattack.constraints import Constraint
-from textattack.shared.tokenized_text import TokenizedText
+from textattack.shared import TokenizedText
 
 class PartOfSpeech(Constraint):
     """ Constraints word swaps to only swap words with the same part of speech.

diff --git a/textattack/constraints/overlap/words_perturbed.py b/textattack/constraints/overlap/words_perturbed.py
@@ -1,3 +1,4 @@
+import math
 from textattack.constraints import Constraint
 
 class WordsPerturbed(Constraint):
@@ -6,6 +7,8 @@ class WordsPerturbed(Constraint):
     def __init__(self, max_num_words=None, max_percent=None):
         if (max_num_words is None) and (max_percent is None):
             raise ValueError('must set either max perc or max num words')
+        if max_percent and not (0 <= max_percent <= 1):
+            raise ValueError('max perc must be between 0 and 1')
         self.max_num_words = max_num_words
         self.max_percent = max_percent
 
@@ -16,7 +19,7 @@ def __call__(self, x, x_adv, original_text=None):
         num_words_diff = len(x_adv.all_words_diff(original_text))
         if self.max_percent:
             min_num_words = min(len(x_adv.words), len(original_text.words))
-            max_words_perturbed = round(min_num_words * (self.max_percent / 100))
+            max_words_perturbed = math.ceil(min_num_words * (self.max_percent))
             max_percent_met = num_words_diff <= max_words_perturbed
         else:
             max_percent_met = True