Merge pull request #751 from QData/local_install_minor

Local install minor
QData · Sep 30, 2023 · 102d824 · 102d824
2 parents bb2662d + 6582fcf
commit 102d824
Show file tree

Hide file tree

Showing 28 changed files with 122 additions and 51 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -117,7 +117,7 @@ Follow these steps to start contributing:
 
    ```bash
    $ cd TextAttack
-   $ pip install -e . ".[dev]"
+   $ pip install -e ".[dev]"
    $ pip install black docformatter isort pytest pytest-xdist
    ```
 

diff --git a/README.md b/README.md
@@ -319,6 +319,7 @@ for data augmentation:
 - `eda` augments text with a combination of word insertions, substitutions and deletions.
 - `checklist` augments text by contraction/extension and by substituting names, locations, numbers.
 - `clare` augments text by replacing, inserting, and merging with a pre-trained masked language model.
+- `back_trans` augments text using a back-translation approach.
 
 
 #### Augmentation Command-Line Interface

diff --git a/docs/0_get_started/installation.md b/docs/0_get_started/installation.md
@@ -67,7 +67,7 @@ Besides, we highly recommend you to use virtual environment for textattack use,
 see [information here](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#removing-an-environment). Here is one conda example: 
 
 ```bash
-conda create -n textattackenv python=3.7
+conda create -n textattackenv python=3.8
 conda activate textattackenv
 conda env list
 ```

diff --git a/docs/1start/FAQ.md b/docs/1start/FAQ.md
@@ -43,7 +43,7 @@ Besides, we highly recommend you to use virtual environment for textattack use,
 see [information here](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#removing-an-environment). Here is one conda example: 
 
 ```bash
-conda create -n textattackenv python=3.7
+conda create -n textattackenv python=3.8
 conda activate textattackenv
 conda env list
 ```

diff --git a/docs/1start/support.md b/docs/1start/support.md
@@ -121,7 +121,7 @@ Follow these steps to start contributing:
 
    ```bash
    $ cd TextAttack
-   $ pip install -e . ".[dev]"
+   $ pip install -e ".[dev]"
    $ pip install black isort pytest pytest-xdist
    ```
 

diff --git a/docs/3recipes/augmenter_recipes_cmd.md b/docs/3recipes/augmenter_recipes_cmd.md
@@ -18,6 +18,7 @@ for data augmentation:
 - `eda` augments text with a combination of word insertions, substitutions and deletions.
 - `checklist` augments text by contraction/extension and by substituting names, locations, numbers.
 - `clare` augments text by replacing, inserting, and merging with a pre-trained masked language model.
+- `back_trans` augments text using a back-translation method.
 
 
 ### Augmentation Command-Line Interface

diff --git a/tests/test_command_line/test_attack.py b/tests/test_command_line/test_attack.py
@@ -5,10 +5,7 @@
 import pytest
 
 DEBUG = False
-
-"""
-Attack command-line tests in the format (name, args, sample_output_file)
-"""
+"""Attack command-line tests in the format (name, args, sample_output_file)"""
 
 attack_test_params = [
     #

diff --git a/tests/test_command_line/test_loggers.py b/tests/test_command_line/test_loggers.py
@@ -5,10 +5,7 @@
 import pytest
 
 DEBUG = False
-
-"""
-Attack command-line tests in the format (name, args, sample_output_file)
-"""
+"""Attack command-line tests in the format (name, args, sample_output_file)"""
 
 """
  list_test_params data structure requires

diff --git a/tests/test_metric_api.py b/tests/test_metric_api.py
@@ -30,7 +30,7 @@ def test_use():
     from textattack import AttackArgs, Attacker
     from textattack.attack_recipes import DeepWordBugGao2018
     from textattack.datasets import HuggingFaceDataset
-    from textattack.metrics.quality_metrics import USEMetric
+    from textattack.metrics.quality_metrics import MeteorMetric
     from textattack.models.wrappers import HuggingFaceModelWrapper
 
     model = transformers.AutoModelForSequenceClassification.from_pretrained(
@@ -50,9 +50,40 @@ def test_use():
         disable_stdout=True,
     )
     attacker = Attacker(attack, dataset, attack_args)
-
     results = attacker.attack_dataset()
 
-    usem = USEMetric().calculate(results)
+    usem = MeteorMetric().calculate(results)
+
+    assert usem["avg_attack_meteor_score"] == 0.71
+
+
+def test_metric_recipe():
+    import transformers
+
+    from textattack import AttackArgs, Attacker
+    from textattack.attack_recipes import DeepWordBugGao2018
+    from textattack.datasets import HuggingFaceDataset
+    from textattack.metrics.recipe import AdvancedAttackMetric
+    from textattack.models.wrappers import HuggingFaceModelWrapper
+
+    model = transformers.AutoModelForSequenceClassification.from_pretrained(
+        "distilbert-base-uncased-finetuned-sst-2-english"
+    )
+    tokenizer = transformers.AutoTokenizer.from_pretrained(
+        "distilbert-base-uncased-finetuned-sst-2-english"
+    )
+    model_wrapper = HuggingFaceModelWrapper(model, tokenizer)
+    attack = DeepWordBugGao2018.build(model_wrapper)
+    dataset = HuggingFaceDataset("glue", "sst2", split="train")
+    attack_args = AttackArgs(
+        num_examples=1,
+        log_to_csv="log.csv",
+        checkpoint_interval=5,
+        checkpoint_dir="checkpoints",
+        disable_stdout=True,
+    )
+    attacker = Attacker(attack, dataset, attack_args)
+    results = attacker.attack_dataset()
 
-    assert usem["avg_attack_use_score"] == 0.76
+    adv_score = AdvancedAttackMetric(["meteor_score", "perplexity"]).calculate(results)
+    assert adv_score["avg_attack_meteor_score"] == 0.71
diff --git a/textattack/__init__.py b/textattack/__init__.py
@@ -2,7 +2,8 @@
 
 What is TextAttack?
 
-`TextAttack <https://github.com/QData/TextAttack>`__ is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP.
+`TextAttack <https://github.com/QData/TextAttack>`__
+is a Python framework for adversarial attacks, adversarial training, and data augmentation in NLP.
 
 TextAttack makes experimenting with the robustness of NLP models seamless, fast, and easy. It's also useful for NLP model training, adversarial training, and data augmentation.
 

diff --git a/textattack/attack_args.py b/textattack/attack_args.py
@@ -128,6 +128,7 @@
 @dataclass
 class AttackArgs:
     """Attack arguments to be passed to :class:`~textattack.Attacker`.
+
     Args:
         num_examples (:obj:`int`, 'optional`, defaults to :obj:`10`):
             The number of examples to attack. :obj:`-1` for entire dataset.
@@ -464,7 +465,9 @@ def create_loggers_from_args(cls, args):
 
 @dataclass
 class _CommandLineAttackArgs:
-    """Attack args for command line execution. This requires more arguments to
+    """Attack args for command line execution.
+
+    This requires more arguments to
     create ``Attack`` object as specified.
     Args:
         transformation (:obj:`str`, `optional`, defaults to :obj:`"word-swap-embedding"`):

diff --git a/textattack/attack_recipes/clare_li_2020.py b/textattack/attack_recipes/clare_li_2020.py
@@ -29,7 +29,8 @@
 class CLARE2020(AttackRecipe):
     """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan.
 
-    "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)
+    "Contextualized Perturbation for Textual Adversarial Attack" (Li et
+    al., 2020)
 
     https://arxiv.org/abs/2009.07502
 

diff --git a/textattack/attack_recipes/deepwordbug_gao_2018.py b/textattack/attack_recipes/deepwordbug_gao_2018.py
@@ -28,8 +28,8 @@
 class DeepWordBugGao2018(AttackRecipe):
     """Gao, Lanchantin, Soffa, Qi.
 
-    Black-box Generation of Adversarial Text Sequences to Evade Deep Learning
-    Classifiers.
+    Black-box Generation of Adversarial Text Sequences to Evade Deep
+    Learning Classifiers.
 
     https://arxiv.org/abs/1801.04354
     """

diff --git a/textattack/attack_recipes/morpheus_tan_2020.py b/textattack/attack_recipes/morpheus_tan_2020.py
@@ -20,7 +20,8 @@
 class MorpheusTan2020(AttackRecipe):
     """Samson Tan, Shafiq Joty, Min-Yen Kan, Richard Socher.
 
-    It’s Morphin’ Time! Combating Linguistic Discrimination with Inflectional Perturbations
+    It’s Morphin’ Time! Combating Linguistic Discrimination with
+    Inflectional Perturbations
 
     https://www.aclweb.org/anthology/2020.acl-main.263/
     """

diff --git a/textattack/attack_recipes/pwws_ren_2019.py b/textattack/attack_recipes/pwws_ren_2019.py
@@ -23,11 +23,12 @@ class PWWSRen2019(AttackRecipe):
     Natural Language Adversarial Examples through Probability Weighted Word
     Saliency", Ren et al., 2019.
 
-    Words are prioritized for a synonym-swap transformation based on
-    a combination of their saliency score and maximum word-swap effectiveness.
-    Note that this implementation does not include the Named
-    Entity adversarial swap from the original paper, because it requires
-    access to the full dataset and ground truth labels in advance.
+    Words are prioritized for a synonym-swap transformation based on a
+    combination of their saliency score and maximum word-swap
+    effectiveness. Note that this implementation does not include the
+    Named Entity adversarial swap from the original paper, because it
+    requires access to the full dataset and ground truth labels in
+    advance.
 
     https://www.aclweb.org/anthology/P19-1103/
     """

diff --git a/textattack/attack_recipes/textfooler_jin_2019.py b/textattack/attack_recipes/textfooler_jin_2019.py
@@ -25,7 +25,8 @@
 class TextFoolerJin2019(AttackRecipe):
     """Jin, D., Jin, Z., Zhou, J.T., & Szolovits, P. (2019).
 
-    Is BERT Really Robust? Natural Language Attack on Text Classification and Entailment.
+    Is BERT Really Robust? Natural Language Attack on Text
+    Classification and Entailment.
 
     https://arxiv.org/abs/1907.11932
     """

diff --git a/textattack/augmentation/recipes.py b/textattack/augmentation/recipes.py
@@ -190,7 +190,8 @@ def __init__(self, **kwargs):
 class CLAREAugmenter(Augmenter):
     """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan.
 
-    "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)
+    "Contextualized Perturbation for Textual Adversarial Attack" (Li et
+    al., 2020)
 
     https://arxiv.org/abs/2009.07502
 

diff --git a/textattack/constraints/grammaticality/language_models/google_language_model/lm_data_utils.py b/textattack/constraints/grammaticality/language_models/google_language_model/lm_data_utils.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-
 """
 A library for loading 1B word benchmark dataset.
 ------------------------------------------------

diff --git a/textattack/constraints/grammaticality/language_models/learning_to_write/learning_to_write.py b/textattack/constraints/grammaticality/language_models/learning_to_write/learning_to_write.py
@@ -17,17 +17,16 @@
 class LearningToWriteLanguageModel(LanguageModelConstraint):
     """A constraint based on the L2W language model.
 
-    The RNN-based language model from "Learning to Write With Cooperative
-    Discriminators" (Holtzman et al, 2018).
+    The RNN-based language model from "Learning to Write With
+    Cooperative Discriminators" (Holtzman et al, 2018).
 
     https://arxiv.org/pdf/1805.06087.pdf
 
     https://github.com/windweller/l2w
 
-
-    Reused by Jia et al., 2019, as a substitution for the Google 1-billion
-    words language model (in a revised version the attack of Alzantot et
-    al., 2018).
+    Reused by Jia et al., 2019, as a substitution for the Google
+    1-billion words language model (in a revised version of the attack
+    of Alzantot et al., 2018).
 
     https://worksheets.codalab.org/worksheets/0x79feda5f1998497db75422eca8fcd689
     """

diff --git a/textattack/constraints/pre_transformation/min_word_length.py b/textattack/constraints/pre_transformation/min_word_length.py
@@ -12,7 +12,8 @@ class MinWordLength(PreTransformationConstraint):
     """A constraint that prevents modifications to words less than a certain
     word character-length.
 
-    :param min_length: Minimum word character-length needed for changes to be made to a word.
+    :param min_length: Minimum word character-length needed for changes
+        to be made to a word.
     """
 
     def __init__(self, min_length):

diff --git a/textattack/constraints/semantics/sentence_encoders/bert/bert.py b/textattack/constraints/semantics/sentence_encoders/bert/bert.py
@@ -15,7 +15,9 @@ class BERT(SentenceEncoder):
     """Constraint using similarity between sentence encodings of x and x_adv
     where the text embeddings are created using BERT, trained on NLI data, and
     fine- tuned on the STS benchmark dataset.
-    Available models can be found here: https://huggingface.co/sentence-transformers"""
+
+    Available models can be found here: https://huggingface.co/sentence-transformers
+    """
 
     def __init__(
         self,

diff --git a/textattack/metrics/metric.py b/textattack/metrics/metric.py
@@ -8,7 +8,7 @@
 
 
 class Metric(ABC):
-    """A metric for evaluating Adversarial Attack candidates."""
+    """A metric for evaluating results and data quality."""
 
     @abstractmethod
     def __init__(self, **kwargs):

diff --git a/textattack/metrics/recipe.py b/textattack/metrics/recipe.py
@@ -0,0 +1,35 @@
+"""
+Attack Metric Quality Recipes:
+==============================
+
+"""
+
+from textattack.metrics.quality_metrics.bert_score import BERTScoreMetric
+from textattack.metrics.quality_metrics.meteor_score import MeteorMetric
+from textattack.metrics.quality_metrics.perplexity import Perplexity
+from textattack.metrics.quality_metrics.sentence_bert import SBERTMetric
+from textattack.metrics.quality_metrics.use import USEMetric
+
+from .metric import Metric
+
+
+class AdvancedAttackMetric(Metric):
+    """Run each quality metric named in ``choices`` ("use", "perplexity",
+    "bert_score", "meteor_score", "sbert_score") and merge the result dicts."""
+
+    def __init__(self, choices=("use",)):
+        self.achoices = choices
+
+    def calculate(self, results):
+        advanced_metrics = {}
+        if "use" in self.achoices:
+            advanced_metrics.update(USEMetric().calculate(results))
+        if "perplexity" in self.achoices:
+            advanced_metrics.update(Perplexity().calculate(results))
+        if "bert_score" in self.achoices:
+            advanced_metrics.update(BERTScoreMetric().calculate(results))
+        if "meteor_score" in self.achoices:
+            advanced_metrics.update(MeteorMetric().calculate(results))
+        if "sbert_score" in self.achoices:
+            advanced_metrics.update(SBERTMetric().calculate(results))
+        return advanced_metrics
diff --git a/textattack/models/helpers/glove_embedding_layer.py b/textattack/models/helpers/glove_embedding_layer.py
@@ -16,9 +16,9 @@
 class EmbeddingLayer(nn.Module):
     """A layer of a model that replaces word IDs with their embeddings.
 
-    This is a useful abstraction for any nn.module which wants to take word IDs
-    (a sequence of text) as input layer but actually manipulate words'
-    embeddings.
+    This is a useful abstraction for any nn.module which wants to take
+    word IDs (a sequence of text) as input layer but actually manipulate
+    words' embeddings.
 
     Requires some pre-trained embedding with associated word IDs.
     """

diff --git a/textattack/models/wrappers/model_wrapper.py b/textattack/models/wrappers/model_wrapper.py
@@ -10,11 +10,12 @@
 class ModelWrapper(ABC):
     """A model wrapper queries a model with a list of text inputs.
 
-    Classification-based models return a list of lists, where each sublist
-    represents the model's scores for a given input.
+    Classification-based models return a list of lists, where each
+    sublist represents the model's scores for a given input.
 
-    Text-to-text models return a list of strings, where each string is the
-    output – like a translation or summarization – for a given input.
+    Text-to-text models return a list of strings, where each string is
+    the output – like a translation or summarization – for a given
+    input.
     """
 
     @abstractmethod

diff --git a/textattack/shared/attacked_text.py b/textattack/shared/attacked_text.py
@@ -25,7 +25,6 @@
 
 
 class AttackedText:
-
     """A helper class that represents a string that can be attacked.
 
     Models that take multiple sentences as input separate them by ``SPLIT_TOKEN``.

diff --git a/textattack/shared/utils/strings.py b/textattack/shared/utils/strings.py
@@ -86,7 +86,7 @@ def __repr__(self):
     __str__ = __repr__
 
     def extra_repr_keys(self):
-        """extra fields to be included in the representation of a class."""
+        """Extra fields to be included in the representation of a class."""
         return []
 
 
@@ -164,7 +164,7 @@ class ANSI_ESCAPE_CODES:
     FAIL = "\033[91m"
     BOLD = "\033[1m"
     UNDERLINE = "\033[4m"
-    """ This color stops the current color sequence. """
+    """This color stops the current color sequence."""
     STOP = "\033[0m"
 
 

diff --git a/textattack/transformations/word_swaps/word_swap_change_number.py b/textattack/transformations/word_swaps/word_swap_change_number.py
@@ -100,7 +100,7 @@ def _get_new_number(self, word):
                 return []
 
     def _alter_number(self, num):
-        """helper function of _get_new_number, replace a number with another
+        """Helper function of _get_new_number; replaces a number with another
         random number within the range of self.max_change."""
         if num not in [0, 2, 4]:
             change = int(num * self.max_change) + 1