Merge pull request #90 from QData/config

minor fixes: config.json, tests, travis
QData · May 9, 2020 · e3e43e3 · e3e43e3
2 parents 53a5c3a + 7ba4830
commit e3e43e3
Show file tree

Hide file tree

Showing 15 changed files with 293 additions and 58 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,10 +1,9 @@
 language: python
-python: '3.7'
+python: '3.8'
 before_install:
   - python --version
   - pip install -U pip
   - pip install -U pytest
-  - pip install git+https://github.com/jxmorris12/language-check
 install:
   - pip install -e .
 script: pytest tests # run tests
diff --git a/local_tests/command_line_tests.py b/local_tests/command_line_tests.py
@@ -47,13 +47,29 @@ def register_test(command, name=None, output_file=None, desc=None):
 # test: run_attack targeted classification of class 2 on BERT MNLI with enable_csv
 #   and attack_n set, using the WordNet transformation and beam search with 
 #   beam width 2, using language tool constraint, on 4 samples
-#                   (takes about 171s)
+#                   (takes about 72s)
 #
 register_test(('python -m textattack --attack-n --goal-function targeted-classification:target_class=2 '
-    '--enable_csv --model bert-mnli --num_examples 10 --transformation word-swap-wordnet '
+    '--enable-csv --model bert-mnli --num-examples 4 --transformation word-swap-wordnet '
     '--constraints lang-tool --attack beam-search:beam_width=2'), 
     name='run_attack_targeted2_bertmnli_wordnet_beamwidth_2_enablecsv_attackn', 
-    output_file='local_tests/sample_outputs/run_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_10.txt', 
+    output_file='local_tests/sample_outputs/run_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_4.txt', 
+    desc=('Runs attack using targeted classification on class 2 on BERT MNLI with'
+        'enable_csv and attack_n set, using the WordNet transformation and beam '
+        'search with  beam width 2, using language tool constraint, on 10 samples')
+        )
+
+#
+# test: run_attack non-overlapping output on T5 en->de translation with
+#   attack_n set, using WordSwapRandomCharacterSubstitution and greedy word swap,
+#   with edit distance and words-perturbed constraints, on 6 samples
+#                   (takes about 100s)
+#
+register_test(('python -m textattack --attack-n --goal-function non-overlapping-output '
+    '--model t5-en2de --num-examples 6 --transformation word-swap-random-char-substitution '
+    '--constraints edit-distance:12 words-perturbed:max_percent=0.75 --attack greedy-word'), 
+    name='run_attack_nonoverlapping_t5en2de_randomcharsub_editdistance_wordsperturbed_greedyword', 
+    output_file='local_tests/sample_outputs/run_attack_nonoverlapping_t5ende_editdistance_bleu.txt', 
     desc=('Runs attack using targeted classification on class 2 on BERT MNLI with'
         'enable_csv and attack_n set, using the WordNet transformation and beam '
         'search with  beam width 2, using language tool constraint, on 10 samples')

diff --git a/local_tests/python_function_tests.py b/local_tests/python_function_tests.py
@@ -11,7 +11,6 @@ def register_test(function, name=None, output_file=None, desc=None):
             function, name=name, output=output, desc=desc
         ))
 
-
 #######################################
 ##            BEGIN TESTS            ##
 #######################################
@@ -23,7 +22,7 @@ def check_gpu_count():
     import torch
     num_gpus = torch.cuda.device_count()
     if num_gpus == 0:
-        print(f'Error: detected 0 GPUs. Must run local tests with multiple GPUs. Perhaps you need to configure CUDA?')
+        raise ValueError(f'detected 0 GPUs. Must run local tests with a GPU. Perhaps you need to configure CUDA?')
 
 register_test(check_gpu_count, name='check CUDA', 
     output_file='local_tests/sample_outputs/empty_file.txt', 

diff --git a/local_tests/requirements.txt b/local_tests/requirements.txt
@@ -1 +1 @@
-colored
+colored
diff --git a/local_tests/run_tests.py b/local_tests/run_tests.py
@@ -1,27 +1,29 @@
+import argparse
 import os
 import time
 
+from test_lists import tests
 from test_models import color_text
 
 def log_sep():
     print('\n' + ('-' * 60) + '\n')
 
 def print_gray(s):
     print(color_text(s, 'light_gray'))
-    
-def main():
+
+def change_to_root_dir():
     # Change to TextAttack root directory.
     this_file_path = os.path.abspath(__file__)
     test_directory_name = os.path.dirname(this_file_path)
     textattack_root_directory_name = os.path.dirname(test_directory_name)
     os.chdir(textattack_root_directory_name)
     print_gray(f'Executing tests from {textattack_root_directory_name}.')
-
-    # Execute tests.
+
+def run_all_tests():
+    change_to_root_dir()
     start_time = time.time()
     passed_tests = 0
 
-    from tests import tests
     for test in tests: 
         log_sep()
         test_passed = test()
@@ -32,7 +34,38 @@ def main():
     print_gray(f'Passed {passed_tests}/{len(tests)} in {end_time-start_time}s.')
 
 
+def run_tests_by_name(test_names):
+    test_names = set(test_names)
+    start_time = time.time()
+    passed_tests = 0
+    executed_tests = 0
+    for test in tests: 
+        if test.name not in test_names:
+            continue
+        log_sep()
+        test_passed = test()
+        if test_passed:
+            passed_tests += 1
+        executed_tests += 1
+        test_names.remove(test.name)
+    log_sep()
+    end_time = time.time()
+    print_gray(f'Passed {passed_tests}/{executed_tests} in {end_time-start_time}s.')
+
+    if len(test_names):
+        print(f'Tests not executed: {",".join(test_names)}')
+
+def parse_args():
+    all_test_names = [t.name for t in tests]
+    parser = argparse.ArgumentParser(description='Run TextAttack local tests.')
+    parser.add_argument('--tests', default=None, nargs='+', choices=all_test_names,
+                    help='names of specific tests to run')
+
+    return parser.parse_args()
 
 if __name__ == '__main__':
-    # @TODO add argparser and test sizes.
-    main()
+    args = parse_args()
+    if args.tests:
+        run_tests_by_name(args.tests)
+    else:
+        run_all_tests()
diff --git a/local_tests/sample_outputs/run_attack_nonoverlapping_t5ende_editdistance_bleu.txt b/local_tests/sample_outputs/run_attack_nonoverlapping_t5ende_editdistance_bleu.txt
@@ -0,0 +1,64 @@
+GreedyWordSwap(
+  (goal_function):  NonOverlappingOutput
+  (transformation):  WordSwapRandomCharacterSubstitution(
+    (replace_stopwords):  False
+  )
+  (constraints): 
+    (0): LevenshteinEditDistance(
+        (max_edit_distance):  12
+      )
+    (1): WordsPerturbed(
+        (max_percent):  0.75
+      )
+  (is_black_box):  True
+) 
+
+Load time: /.*/s
+--------------------------------------------- Result 1 ---------------------------------------------
+Eine republikanische Strategie, um der Wiederwahl Obamas entgegenzuwirken-->[91m[FAILED][0m
+A Republican strategy to counter the re-election of Obama
+
+
+--------------------------------------------- Result 2 ---------------------------------------------
+Die republikanischen Führer rechtfertigten ihre Politik durch die Not-->[91m[FAILED][0m
+Republican leaders justified their policy by the need to combat electoral fraud.
+
+
+--------------------------------------------- Result 3 ---------------------------------------------
+Das Brennan-Zentrum betrachtet dies jedoch als Mythos und behaupt-->Allerdings hält das Brennan Centre dies für einen Mythos, indem e
+However, the Brennan Centre [91mconsiders[0m this a myth, stating that electoral fraud is rarer in the United States than the number of people killed by lightning.
+However, the Brennan Centre [94mcTnsiders[0m this a myth, stating that electoral fraud is rarer in the United States than the number of people killed by lightning.
+
+
+--------------------------------------------- Result 4 ---------------------------------------------
+Tatsächlich identifizierten republikanische Anwälte-->In einer DecOde identifizierten republikanische Anwält
+[91mIndeed[0m, Republican lawyers identified only 300 cases of electoral fraud in the United [91mStates[0m in a [91mdecade[0m.
+[94mIndedd[0m, Republican lawyers identified only 300 cases of electoral fraud in the United [94mTtates[0m in a [94mdecOde[0m.
+
+
+--------------------------------------------- Result 5 ---------------------------------------------
+Eines ist sicher: Diese neuen Bestimmungen werden sich negativ auf die Wahlbeteiligung aus-->Ein Hhing ist sicher: Diese neuen Bestimmungen werden sich negativ auf die Wahlbeteil
+One [91mthing[0m is certain: these new provisions will have a negative impact on voter turn-out.
+One [94mHhing[0m is certain: these new provisions will have a negative impact on voter turn-out.
+
+
+--------------------------------------------- Result 6 ---------------------------------------------
+In diesem Sinne werden die Maßnahmen das demokratische System der USA teilweise untergraben-->[91m[FAILED][0m
+In this sense, the measures will partially undermine the American democratic system.
+
+
+
+(0lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqwqqqqqqqqk(B
+(0x(B Attack Results                (0x(B        (0x(B
+(0tqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqnqqqqqqqqu(B
+(0x(B Number of successful attacks: (0x(B 3      (0x(B
+(0x(B Number of failed attacks:     (0x(B 3      (0x(B
+(0x(B Number of skipped attacks:    (0x(B 0      (0x(B
+(0x(B Original accuracy:            (0x(B 100.0% (0x(B
+(0x(B Accuracy under attack:        (0x(B 50.0%  (0x(B
+(0x(B Attack success rate:          (0x(B 50.0%  (0x(B
+(0x(B Average perturbed word %:     (0x(B 9.62%  (0x(B
+(0x(B Average num. words per input: (0x(B 15.33  (0x(B
+(0x(B Avg num queries:              (0x(B 23.67  (0x(B
+(0mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqvqqqqqqqqj(B
+Attack time: /.*/s
diff --git a/...un_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_10.txt b/...un_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_10.txt
diff --git a/...run_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_4.txt b/...run_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_4.txt
@@ -0,0 +1,65 @@
+BeamSearch(
+  (goal_function):  TargetedClassification(
+    (target_class):  2
+  )
+  (transformation):  WordSwapWordNet(
+    (replace_stopwords):  False
+  )
+  (constraints): 
+    (0): LanguageTool(
+        (grammar_error_threshold):  0
+      )
+  (is_black_box):  True
+) 
+
+Logging to CSV at path /.*/csv.
+Load time: /.*/s
+--------------------------------------------- Result 1 ---------------------------------------------
+[91m0[0m-->[94m2[0m
+In Temple Bar , the bookshop at the [91mGallery[0m of Photography carries a large selection of photographic publications , and the Flying Pig is a secondhand bookshop .
+
+There is a bookshop at the gallery .
+In Temple Bar , the bookshop at the [94mdrift[0m of Photography carries a large selection of photographic publications , and the Flying Pig is a secondhand bookshop .
+
+There is a bookshop at the gallery .
+
+
+--------------------------------------------- Result 2 ---------------------------------------------
+[91m0[0m-->[91m[FAILED][0m
+On Naxos , you can walk through the pretty villages of the Tragea Valley and the foothills of Mount Zas , admiring Byzantine churches and exploring olive groves at your leisure .
+
+Naxos is a place with beautiful scenery for leisure .
+
+
+--------------------------------------------- Result 3 ---------------------------------------------
+[92m1[0m-->[91m[FAILED][0m
+Impossible .
+
+Impossible , unless circumstances are met .
+
+
+--------------------------------------------- Result 4 ---------------------------------------------
+[91m0[0m-->[94m2[0m
+Expenses included in calculating net cost for education and training programs that are intended to increase or maintain national economic productive capacity shall be reported as investments in human capital as required supplementary stewardship information accompanying the financial statements of the Federal Government and its component units .
+
+Net cost for education programs can be calculated as a way to [91mincrease[0m productivity .
+Expenses included in calculating net cost for education and training programs that are intended to increase or maintain national economic productive capacity shall be reported as investments in human capital as required supplementary stewardship information accompanying the financial statements of the Federal Government and its component units .
+
+Net cost for education programs can be calculated as a way to [94mincrement[0m productivity .
+
+
+
+(0lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqwqqqqqqqqk(B
+(0x(B Attack Results                (0x(B        (0x(B
+(0tqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqnqqqqqqqqu(B
+(0x(B Number of successful attacks: (0x(B 2      (0x(B
+(0x(B Number of failed attacks:     (0x(B 2      (0x(B
+(0x(B Number of skipped attacks:    (0x(B 0      (0x(B
+(0x(B Original accuracy:            (0x(B 100.0% (0x(B
+(0x(B Accuracy under attack:        (0x(B 50.0%  (0x(B
+(0x(B Attack success rate:          (0x(B 50.0%  (0x(B
+(0x(B Average perturbed word %:     (0x(B 2.38%  (0x(B
+(0x(B Average num. words per input: (0x(B 34.25  (0x(B
+(0x(B Avg num queries:              (0x(B 278.5  (0x(B
+(0mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqvqqqqqqqqj(B
+Attack time: /.*/s
diff --git a/local_tests/tests.py → local_tests/test_lists.py b/local_tests/tests.py → local_tests/test_lists.py
diff --git a/local_tests/test_models.py b/local_tests/test_models.py
@@ -2,16 +2,18 @@
 import io
 import os
 import re
+import signal
 import sys
 import subprocess
+import traceback
 
 def color_text(s, color):
     return colored.stylize(s, colored.fg(color))
 
-FNULL = open(os.devnull, 'w')
+stderr_file_name = 'err.out'
 
 MAGIC_STRING = '/.*/'
-def compare_outputs(desired_output, test_output):
+def compare_output_equivalence(desired_output, test_output):
     """ Desired outputs have the magic string '/.*/' inserted wherever the 
         output at that position doesn't actually matter. (For example, when the 
         time to execute is printed, or another non-deterministic feature of the 
@@ -50,12 +52,12 @@ def execute(self):
     def __call__(self):
         """ Runs test and prints success or failure. """
         self.log_start()
-        test_output = self.execute()
-        if compare_outputs(self.output, test_output):
+        test_output, errored = self.execute()
+        if (not errored) and compare_output_equivalence(self.output, test_output):
             self.log_success()
             return True
         else:
-            self.log_failure(test_output)
+            self.log_failure(test_output, errored)
             return False
 
     def log_start(self):
@@ -65,12 +67,15 @@ def log_success(self):
         success_text = f'✓ Succeeded.'
         print(color_text(success_text, 'green'))
 
-    def log_failure(self, test_output):
+    def log_failure(self, test_output, errored):
         fail_text = f'✗ Failed.'
         print(color_text(fail_text, 'red'))
-        print('\n')
-        print(f'Test output: {test_output}.')
-        print(f'Correct output: {self.output}.')
+        if errored:
+            print(f'Test exited early with error: {test_output}')
+        else:
+            output1 = f'Test output: {test_output}.'
+            output2 = f'Correct output: {self.output}.'
+            print(f'\n{output1}\n{output2}\n')
 
 class CommandLineTest(TextAttackTest):
     """ Runs a command-line command to check for desired output. """
@@ -81,13 +86,22 @@ def __init__(self, command, name=None, output=None, desc=None):
         super().__init__(name=name, output=output, desc=desc)
 
     def execute(self):
+        stderr_file = open(stderr_file_name, 'w+')
         result = subprocess.run(
             self.command.split(), 
             stdout=subprocess.PIPE,
-            # @TODO: Collect stderr somewhere. In the event of an error, point user to the error file.
-            stderr=FNULL 
+            stderr=stderr_file 
         )
-        return result.stdout.decode()
+        stderr_file.seek(0) # go back to beginning of file so we can read the whole thing
+        stderr_str = stderr_file.read()
+        # Remove temp file.
+        remove_stderr_file()
+        if result.returncode == 0:
+            # If the command succeeds, return stdout.
+            return result.stdout.decode(), False
+        else:
+            # If the command returns a nonzero exit code, return stderr.
+            return stderr_str, True
 
 class Capturing(list):
     """ A context manager that captures standard out during its execution. 
@@ -113,8 +127,26 @@ def __init__(self, function, name=None, output=None, desc=None):
         super().__init__(name=name, output=output, desc=desc)
 
     def execute(self):
-        with Capturing() as output_lines:
-            self.function()
-        output = '\n'.join(output_lines)
-        return output
-
+        try:
+            with Capturing() as output_lines:
+                self.function()
+            output = '\n'.join(output_lines)
+            return output, False
+        except: # catch *all* exceptions
+            exc_str_lines = traceback.format_exc().splitlines()
+            exc_str = '\n'.join(exc_str_lines)
+            return exc_str, True
+
+def remove_stderr_file():
+    # Make sure the stderr file is removed on exit.
+    try:
+        os.unlink(stderr_file_name)
+    except FileNotFoundError: 
+        # File doesn't exist - that means we never made it or already cleaned it up
+        pass
+
+def exit_handler(_,__): 
+    remove_stderr_file()
+
+# If the program exits early, make sure it didn't create any unneeded files.
+signal.signal(signal.SIGINT, exit_handler)
diff --git a/textattack/config.json b/textattack/config.json
@@ -1,3 +1,6 @@
 {
-    "CACHE_DIR": "~/.cache/textattack/"
+    "CACHE_DIR": "~/.cache/textattack/",
+    "CONSTRAINT_CACHE_SIZE": 262144,
+    "MODEL_BATCH_SIZE": 32,
+    "MODEL_CACHE_SIZE": 262144
 }