Skip to content

Commit

Permalink
Merge pull request #90 from QData/config
Browse files Browse the repository at this point in the history
minor fixes: config.json, tests, travis
  • Loading branch information
jxmorris12 committed May 9, 2020
2 parents 53a5c3a + 7ba4830 commit e3e43e3
Show file tree
Hide file tree
Showing 15 changed files with 293 additions and 58 deletions.
3 changes: 1 addition & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
language: python
python: '3.7'
python: '3.8'
before_install:
- python --version
- pip install -U pip
- pip install -U pytest
- pip install git+https://github.com/jxmorris12/language-check
install:
- pip install -e .
script: pytest tests # run tests
22 changes: 19 additions & 3 deletions local_tests/command_line_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,29 @@ def register_test(command, name=None, output_file=None, desc=None):
# test: run_attack targeted classification of class 2 on BERT MNLI with enable_csv
# and attack_n set, using the WordNet transformation and beam search with
# beam width 2, using language tool constraint, on 4 samples
# (takes about 171s)
# (takes about 72s)
#
register_test(('python -m textattack --attack-n --goal-function targeted-classification:target_class=2 '
'--enable_csv --model bert-mnli --num_examples 10 --transformation word-swap-wordnet '
'--enable-csv --model bert-mnli --num-examples 4 --transformation word-swap-wordnet '
'--constraints lang-tool --attack beam-search:beam_width=2'),
name='run_attack_targeted2_bertmnli_wordnet_beamwidth_2_enablecsv_attackn',
output_file='local_tests/sample_outputs/run_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_10.txt',
output_file='local_tests/sample_outputs/run_attack_targetedclassification2_wordnet_langtool_enable_csv_beamsearch2_attack_n_4.txt',
desc=('Runs attack using targeted classification on class 2 on BERT MNLI with'
'enable_csv and attack_n set, using the WordNet transformation and beam '
'search with beam width 2, using language tool constraint, on 10 samples')
)

#
# test: run_attack non-overlapping output of class 2 on T5 en->de translation with
# attack_n set, using the WordSwapRandomCharacterSubstitution transformation
# and greedy word swap, using edit distance constraint, on 6 samples
# (takes about 100s)
#
register_test(('python -m textattack --attack-n --goal-function non-overlapping-output '
'--model t5-en2de --num-examples 6 --transformation word-swap-random-char-substitution '
'--constraints edit-distance:12 words-perturbed:max_percent=0.75 --attack greedy-word'),
name='run_attack_nonoverlapping_t5en2de_randomcharsub_editdistance_wordsperturbed_greedyword',
output_file='local_tests/sample_outputs/run_attack_nonoverlapping_t5ende_editdistance_bleu.txt',
desc=('Runs attack using targeted classification on class 2 on BERT MNLI with'
'enable_csv and attack_n set, using the WordNet transformation and beam '
'search with beam width 2, using language tool constraint, on 10 samples')
Expand Down
3 changes: 1 addition & 2 deletions local_tests/python_function_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def register_test(function, name=None, output_file=None, desc=None):
function, name=name, output=output, desc=desc
))


#######################################
## BEGIN TESTS ##
#######################################
Expand All @@ -23,7 +22,7 @@ def check_gpu_count():
import torch
num_gpus = torch.cuda.device_count()
if num_gpus == 0:
print(f'Error: detected 0 GPUs. Must run local tests with multiple GPUs. Perhaps you need to configure CUDA?')
raise ValueError(f'detected 0 GPUs. Must run local tests with a GPU. Perhaps you need to configure CUDA?')

register_test(check_gpu_count, name='check CUDA',
output_file='local_tests/sample_outputs/empty_file.txt',
Expand Down
2 changes: 1 addition & 1 deletion local_tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
colored
colored
47 changes: 40 additions & 7 deletions local_tests/run_tests.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
import argparse
import os
import time

from test_lists import tests
from test_models import color_text

def log_sep():
    """Print a 60-dash separator line preceded and followed by a newline."""
    divider = '-' * 60
    print(f'\n{divider}\n')

def print_gray(s):
    """Print ``s`` to stdout after styling it 'light_gray' with color_text."""
    gray_text = color_text(s, 'light_gray')
    print(gray_text)
def main():

def change_to_root_dir():
    """Switch the working directory to the TextAttack root directory.

    The root is taken to be the parent of the directory containing this
    file. The chosen directory is reported on stdout via print_gray.
    """
    # Two dirname() hops: this file -> its directory -> that directory's parent.
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(tests_dir)
    os.chdir(root_dir)
    print_gray(f'Executing tests from {root_dir}.')

# Execute tests.

def run_all_tests():
change_to_root_dir()
start_time = time.time()
passed_tests = 0

from tests import tests
for test in tests:
log_sep()
test_passed = test()
Expand All @@ -32,7 +34,38 @@ def main():
print_gray(f'Passed {passed_tests}/{len(tests)} in {end_time-start_time}s.')


def run_tests_by_name(test_names):
    """Run only the registered tests whose names appear in ``test_names``.

    Args:
        test_names: iterable of test names to run; duplicates are collapsed.

    Prints a ``Passed x/y`` summary with the elapsed time, followed by the
    requested names (if any) that matched no registered test.
    """
    # Run from the same working directory as run_all_tests() so both entry
    # points execute tests under identical conditions.
    change_to_root_dir()
    pending = set(test_names)
    start_time = time.time()
    passed_tests = 0
    executed_tests = 0
    for test in tests:
        if test.name not in pending:
            continue
        log_sep()
        if test():
            passed_tests += 1
        executed_tests += 1
        # Whatever is still pending after the loop was requested but never run.
        pending.remove(test.name)
    log_sep()
    end_time = time.time()
    print_gray(f'Passed {passed_tests}/{executed_tests} in {end_time-start_time}s.')

    if pending:
        # Sorted so the report is deterministic (set iteration order is not).
        print(f'Tests not executed: {",".join(sorted(pending))}')

def parse_args():
    """Parse the test runner's command-line arguments.

    Returns:
        The argparse namespace. Its ``tests`` attribute is None unless
        ``--tests`` was given with one or more registered test names.
    """
    parser = argparse.ArgumentParser(description='Run TextAttack local tests.')
    # Restrict --tests to the names of the registered tests.
    registered_names = [test.name for test in tests]
    parser.add_argument(
        '--tests',
        nargs='+',
        default=None,
        choices=registered_names,
        help='names of specific tests to run',
    )
    return parser.parse_args()

if __name__ == '__main__':
# @TODO add argparser and test sizes.
main()
args = parse_args()
if args.tests:
run_tests_by_name(args.tests)
else:
run_all_tests()
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
GreedyWordSwap(
(goal_function): NonOverlappingOutput
(transformation): WordSwapRandomCharacterSubstitution(
(replace_stopwords): False
)
(constraints):
(0): LevenshteinEditDistance(
(max_edit_distance): 12
)
(1): WordsPerturbed(
(max_percent): 0.75
)
(is_black_box): True
)

Load time: /.*/s
--------------------------------------------- Result 1 ---------------------------------------------
Eine republikanische Strategie, um der Wiederwahl Obamas entgegenzuwirken-->[FAILED]
A Republican strategy to counter the re-election of Obama


--------------------------------------------- Result 2 ---------------------------------------------
Die republikanischen Führer rechtfertigten ihre Politik durch die Not-->[FAILED]
Republican leaders justified their policy by the need to combat electoral fraud.


--------------------------------------------- Result 3 ---------------------------------------------
Das Brennan-Zentrum betrachtet dies jedoch als Mythos und behaupt-->Allerdings hält das Brennan Centre dies für einen Mythos, indem e
However, the Brennan Centre considers this a myth, stating that electoral fraud is rarer in the United States than the number of people killed by lightning.
However, the Brennan Centre cTnsiders this a myth, stating that electoral fraud is rarer in the United States than the number of people killed by lightning.


--------------------------------------------- Result 4 ---------------------------------------------
Tatsächlich identifizierten republikanische Anwälte-->In einer DecOde identifizierten republikanische Anwält
Indeed, Republican lawyers identified only 300 cases of electoral fraud in the United States in a decade.
Indedd, Republican lawyers identified only 300 cases of electoral fraud in the United Ttates in a decOde.


--------------------------------------------- Result 5 ---------------------------------------------
Eines ist sicher: Diese neuen Bestimmungen werden sich negativ auf die Wahlbeteiligung aus-->Ein Hhing ist sicher: Diese neuen Bestimmungen werden sich negativ auf die Wahlbeteil
One thing is certain: these new provisions will have a negative impact on voter turn-out.
One Hhing is certain: these new provisions will have a negative impact on voter turn-out.


--------------------------------------------- Result 6 ---------------------------------------------
In diesem Sinne werden die Maßnahmen das demokratische System der USA teilweise untergraben-->[FAILED]
In this sense, the measures will partially undermine the American democratic system.



(0lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqwqqqqqqqqk(B
(0x(B Attack Results (0x(B (0x(B
(0tqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqnqqqqqqqqu(B
(0x(B Number of successful attacks: (0x(B 3 (0x(B
(0x(B Number of failed attacks: (0x(B 3 (0x(B
(0x(B Number of skipped attacks: (0x(B 0 (0x(B
(0x(B Original accuracy: (0x(B 100.0% (0x(B
(0x(B Accuracy under attack: (0x(B 50.0% (0x(B
(0x(B Attack success rate: (0x(B 50.0% (0x(B
(0x(B Average perturbed word %: (0x(B 9.62% (0x(B
(0x(B Average num. words per input: (0x(B 15.33 (0x(B
(0x(B Avg num queries: (0x(B 23.67 (0x(B
(0mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqvqqqqqqqqj(B
Attack time: /.*/s
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
BeamSearch(
(goal_function): TargetedClassification(
(target_class): 2
)
(transformation): WordSwapWordNet(
(replace_stopwords): False
)
(constraints):
(0): LanguageTool(
(grammar_error_threshold): 0
)
(is_black_box): True
)

Logging to CSV at path /.*/csv.
Load time: /.*/s
--------------------------------------------- Result 1 ---------------------------------------------
0-->2
In Temple Bar , the bookshop at the Gallery of Photography carries a large selection of photographic publications , and the Flying Pig is a secondhand bookshop .

There is a bookshop at the gallery .
In Temple Bar , the bookshop at the drift of Photography carries a large selection of photographic publications , and the Flying Pig is a secondhand bookshop .

There is a bookshop at the gallery .


--------------------------------------------- Result 2 ---------------------------------------------
0-->[FAILED]
On Naxos , you can walk through the pretty villages of the Tragea Valley and the foothills of Mount Zas , admiring Byzantine churches and exploring olive groves at your leisure .

Naxos is a place with beautiful scenery for leisure .


--------------------------------------------- Result 3 ---------------------------------------------
1-->[FAILED]
Impossible .

Impossible , unless circumstances are met .


--------------------------------------------- Result 4 ---------------------------------------------
0-->2
Expenses included in calculating net cost for education and training programs that are intended to increase or maintain national economic productive capacity shall be reported as investments in human capital as required supplementary stewardship information accompanying the financial statements of the Federal Government and its component units .

Net cost for education programs can be calculated as a way to increase productivity .
Expenses included in calculating net cost for education and training programs that are intended to increase or maintain national economic productive capacity shall be reported as investments in human capital as required supplementary stewardship information accompanying the financial statements of the Federal Government and its component units .

Net cost for education programs can be calculated as a way to increment productivity .



(0lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqwqqqqqqqqk(B
(0x(B Attack Results (0x(B (0x(B
(0tqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqnqqqqqqqqu(B
(0x(B Number of successful attacks: (0x(B 2 (0x(B
(0x(B Number of failed attacks: (0x(B 2 (0x(B
(0x(B Number of skipped attacks: (0x(B 0 (0x(B
(0x(B Original accuracy: (0x(B 100.0% (0x(B
(0x(B Accuracy under attack: (0x(B 50.0% (0x(B
(0x(B Attack success rate: (0x(B 50.0% (0x(B
(0x(B Average perturbed word %: (0x(B 2.38% (0x(B
(0x(B Average num. words per input: (0x(B 34.25 (0x(B
(0x(B Avg num queries: (0x(B 278.5 (0x(B
(0mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqvqqqqqqqqj(B
Attack time: /.*/s
File renamed without changes.
66 changes: 49 additions & 17 deletions local_tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
import io
import os
import re
import signal
import sys
import subprocess
import traceback

def color_text(s, color):
    """Return ``s`` styled with the foreground color ``color`` using the ``colored`` package."""
    foreground = colored.fg(color)
    return colored.stylize(s, foreground)

FNULL = open(os.devnull, 'w')
stderr_file_name = 'err.out'

MAGIC_STRING = '/.*/'
def compare_outputs(desired_output, test_output):
def compare_output_equivalence(desired_output, test_output):
""" Desired outputs have the magic string '/.*/' inserted wherever the
output at that position doesn't actually matter. (For example, when the
time to execute is printed, or another non-deterministic feature of the
Expand Down Expand Up @@ -50,12 +52,12 @@ def execute(self):
def __call__(self):
""" Runs test and prints success or failure. """
self.log_start()
test_output = self.execute()
if compare_outputs(self.output, test_output):
test_output, errored = self.execute()
if (not errored) and compare_output_equivalence(self.output, test_output):
self.log_success()
return True
else:
self.log_failure(test_output)
self.log_failure(test_output, errored)
return False

def log_start(self):
Expand All @@ -65,12 +67,15 @@ def log_success(self):
success_text = f'✓ Succeeded.'
print(color_text(success_text, 'green'))

def log_failure(self, test_output):
def log_failure(self, test_output, errored):
fail_text = f'✗ Failed.'
print(color_text(fail_text, 'red'))
print('\n')
print(f'Test output: {test_output}.')
print(f'Correct output: {self.output}.')
if errored:
print(f'Test exited early with error: {test_output}')
else:
output1 = f'Test output: {test_output}.'
output2 = f'Correct output: {self.output}.'
print(f'\n{output1}\n{output2}\n')

class CommandLineTest(TextAttackTest):
""" Runs a command-line command to check for desired output. """
Expand All @@ -81,13 +86,22 @@ def __init__(self, command, name=None, output=None, desc=None):
super().__init__(name=name, output=output, desc=desc)

def execute(self):
stderr_file = open(stderr_file_name, 'w+')
result = subprocess.run(
self.command.split(),
stdout=subprocess.PIPE,
# @TODO: Collect stderr somewhere. In the event of an error, point user to the error file.
stderr=FNULL
stderr=stderr_file
)
return result.stdout.decode()
stderr_file.seek(0) # go back to beginning of file so we can read the whole thing
stderr_str = stderr_file.read()
# Remove temp file.
remove_stderr_file()
if result.returncode == 0:
# If the command succeeds, return stdout.
return result.stdout.decode(), False
else:
# If the command returns an exit code, return stderr.
return stderr_str, True

class Capturing(list):
""" A context manager that captures standard out during its execution.
Expand All @@ -113,8 +127,26 @@ def __init__(self, function, name=None, output=None, desc=None):
super().__init__(name=name, output=output, desc=desc)

def execute(self):
with Capturing() as output_lines:
self.function()
output = '\n'.join(output_lines)
return output

try:
with Capturing() as output_lines:
self.function()
output = '\n'.join(output_lines)
return output, False
except: # catch *all* exceptions
exc_str_lines = traceback.format_exc().splitlines()
exc_str = '\n'.join(exc_str_lines)
return exc_str, True

def remove_stderr_file():
    """Delete the temporary stderr capture file; do nothing if it is absent."""
    try:
        os.remove(stderr_file_name)
    except FileNotFoundError:
        # File doesn't exist - it was never created or was already cleaned up.
        return

def exit_handler(_, __):
    """Signal handler: delete the temporary stderr file, then interrupt the program.

    Args:
        _: signal number passed by the signal machinery (unused).
        __: current stack frame passed by the signal machinery (unused).

    Raises:
        KeyboardInterrupt: always, after the cleanup has run.
    """
    remove_stderr_file()
    # A custom SIGINT handler replaces Python's default one, so returning
    # normally here would swallow Ctrl-C and the run would simply continue.
    raise KeyboardInterrupt

# If the program exits early, make sure it didn't create any unneeded files.
# Registers exit_handler for SIGINT (Ctrl-C); the handler deletes the
# temporary stderr file.
signal.signal(signal.SIGINT, exit_handler)
5 changes: 4 additions & 1 deletion textattack/config.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
{
"CACHE_DIR": "~/.cache/textattack/"
"CACHE_DIR": "~/.cache/textattack/",
"CONSTRAINT_CACHE_SIZE": 262144,
"MODEL_BATCH_SIZE": 32,
"MODEL_CACHE_SIZE": 262144
}

0 comments on commit e3e43e3

Please sign in to comment.