Skip to content

Commit

Permalink
Merge pull request #79 from QData/wordnet-transformation
Browse files Browse the repository at this point in the history
Wordnet transformation
  • Loading branch information
jxmorris12 committed Apr 25, 2020
2 parents 95b685f + c35668d commit 5d7259e
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 12 deletions.
2 changes: 2 additions & 0 deletions scripts/run_attack_args_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
}

TRANSFORMATION_CLASS_NAMES = {
'word-swap-wordnet': 'textattack.transformations.WordSwapWordNet',
'word-swap-embedding': 'textattack.transformations.WordSwapEmbedding',
'word-swap-homoglyph': 'textattack.transformations.WordSwapHomoglyph',
'word-swap-neighboring-char-swap': 'textattack.transformations.WordSwapNeighboringCharacterSwap',
Expand Down Expand Up @@ -283,6 +284,7 @@ def parse_logger_from_args(args):# Create logger
attack_logger.add_output_csv(csv_path, color_method)
print('Logging to CSV at path {}.'.format(csv_path))


# Visdom
if args.enable_visdom:
attack_logger.enable_visdom()
Expand Down
3 changes: 2 additions & 1 deletion textattack/transformations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
from .word_swap_neighboring_character_swap import WordSwapNeighboringCharacterSwap
from .word_swap_random_character_deletion import WordSwapRandomCharacterDeletion
from .word_swap_random_character_insertion import WordSwapRandomCharacterInsertion
from .word_swap_random_character_substitution import WordSwapRandomCharacterSubstitution
from .word_swap_random_character_substitution import WordSwapRandomCharacterSubstitution
from .word_swap_wordnet import WordSwapWordNet
12 changes: 11 additions & 1 deletion textattack/transformations/word_swap.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,26 @@ def __call__(self, tokenized_text, indices_to_replace=None):
word_swaps = []
for i in indices_to_replace:
word_to_replace = words[i]
# Don't replace stopwords.
if not self.replace_stopwords and word_to_replace.lower() in self.stopwords:
continue
replacement_words = self._get_replacement_words(word_to_replace)
new_tokenized_texts = []
for r in replacement_words:
# Don't replace with numbers, punctuation, or other non-letter characters.
if not is_word(r):
continue
new_tokenized_texts.append(tokenized_text.replace_word_at_index(i, r))
transformations.extend(new_tokenized_texts)

return transformations


def extra_repr_keys(self):
    """Return the extra repr keys for this transformation.

    Returns:
        A one-element list containing ``'replace_stopwords'``.
    """
    return ['replace_stopwords']

def is_word(s):
    """Return True if string `s` counts as a word.

    A string counts as a word when at least one of its characters is a
    letter (as reported by ``str.isalpha``). Strings made up only of digits,
    punctuation, or whitespace -- and the empty string -- do not.

    Args:
        s: The string to check.

    Returns:
        True if any character of `s` is alphabetic, False otherwise.
    """
    return any(c.isalpha() for c in s)
3 changes: 1 addition & 2 deletions textattack/transformations/word_swap_homoglyph.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import numpy as np
import os

from textattack.shared import utils
from textattack.transformations.word_swap import WordSwap

class WordSwapHomoglyph(WordSwap):
""" Transforms an input by replacing its words with visually-similar words using homoglyph swaps.
""" Transforms an input by replacing its words with visually similar words using homoglyph swaps.
"""

def __init__(self, replace_stopwords=False):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
import os

from textattack.shared import utils
from textattack.transformations.word_swap import WordSwap
Expand Down Expand Up @@ -36,4 +35,4 @@ def _get_replacement_words(self, word):
return candidate_words

def extra_repr_keys(self):
    """Return the parent's extra repr keys followed by ``'random_one'``."""
    return super().extra_repr_keys() + ['random_one']
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
import os

from textattack.shared import utils
from textattack.transformations.word_swap import WordSwap
Expand Down Expand Up @@ -35,4 +34,4 @@ def _get_replacement_words(self, word):
return candidate_words

def extra_repr_keys(self):
    """Return the parent's extra repr keys followed by ``'random_one'``."""
    return super().extra_repr_keys() + ['random_one']
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
import os

from textattack.shared import utils
from textattack.transformations.word_swap import WordSwap
Expand Down Expand Up @@ -32,4 +31,4 @@ def _get_replacement_words(self, word):
candidate_word = word[:i] + self._get_random_letter() + word[i:]
candidate_words.append(candidate_word)

return candidate_words
return candidate_words
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
import os

from textattack.shared import utils
from textattack.transformations.word_swap import WordSwap
Expand Down Expand Up @@ -33,4 +32,4 @@ def _get_replacement_words(self, word):
candidate_word = word[:i] + self._get_random_letter() + word[i+1:]
candidate_words.append(candidate_word)

return candidate_words
return candidate_words
15 changes: 15 additions & 0 deletions textattack/transformations/word_swap_wordnet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from nltk.corpus import wordnet
from textattack.transformations.word_swap import WordSwap

class WordSwapWordNet(WordSwap):
    """ Transforms an input by replacing its words with synonyms provided by WordNet.
    """

    def _get_replacement_words(self, word, random=False):
        """ Returns a list of WordNet synonyms for `word`.

        Collects the lemma names of every synset WordNet returns for `word`,
        skipping the word itself (swapping a word for itself changes nothing)
        and multi-word lemmas, which WordNet joins with underscores.

        Args:
            word: The word to look up in WordNet.
            random: Unused; kept so callers that pass it keep working.

        Returns:
            A list of distinct candidate replacement strings. The order is
            unspecified because the candidates are gathered in a set.
        """
        synonyms = set()
        for synset in wordnet.synsets(word):
            for lemma in synset.lemmas():
                name = lemma.name()
                # A replacement identical to the input is not a swap.
                if name == word:
                    continue
                # Underscore-joined lemmas are phrases, not single words.
                if '_' in name:
                    continue
                synonyms.add(name)
        return list(synonyms)

0 comments on commit 5d7259e

Please sign in to comment.