Skip to content

Commit

Permalink
Add abbreviations support to regex patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
Toilal committed Oct 6, 2015
1 parent cbe9678 commit 8de859c
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 0 deletions.
9 changes: 9 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,15 @@ If `regex module`_ is available, it automatically supports repeated captures.
>>> matches[0].children
[<01:(0, 2)>, <04:(9, 11)>]
- ``abbreviations``

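Defined as a list of 2-tuples. Each tuple is an abbreviation: its first item is the text to look for in the pattern, and its second item is the regular expression fragment that replaces it.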

>>> Rebulk().regex(r'Custom-separators', abbreviations=[("-", r"[\W_]+")])\
... .matches("Custom_separators using-abbreviations")
[<Custom_separators:(0, 17)>]


Functional Patterns
-------------------
Functional Patterns are based on the evaluation of a function.
Expand Down
7 changes: 7 additions & 0 deletions rebulk/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,21 @@ def __init__(self, *patterns, **kwargs):
self.repeated_captures = kwargs.get('repeated_captures')
if self.repeated_captures and not REGEX_AVAILABLE: # pragma: no cover
raise NotImplementedError("repeated_capture is available only with regex module.")
self.abbreviations = kwargs.get('abbreviations', [])
self._kwargs = kwargs
self._match_kwargs = _filter_match_kwargs(kwargs)
self._children_match_kwargs = _filter_match_kwargs(kwargs, children=True)
self._patterns = []
for pattern in patterns:
if isinstance(pattern, six.string_types):
if self.abbreviations and pattern:
for key, replacement in self.abbreviations:
pattern = pattern.replace(key, replacement)
pattern = call(re.compile, pattern, **self._kwargs)
elif isinstance(pattern, dict):
if self.abbreviations and 'pattern' in pattern:
for key, replacement in self.abbreviations:
pattern['pattern'] = pattern['pattern'].replace(key, replacement)
pattern = re.compile(**pattern)
elif hasattr(pattern, '__iter__'):
pattern = re.compile(*pattern)
Expand Down
11 changes: 11 additions & 0 deletions rebulk/test/test_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,17 @@ def test_no_match(self):
matches = list(pattern.matches(self.input_string))
assert len(matches) == 0

def test_shortcuts(self):
    """Check that abbreviations are expanded for string and dict patterns.

    Each pattern below contains a "-" that is declared as an abbreviation
    for ``[\\W_]+``; each pattern must produce exactly one match in
    ``self.input_string``.
    """
    # Pattern supplied as a plain string.
    from_string = RePattern("Celtic-violin", abbreviations=[("-", r"[\W_]+")])
    assert len(list(from_string.matches(self.input_string))) == 1

    # Pattern supplied as a dict carrying the pattern text and its flags.
    from_dict = RePattern({"pattern": "celtic-violin", "flags": re.IGNORECASE}, abbreviations=[("-", r"[\W_]+")])
    assert len(list(from_dict.matches(self.input_string))) == 1

def test_multiple_patterns(self):
pattern = RePattern("pla.?ing", "ann.?yed", "Heb.?ew")

Expand Down

0 comments on commit 8de859c

Please sign in to comment.