Skip to content

Commit

Permalink
Fix issue in the conflict_prefer_longer processor when matches are not separated
Browse files Browse the repository at this point in the history
  • Loading branch information
Toilal committed Oct 19, 2015
1 parent d449ba6 commit 8bd4c3c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
4 changes: 3 additions & 1 deletion rebulk/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ def conflict_prefer_longer(matches):

for i in range(*match.span):
conflicting_matches.update(matches.starting(i))
conflicting_matches.update(matches.ending(i))
if i != match.span[0]:
conflicting_matches.update(matches.ending(i))

conflicting_matches.remove(match)
if conflicting_matches:
# keep the match only if it's the longest
for conflicting_match in filter(lambda match: not match.private, conflicting_matches):
Expand Down
19 changes: 19 additions & 0 deletions rebulk/test/test_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,22 @@ def test_conflict_prefer_longer():
values = [x.value for x in processed_matches]

assert values == ["ijklmnopqrst"]

input_string = "123456789"

pattern = StringPattern("123", "456789")
matches = Matches(pattern.matches(input_string))

processed_matches = conflict_prefer_longer(matches)

values = [x.value for x in processed_matches]
assert values == ["123", "456789"]

pattern = StringPattern("123456", "789")
matches = Matches(pattern.matches(input_string))

processed_matches = conflict_prefer_longer(matches)

values = [x.value for x in processed_matches]
assert values == ["123456", "789"]

0 comments on commit 8bd4c3c

Please sign in to comment.