From d86f648a8ae9dcb5ccc50993abcd5b59ee409cf3 Mon Sep 17 00:00:00 2001 From: Toilal Date: Sat, 24 Oct 2015 13:49:35 +0200 Subject: [PATCH] Add conflicting method to Matches object --- README.rst | 4 ++++ rebulk/match.py | 25 ++++++++++++++++++++++++- rebulk/processors.py | 8 +------- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 1af0f36..fced915 100644 --- a/README.rst +++ b/README.rst @@ -332,6 +332,10 @@ It has the following additional methods and properties on it. Retrieves a list of *hole* ``Match`` objects for given range. A hole match is created for each range where no match is available. +- ``conflicting(match, predicate=None, index=None)`` + + Retrieves a list of ``Match`` objects that conflict with the given match. + - ``chain_before(self, position, seps, start=0, predicate=None, index=None)``: Retrieves a list of chained matches, before position, matching predicate and separated by characters from seps only. diff --git a/rebulk/match.py b/rebulk/match.py index 44de1a2..c1587d5 100644 --- a/rebulk/match.py +++ b/rebulk/match.py @@ -211,7 +211,7 @@ def chain_before(self, position, seps, start=0, predicate=None, index=None): for i in reversed(range(start, position)): index_matches = self.at_index(i) - filtered_matches = [index_match for index_match in index_matches if predicate and predicate(index_match)] + filtered_matches = [index_match for index_match in index_matches if not predicate or predicate(index_match)] if filtered_matches: for chain_match in filtered_matches: if chain_match not in chain: @@ -315,6 +315,29 @@ def holes(self, start=0, end=None, formatter=None, ignore=None, predicate=None, ret[-1].end = min(rindex, end) return filter_index(ret, predicate, index) + def conflicting(self, match, predicate=None, index=None): + """ + Retrieves a list of ``Match`` objects that conflicts with given match. 
+ :param match: + :type match: + :param predicate: + :type predicate: + :param index: + :type index: + :return: + :rtype: + """ + ret = [] + + for i in range(*match.span): + ret.extend(self.starting(i)) + if i != match.span[0]: + ret.extend(self.ending(i)) + + ret.remove(match) + + return filter_index(ret, predicate, index) + def at_match(self, match, predicate=None, index=None): """ Retrieves a list of matches from given match. diff --git a/rebulk/processors.py b/rebulk/processors.py index d9b0f9d..b34cb7f 100644 --- a/rebulk/processors.py +++ b/rebulk/processors.py @@ -37,14 +37,8 @@ def conflict_prefer_longer(matches): """ to_remove_matches = IdentitySet() for match in filter(lambda match: not match.private, matches): - conflicting_matches = IdentitySet() + conflicting_matches = matches.conflicting(match) - for i in range(*match.span): - conflicting_matches.update(matches.starting(i)) - if i != match.span[0]: - conflicting_matches.update(matches.ending(i)) - - conflicting_matches.remove(match) if conflicting_matches: # keep the match only if it's the longest for conflicting_match in filter(lambda match: not match.private, conflicting_matches):