diff --git a/rebulk/introspector.py b/rebulk/introspector.py new file mode 100644 index 0000000..f2104d6 --- /dev/null +++ b/rebulk/introspector.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Introspect rebulk object to retrieve capabilities. +""" +from abc import ABCMeta, abstractproperty +from collections import defaultdict + +import six +from .pattern import StringPattern, RePattern +from .utils import extend_safe + + +@six.add_metaclass(ABCMeta) +class Description(object): + """ + Abstract class for a description. + """ + @abstractproperty + def properties(self): + """ + Properties of described object. + :return: all properties that described object can generate grouped by name. + :rtype: dict + """ + pass + + +class PatternDescription(Description): + """ + Description of a pattern. + """ + def __init__(self, pattern): + self.pattern = pattern + self._properties = defaultdict(list) + + if pattern.marker or pattern.private: + return + if pattern.properties: + for key, values in pattern.properties.items(): + extend_safe(self._properties[key], values) + elif 'value' in pattern.match_options: + self._properties[pattern.name].append(pattern.match_options['value']) + elif isinstance(pattern, StringPattern): + extend_safe(self._properties[pattern.name], pattern.patterns) + elif isinstance(pattern, RePattern): + if pattern.name and not pattern.private_parent and pattern.name not in pattern.private_names: + extend_safe(self._properties[pattern.name], [None]) + if not pattern.private_children: + for regex_pattern in pattern.patterns: + for group_name, values in regex_pattern.groupindex.items(): + if group_name not in pattern.private_names: + extend_safe(self._properties[group_name], [None]) + + + @property + def properties(self): + """ + Properties for this rule. + :return: + :rtype: dict + """ + return self._properties + + +class RuleDescription(Description): + """ + Description of a rule. 
+ """ + def __init__(self, rule): + self.rule = rule + + self._properties = defaultdict(list) + + if rule.properties: + for key, values in rule.properties.items(): + extend_safe(self._properties[key], values) + + @property + def properties(self): + """ + Properties for this rule. + :return: + :rtype: dict + """ + return self._properties + + +class Introspection(Description): + """ + Introspection results. + """ + def __init__(self, rebulk, context=None): + self.patterns = [PatternDescription(pattern) for pattern in rebulk.effective_patterns(context) + if not pattern.private] + self.rules = [RuleDescription(rule) for rule in rebulk.effective_rules(context)] + + @property + def properties(self): + """ + Properties for Introspection results. + :return: + :rtype: + """ + properties = defaultdict(list) + for pattern in self.patterns: + for key, values in pattern.properties.items(): + extend_safe(properties[key], values) + for rule in self.rules: + for key, values in rule.properties.items(): + extend_safe(properties[key], values) + return properties + + +def introspect(rebulk, context=None): + """ + Introspect a Rebulk instance to grab defined objects and properties that can be generated. 
+ :param rebulk: + :type rebulk: Rebulk + :param context: + :type context: + :return: Introspection instance + :rtype: Introspection + """ + return Introspection(rebulk, context) diff --git a/rebulk/pattern.py b/rebulk/pattern.py index ff3df24..433ecab 100644 --- a/rebulk/pattern.py +++ b/rebulk/pattern.py @@ -29,8 +29,8 @@ class Pattern(object): """ def __init__(self, name=None, tags=None, formatter=None, validator=None, children=False, every=False, - private_parent=False, private_children=False, private=False, marker=False, format_all=False, - validate_all=False, disabled=False, log_level=None): + private_parent=False, private_children=False, private=False, private_names=None, marker=False, + format_all=False, validate_all=False, disabled=False, log_level=None, properties=None): """ :param name: Name of this pattern :type name: str @@ -53,6 +53,8 @@ def __init__(self, name=None, tags=None, formatter=None, validator=None, childre :type private_parent: bool :param private_children: force return of children and flag children matches as private. :type private_children: bool + :param private_names: names of the matches that must be flagged as private. + :type private_names: list[str] :param marker: flag this pattern as beeing a marker. :type private: bool :param format_all if True, pattern will format every match in the hierarchy (even match not yield). 
@@ -63,8 +65,8 @@ def __init__(self, name=None, tags=None, formatter=None, validator=None, childre :type disabled: bool|function :param log_lvl: Log level associated to this pattern :type log_lvl: int - """ + # pylint:disable=too-many-locals self.name = name self.tags = ensure_list(tags) self.formatters, self._default_formatter = ensure_dict(formatter, lambda x: x) @@ -72,6 +74,7 @@ def __init__(self, name=None, tags=None, formatter=None, validator=None, childre self.every = every self.children = children self.private = private + self.private_names = private_names if private_names else [] self.private_parent = private_parent self.private_children = private_children self.marker = marker @@ -82,6 +85,7 @@ def __init__(self, name=None, tags=None, formatter=None, validator=None, childre else: self.disabled = disabled self._log_level = log_level + self._properties = properties self.defined_at = debug.defined_at() @property @@ -162,6 +166,7 @@ def matches(self, input_string, context=None): :return: matches based on input_string for this pattern :rtype: iterator[Match] """ + ret = [] for pattern in self.patterns: yield_parent = self._yield_parent() @@ -185,8 +190,22 @@ def matches(self, input_string, context=None): if yield_children or self.private_children: for child in match.children: ret.append(child) + self._matches_privatize(ret) return ret + def _matches_privatize(self, matches): + """ + Mark matches included in private_names with private flag. + :param matches: + :type matches: + :return: + :rtype: + """ + if self.private_names: + for child in matches: + if child.name in self.private_names: + child.private = True + @abstractproperty def patterns(self): # pragma: no cover """ @@ -197,6 +216,27 @@ def patterns(self): # pragma: no cover """ pass + @property + def properties(self): + """ + Property names and values that can be retrieved by this pattern. 
+ :return: + :rtype: + """ + if self._properties: + return self._properties + return {} + + @abstractproperty + def match_options(self): # pragma: no cover + """ + dict of default options for generated Match objects + + :return: **options to pass to Match constructor + :rtype: dict + """ + pass + @abstractmethod def _match(self, pattern, input_string, context=None): # pragma: no cover """ @@ -234,6 +274,10 @@ def __init__(self, *patterns, **kwargs): def patterns(self): return self._patterns + @property + def match_options(self): + return self._match_kwargs + def _match(self, pattern, input_string, context=None): for index in call(find_all, input_string, pattern, **self._kwargs): yield call(Match, index, index + len(pattern), pattern=self, input_string=input_string, @@ -276,6 +320,10 @@ def __init__(self, *patterns, **kwargs): def patterns(self): return self._patterns + @property + def match_options(self): + return self._match_kwargs + def _match(self, pattern, input_string, context=None): names = {v: k for k, v in pattern.groupindex.items()} for match_object in pattern.finditer(input_string): @@ -315,6 +363,10 @@ def __init__(self, *patterns, **kwargs): def patterns(self): return self._patterns + @property + def match_options(self): + return self._match_kwargs + def _match(self, pattern, input_string, context=None): ret = call(pattern, input_string, context, **self._kwargs) if ret: diff --git a/rebulk/rebulk.py b/rebulk/rebulk.py index 68a1f6e..67a543a 100644 --- a/rebulk/rebulk.py +++ b/rebulk/rebulk.py @@ -237,6 +237,21 @@ def matches(self, string, context=None): return matches + def effective_rules(self, context=None): + """ + Get effective rules for this rebulk object and its children. 
+ :param context: + :type context: + :return: + :rtype: + """ + rules = Rules() + rules.extend(self._rules) + for rebulk in self._rebulks: + if not rebulk.disabled(context): + extend_safe(rules, rebulk._rules) + return rules + def _execute_rules(self, matches, context): """ Execute rules for this rebulk and children. @@ -248,13 +263,24 @@ def _execute_rules(self, matches, context): :rtype: """ if not self.disabled(context): - rules = Rules() - rules.extend(self._rules) - for rebulk in self._rebulks: - if not rebulk.disabled(context): - extend_safe(rules, rebulk._rules) + rules = self.effective_rules(context) rules.execute_all_rules(matches, context) + def effective_processors(self, context=None): + """ + Get effective processors for this rebulk object and its children. + :param context: + :type context: + :return: + :rtype: + """ + processors = [] + for rebulk in self._rebulks: + if not rebulk.disabled(context): + extend_safe(processors, rebulk._processors) + extend_safe(processors, self._processors) + return processors + def _execute_processors(self, matches, context): """ Execute processors for this rebulk and children. @@ -266,16 +292,28 @@ def _execute_processors(self, matches, context): :rtype: """ if not self.disabled(context): - processors = list(self._processors) - for rebulk in self._rebulks: - if not rebulk.disabled(context): - extend_safe(processors, rebulk._processors) + processors = self.effective_processors(context) for func in processors: ret = call(func, matches, context) if isinstance(ret, Matches): matches = ret return matches + def effective_post_processors(self, context=None): + """ + Get effective post processors for this rebulk object and its children. 
+ :param context: + :type context: + :return: + :rtype: + """ + post_processors = [] + for rebulk in self._rebulks: + if not rebulk.disabled(context): + extend_safe(post_processors, rebulk._post_processors) + extend_safe(post_processors, self._post_processors) + return post_processors + def _execute_post_processors(self, matches, context): """ Execute post processors for this rebulk and children. @@ -287,17 +325,27 @@ def _execute_post_processors(self, matches, context): :rtype: """ if not self.disabled(context): - post_processors = [] - for rebulk in self._rebulks: - if not rebulk.disabled(context): - extend_safe(post_processors, rebulk._post_processors) - extend_safe(post_processors, self._post_processors) + post_processors = self.effective_post_processors(context) for func in post_processors: ret = call(func, matches, context) if isinstance(ret, Matches): matches = ret return matches + def effective_patterns(self, context=None): + """ + Get effective patterns for this rebulk object and its children. 
+ :param context: + :type context: + :return: + :rtype: + """ + patterns = list(self._patterns) + for rebulk in self._rebulks: + if not rebulk.disabled(context): + extend_safe(patterns, rebulk._patterns) + return patterns + def _matches_patterns(self, matches, context): """ Search for all matches with current paterns agains input_string @@ -309,10 +357,7 @@ def _matches_patterns(self, matches, context): :rtype: """ if not self.disabled(context): - patterns = list(self._patterns) - for rebulk in self._rebulks: - if not rebulk.disabled(context): - extend_safe(patterns, rebulk._patterns) + patterns = self.effective_patterns(context) for pattern in patterns: if not pattern.disabled(context): pattern_matches = pattern.matches(matches.input_string, context) diff --git a/rebulk/rules.py b/rebulk/rules.py index c5e1e1b..8b953e8 100644 --- a/rebulk/rules.py +++ b/rebulk/rules.py @@ -69,6 +69,7 @@ class CustomRule(Condition, Consequence): priority = 0 name = None dependency = None + properties = {} def __init__(self, log_level=None): self.defined_at = debug.defined_at() diff --git a/rebulk/test/default_rules_module.py b/rebulk/test/default_rules_module.py index e18f553..82a17e6 100644 --- a/rebulk/test/default_rules_module.py +++ b/rebulk/test/default_rules_module.py @@ -38,6 +38,7 @@ def when(self, matches, context): class RuleAppend2(Rule): consequence = [AppendMatch('renamed')] + properties = {'renamed': []} def when(self, matches, context): return [Match(5, 10)] diff --git a/rebulk/test/test_introspector.py b/rebulk/test/test_introspector.py new file mode 100644 index 0000000..d4b30f3 --- /dev/null +++ b/rebulk/test/test_introspector.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Introspector tests +""" +# pylint: disable=no-self-use,pointless-statement,missing-docstring,protected-access,invalid-name +from .. import Rebulk +from .. 
import introspector +from .default_rules_module import RuleAppend2 + + +def test_string_introspector(): + rebulk = Rebulk().string('One', 'Two', 'Three', name='first').string('1', '2', '3', name='second') + + introspected = introspector.introspect(rebulk, None) + + assert len(introspected.patterns) == 2 + + first_properties = introspected.patterns[0].properties + assert len(first_properties) == 1 + assert first_properties['first'] == ['One', 'Two', 'Three'] + + second_properties = introspected.patterns[1].properties + assert len(second_properties) == 1 + assert second_properties['second'] == ['1', '2', '3'] + + properties = introspected.properties + assert len(properties) == 2 + assert properties['first'] == first_properties['first'] + assert properties['second'] == second_properties['second'] + + +def test_string_properties(): + rebulk = Rebulk()\ + .string('One', 'Two', 'Three', name='first', properties={'custom': ['One']})\ + .string('1', '2', '3', name='second', properties={'custom': [1]}) + + introspected = introspector.introspect(rebulk, None) + + assert len(introspected.patterns) == 2 + assert len(introspected.rules) == 0 + + first_properties = introspected.patterns[0].properties + assert len(first_properties) == 1 + assert first_properties['custom'] == ['One'] + + second_properties = introspected.patterns[1].properties + assert len(second_properties) == 1 + assert second_properties['custom'] == [1] + + properties = introspected.properties + assert len(properties) == 1 + assert properties['custom'] == ['One', 1] + + +def test_pattern_value(): + rebulk = Rebulk()\ + .regex('One', 'Two', 'Three', name='first', value="string") \ + .string('1', '2', '3', name='second', value="digit") \ + .string('4', '5', '6', name='third') + + introspected = introspector.introspect(rebulk, None) + + assert len(introspected.patterns) == 3 + assert len(introspected.rules) == 0 + + first_properties = introspected.patterns[0].properties + assert len(first_properties) == 1 + assert first_properties['first'] == 
['string'] + + second_properties = introspected.patterns[1].properties + assert len(second_properties) == 1 + assert second_properties['second'] == ['digit'] + + third_properties = introspected.patterns[2].properties + assert len(third_properties) == 1 + assert third_properties['third'] == ['4', '5', '6'] + + properties = introspected.properties + assert len(properties) == 3 + assert properties['first'] == first_properties['first'] + assert properties['second'] == second_properties['second'] + assert properties['third'] == third_properties['third'] + + +def test_rule_properties(): + rebulk = Rebulk().rules(RuleAppend2) + + introspected = introspector.introspect(rebulk, None) + + assert len(introspected.rules) == 1 + assert len(introspected.patterns) == 0 + + rule_properties = introspected.rules[0].properties + assert len(rule_properties) == 1 + assert rule_properties['renamed'] == [] diff --git a/rebulk/test/test_pattern.py b/rebulk/test/test_pattern.py index 5cb2567..0316aab 100644 --- a/rebulk/test/test_pattern.py +++ b/rebulk/test/test_pattern.py @@ -17,7 +17,7 @@ class TestStringPattern(object): "which were the Hebrew letter qoph." 
def test_single(self): - pattern = StringPattern("Celtic", label="test") + pattern = StringPattern("Celtic") matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -27,22 +27,28 @@ def test_single(self): assert matches[0].value == "Celtic" def test_repr(self): - pattern = StringPattern("Celtic", label="test") + pattern = StringPattern("Celtic") assert repr(pattern) == '' def test_ignore_case(self): - pattern = StringPattern("celtic", label="test", ignore_case=False) + pattern = StringPattern("celtic", ignore_case=False) matches = list(pattern.matches(self.input_string)) assert len(matches) == 0 - pattern = StringPattern("celtic", label="test", ignore_case=True) + pattern = StringPattern("celtic", ignore_case=True) matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 assert matches[0].value == "Celtic" + def test_private_names(self): + pattern = StringPattern("celtic", name="test", private_names=["test"], ignore_case=True) + + matches = list(pattern.matches(self.input_string)) + assert len(matches) == 1 + assert matches[0].private def test_no_match(self): pattern = StringPattern("Python") @@ -95,7 +101,7 @@ class TestRePattern(object): "which were the Hebrew letter qoph." 
def test_single_compiled(self): - pattern = RePattern(re.compile("Celt.?c"), label="test") + pattern = RePattern(re.compile("Celt.?c")) matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -105,7 +111,7 @@ def test_single_compiled(self): assert matches[0].value == "Celtic" def test_single_string(self): - pattern = RePattern("Celt.?c", label="test") + pattern = RePattern("Celt.?c") matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -115,7 +121,7 @@ def test_single_string(self): assert matches[0].value == "Celtic" def test_single_kwargs(self): - pattern = RePattern({"pattern": "celt.?c", "flags": re.IGNORECASE}, label="test") + pattern = RePattern({"pattern": "celt.?c", "flags": re.IGNORECASE}) matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -125,7 +131,7 @@ def test_single_kwargs(self): assert matches[0].value == "Celtic" def test_single_vargs(self): - pattern = RePattern(("celt.?c", re.IGNORECASE), label="test") + pattern = RePattern(("celt.?c", re.IGNORECASE)) matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -135,7 +141,7 @@ def test_single_vargs(self): assert matches[0].value == "Celtic" def test_no_match(self): - pattern = RePattern("abc.?def", label="test") + pattern = RePattern("abc.?def") matches = list(pattern.matches(self.input_string)) assert len(matches) == 0 @@ -173,7 +179,7 @@ def test_multiple_patterns(self): assert matches[2].value == "Hebrew" def test_unnamed_groups(self): - pattern = RePattern(r"(Celt.?c)\s+(\w+)", label="test") + pattern = RePattern(r"(Celt.?c)\s+(\w+)") matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -205,7 +211,7 @@ def test_unnamed_groups(self): assert group2.parent == parent def test_named_groups(self): - pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", label="test") + pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)") matches = list(pattern.matches(self.input_string)) assert len(matches) == 
1 @@ -236,7 +242,7 @@ def test_named_groups(self): assert group2.parent == parent def test_children(self): - pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", label="test", children=True) + pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", children=True) matches = list(pattern.matches(self.input_string)) assert len(matches) == 2 @@ -255,7 +261,7 @@ def test_children(self): assert group2.value == "violin" def test_children_parent_private(self): - pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", label="test", children=True, private_parent=True) + pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", children=True, private_parent=True) matches = list(pattern.matches(self.input_string)) assert len(matches) == 3 @@ -283,7 +289,7 @@ def test_children_parent_private(self): assert group2.value == "violin" def test_parent_children_private(self): - pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", label="test", private_children=True) + pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", private_children=True) matches = list(pattern.matches(self.input_string)) assert len(matches) == 3 @@ -311,7 +317,7 @@ def test_parent_children_private(self): assert group2.value == "violin" def test_every(self): - pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", label="test", every=True) + pattern = RePattern(r"(?PCelt.?c)\s+(?P\w+)", every=True) matches = list(pattern.matches(self.input_string)) assert len(matches) == 3 @@ -391,7 +397,7 @@ def func(input_string): if i > -1: return i, i + len("fly"), "fly", "functional" - pattern = FunctionalPattern(func, label="test") + pattern = FunctionalPattern(func) matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -407,7 +413,7 @@ def func(input_string): if i > -1: return {"start": i, "end": i + len("fly"), "name": "functional"} - pattern = FunctionalPattern(func, label="test") + pattern = FunctionalPattern(func) matches = list(pattern.matches(self.input_string)) assert len(matches) == 1 @@ -431,7 +437,7 @@ def func(input_string): matches.append({"start": 
i, "end": i + len("Hebrew")}) return matches - pattern = FunctionalPattern(func, label="test") + pattern = FunctionalPattern(func) matches = list(pattern.matches(self.input_string)) assert len(matches) == 3 @@ -463,7 +469,7 @@ def func(input_string): if i > -1: yield (i, {"end": i + len("Hebrew")}) - pattern = FunctionalPattern(func, label="test") + pattern = FunctionalPattern(func) matches = list(pattern.matches(self.input_string)) assert len(matches) == 3 @@ -484,7 +490,7 @@ def func(input_string): assert matches[2].value == "Hebrew" def test_no_match(self): - pattern = FunctionalPattern(lambda x: None, label="test") + pattern = FunctionalPattern(lambda x: None) matches = list(pattern.matches(self.input_string)) assert len(matches) == 0