Skip to content

Commit

Permalink
Merge pull request #2073 from Zac-HD/lark-python
Browse files Browse the repository at this point in the history
Explicit terminal strategies for Lark
  • Loading branch information
Zac-HD committed Sep 4, 2019
2 parents ea41eb5 + 2b7ddef commit 2a68481
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 28 deletions.
8 changes: 8 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
RELEASE_TYPE: minor

This release improves the :func:`~hypothesis.extra.lark.from_lark` strategy,
tightening argument validation and adding the ``explicit`` argument to allow use
with terminals that use ``%declare`` instead of a string or regular expression.

This argument is required to handle constructs such as indent and dedent tokens
in Python code, which can be generated with the :pypi:`hypothesmith` package.
78 changes: 65 additions & 13 deletions hypothesis-python/src/hypothesis/extra/lark.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@

import hypothesis._strategies as st
from hypothesis.errors import InvalidArgument
from hypothesis.internal.compat import getfullargspec
from hypothesis.internal.compat import getfullargspec, string_types
from hypothesis.internal.conjecture.utils import calc_label_from_name
from hypothesis.internal.validation import check_type
from hypothesis.searchstrategy import SearchStrategy

if False:
from typing import Text # noqa
from typing import Dict, Text # noqa

__all__ = ["from_lark"]

Expand All @@ -69,14 +69,28 @@ class DrawState(object):
result = attr.ib(default=attr.Factory(list))


def get_terminal_names(terminals, rules, ignore_names):
    """Return the set of names of every terminal used by the grammar.

    The three arguments are the results of calling
    ``Lark.grammar.compile()``.  ``terminals`` and ``ignore_names`` alone
    are not sufficient, because terminals introduced with ``%declare`` are
    omitted from both; they show up only inside the expansion(s) of the
    nonterminal rules, so those expansions are scanned as well.
    """
    found = {term.name for term in terminals}
    found.update(ignore_names)
    for rule in rules:
        for symbol in rule.expansion:
            # Expansions mix terminals and nonterminals; keep terminals only.
            if isinstance(symbol, Terminal):
                found.add(symbol.name)
    return found


class LarkStrategy(SearchStrategy):
"""Low-level strategy implementation wrapping a Lark grammar.
See ``from_lark`` for details.
"""

def __init__(self, grammar, start=None):
check_type(lark.lark.Lark, grammar, "grammar")
def __init__(self, grammar, start, explicit):
assert isinstance(grammar, lark.lark.Lark)
if start is None:
start = grammar.options.start
if not isinstance(start, list):
Expand Down Expand Up @@ -110,6 +124,16 @@ def __init__(self, grammar, start=None):
t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
for t in terminals
}
unknown_explicit = set(explicit) - get_terminal_names(
terminals, rules, ignore_names
)
if unknown_explicit:
raise InvalidArgument(
"The following arguments were passed as explicit_strategies, "
"but there is no such terminal production in this grammar: %r"
% (sorted(unknown_explicit),)
)
self.terminal_strategies.update(explicit)

nonterminals = {}

Expand Down Expand Up @@ -145,8 +169,10 @@ def draw_symbol(self, data, symbol, draw_state):
strategy = self.terminal_strategies[symbol.name]
except KeyError:
raise InvalidArgument(
"Undefined terminal %r. Generation does not currently support use of %%declare."
% (symbol.name,)
"Undefined terminal %r. Generation does not currently support "
"use of %%declare unless you pass `explicit`, a dict of "
'names-to-strategies, such as `{%r: st.just("")}`'
% (symbol.name, symbol.name)
)
draw_state.result.append(data.draw(strategy))
else:
Expand All @@ -169,10 +195,22 @@ def calc_has_reusable_values(self, recur):
return True


def check_explicit(name):
    """Build a pass-through validator for values drawn from ``name``.

    The returned function runs ``check_type`` against ``string_types`` on
    each value it is given, using ``name`` to describe where the value came
    from, and then returns the value unchanged.  It is intended to be
    applied with ``.map(...)`` to a user-supplied terminal strategy.
    """

    def inner(value):
        label = "value drawn from " + name
        check_type(string_types, value, label)
        return value

    return inner


@st.cacheable
@st.defines_strategy_with_reusable_values
def from_lark(grammar, start=None):
# type: (lark.lark.Lark, Text) -> st.SearchStrategy[Text]
def from_lark(
grammar, # type: lark.lark.Lark
start=None, # type: Text
explicit=None, # type: Dict[Text, st.SearchStrategy[Text]]
):
# type: (...) -> st.SearchStrategy[Text]
"""A strategy for strings accepted by the given context-free grammar.
``grammar`` must be a ``Lark`` object, which wraps an EBNF specification.
Expand All @@ -183,12 +221,26 @@ def from_lark(grammar, start=None):
nonterminal ``start`` symbol in the grammar, which was supplied as an
argument to the Lark class. To generate strings matching a different
symbol, including terminals, you can override this by passing the
``start`` argument to ``from_lark``.
``start`` argument to ``from_lark``. Note that Lark may remove unreachable
productions when the grammar is compiled, so you should probably pass the
same value for ``start`` to both.
Currently ``from_lark`` does not support grammars that need custom lexing.
Any lexers will be ignored, and any undefined terminals from the use of
``%declare`` will result in generation errors. We hope to support more of
these features in future.
"""
``%declare`` will result in generation errors. To define strategies for
such terminals, pass a dictionary mapping their name to a corresponding
strategy as the ``explicit`` argument.
return LarkStrategy(grammar, start)
The :pypi:`hypothesmith` project includes a strategy for Python source,
based on a grammar and careful post-processing.
"""
check_type(lark.lark.Lark, grammar, "grammar")
if explicit is None:
explicit = {}
else:
check_type(dict, explicit, "explicit")
explicit = {
k: v.map(check_explicit("explicit[%r]=%r" % (k, v)))
for k, v in explicit.items()
}
return LarkStrategy(grammar, start, explicit)
51 changes: 36 additions & 15 deletions hypothesis-python/tests/lark/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from hypothesis.errors import InvalidArgument
from hypothesis.extra.lark import from_lark
from hypothesis.internal.compat import integer_types, text_type
from hypothesis.strategies import data
from hypothesis.strategies import data, just
from tests.common.debug import find_any

# Adapted from the official Lark tutorial, with modifications to ensure
Expand All @@ -51,6 +51,11 @@
%ignore WS
"""

LIST_GRAMMAR = r"""
list : "[" [NUMBER ("," NUMBER)*] "]"
NUMBER: /[0-9]+/
"""


@given(from_lark(Lark(EBNF_GRAMMAR, start="value")))
def test_generates_valid_json(string):
Expand Down Expand Up @@ -90,16 +95,7 @@ def test_can_generate_ignored_tokens():


def test_generation_without_whitespace():
list_grammar = r"""
list : "[" [NUMBER ("," NUMBER)*] "]"
NUMBER: /[0-9]+/
"""

@given(from_lark(Lark(list_grammar, start="list")))
def test(g):
assert " " not in g

test()
find_any(from_lark(Lark(LIST_GRAMMAR, start="list")), lambda g: " " not in g)


def test_cannot_convert_EBNF_to_strategy_directly():
Expand All @@ -109,13 +105,38 @@ def test_cannot_convert_EBNF_to_strategy_directly():
with pytest.raises(TypeError):
# Not even the right number of arguments
from_lark(EBNF_GRAMMAR, start="value").example()
with pytest.raises(InvalidArgument):
# Wrong type for the ``explicit`` argument
from_lark(Lark(LIST_GRAMMAR, start="list"), explicit=[]).example()


def test_can_not_use_undefined_terminals_yet():
grammar = r"""
list : "[" ELEMENT ("," ELEMENT)* "]"
def test_undefined_terminals_require_explicit_strategies():
elem_grammar = r"""
list : "[" [ELEMENT ("," ELEMENT)*] "]"
%declare ELEMENT
"""
with pytest.raises(InvalidArgument):
from_lark(Lark(elem_grammar, start="list")).example()
strategy = {"ELEMENT": just("200")}
from_lark(Lark(elem_grammar, start="list"), explicit=strategy).example()


def test_cannot_use_explicit_strategies_for_unknown_terminals():
    """Passing an ``explicit`` entry for a name that is not a terminal in
    the grammar is expected to raise ``InvalidArgument``."""
    with pytest.raises(InvalidArgument):
        grammar = Lark(LIST_GRAMMAR, start="list")
        unknown = {"unused_name": just("")}
        from_lark(grammar, explicit=unknown).example()


def test_non_string_explicit_strategies_are_invalid():
with pytest.raises(InvalidArgument):
from_lark(Lark(grammar, start="list")).example()
from_lark(
Lark(LIST_GRAMMAR, start="list"), explicit={"NUMBER": just(0)}
).example()


@given(
    string=from_lark(Lark(LIST_GRAMMAR, start="list"), explicit={"NUMBER": just("0")})
)
def test_can_override_defined_terminal(string):
    """An ``explicit`` strategy replaces the grammar's own ``NUMBER``
    terminal, so every generated list contains only zeros."""
    numbers = json.loads(string)
    assert sum(numbers) == 0

0 comments on commit 2a68481

Please sign in to comment.