Skip to content

Commit

Permalink
Improve performance of specification parser
Browse files Browse the repository at this point in the history
Ref. #291
  • Loading branch information
treiher committed Aug 25, 2020
1 parent 57b0a09 commit d474b90
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 34 deletions.
55 changes: 21 additions & 34 deletions rflx/parser/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from pyparsing import (
CaselessKeyword,
Combine,
Forward,
Group,
Keyword,
Expand All @@ -20,16 +19,10 @@
StringEnd,
Suppress,
Token,
Word,
WordEnd,
WordStart,
ZeroOrMore,
alphanums,
alphas,
delimitedList,
infixNotation,
locatedExpr,
nums,
oneOf,
opAssoc,
)
Expand Down Expand Up @@ -121,24 +114,19 @@ def right_parenthesis() -> Token:


def unqualified_identifier() -> Token:
return (
locatedExpr(WordStart(alphas) + Word(alphanums + "_") + WordEnd(alphanums + "_"))
.setParseAction(parse_identifier)
.setName("Identifier")
)
return locatedExpr(Regex(r"[a-zA-Z]\w*")).setParseAction(parse_identifier).setName("Identifier")


def qualified_identifier() -> Token:
return (delimitedList(unqualified_identifier(), ".")).setParseAction(
lambda t: ID(
".".join(map(str, t.asList())),
Location(start=t[0].location.start, end=t[-1].location.end),
)
return (
locatedExpr(Regex(r"[a-zA-Z]\w*(\.[a-zA-Z]\w*)*"))
.setParseAction(parse_identifier)
.setName("QualifiedIdentifier")
)


def variable() -> Token:
return delimitedList(unqualified_identifier(), delim=".").setParseAction(parse_variable)
return Group(qualified_identifier()).setParseAction(parse_variable)


def qualified_variable() -> Token:
Expand All @@ -148,14 +136,13 @@ def qualified_variable() -> Token:


def numeric_literal() -> Token:
numeral = Combine(Word(nums) + ZeroOrMore(Optional(Word("_")) + Word(nums)))
numeral = Regex(r"\d+(_?\d+)*")
numeral.setParseAction(lambda t: t[0].replace("_", ""))

decimal_literal = Group(numeral)
decimal_literal.setParseAction(lambda t: (int(t[0][0]), 0))

extended_digit = Word(nums + "ABCDEF")
based_numeral = Combine(extended_digit + ZeroOrMore(Optional("_") + extended_digit))
based_numeral = Regex(r"[0-9A-F]+(_?[0-9A-F]+)*")
based_numeral.setParseAction(lambda t: t[0].replace("_", ""))

based_literal = numeral + Literal("#") - based_numeral - Literal("#")
Expand Down Expand Up @@ -850,29 +837,29 @@ def parse_then(string: str, location: int, tokens: ParseResults) -> Then:
def parse_identifier(string: str, location: int, tokens: ParseResults) -> ID:
tokens, locn = evaluate_located_expression(string, tokens)

if tokens[0].lower() in const.RESERVED_WORDS:
if tokens.lower() in const.RESERVED_WORDS:
fail(
f'reserved word "{tokens[0]}" used as identifier',
Subsystem.PARSER,
Severity.ERROR,
locn,
f'reserved word "{tokens}" used as identifier', Subsystem.PARSER, Severity.ERROR, locn,
)

return ID(tokens[0], locn)
return ID(tokens, locn)


@fatalexceptions
def parse_variable(string: str, location: int, tokens: ParseResults) -> Union[Variable, Selected]:
assert 1 <= len(tokens) <= 2
assert tokens[0].location
assert tokens[-1].location
identifier = tokens[0][0]

locn = Location(start=tokens[0].location.start, end=tokens[-1].location.end)
assert 1 <= len(identifier.parts) <= 2
assert identifier.location

if len(tokens) == 2:
return Selected(Variable(tokens[0], location=locn), tokens[1], location=locn)
if len(identifier.parts) == 2:
return Selected(
Variable(identifier.parent, location=identifier.location),
identifier.name,
location=identifier.location,
)

return Variable(tokens[0], location=locn)
return Variable(identifier, location=identifier.location)


@fatalexceptions
Expand Down
30 changes: 30 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,36 @@ def raise_parser_error() -> None:
fail("TEST", Subsystem.PARSER, Severity.ERROR)


@pytest.mark.parametrize(
    "string,expected",
    [(name, ID(name)) for name in ("X", "X2", "X_Y", "X_Y_3")],
)
def test_grammar_unqualified_identifier(string: str, expected: ID) -> None:
    """Check that an unqualified identifier parses to the matching ID with a location."""
    parser = grammar.unqualified_identifier()
    # parseAll=True makes the parse fail unless the whole input is consumed.
    results = parser.parseString(string, parseAll=True)
    identifier = results[0]
    assert identifier == expected
    assert identifier.location


@pytest.mark.parametrize(
    "string,expected",
    [
        (name, ID(name))
        for name in (
            "X",
            "X2",
            "X_Y",
            "X_Y_3",
            "X.Y",
            "X2.Y2",
            "X_Y.Z",
            "X_Y_3.Z_4",
            "X.Y.Z",
        )
    ],
)
def test_grammar_qualified_identifier(string: str, expected: ID) -> None:
    """Check that a (possibly dotted) identifier parses to one ID with a location."""
    parser = grammar.qualified_identifier()
    # parseAll=True makes the parse fail unless the whole input is consumed.
    results = parser.parseString(string, parseAll=True)
    identifier = results[0]
    assert identifier == expected
    assert identifier.location


@pytest.mark.parametrize(
"string,expected",
[
Expand Down

0 comments on commit d474b90

Please sign in to comment.