-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Remove dependency on ply and implement custom parser * Test cleanup * Implement whole parser * Small cleanup * Changed one parser test * Implemented support for multiple consecutive out operators * Add docs for parser
- Loading branch information
Showing
4 changed files
with
160 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,67 +1,53 @@ | ||
import ply.lex as lex | ||
import ply.yacc as yacc | ||
from homotopy.syntax_tree import SimpleSnippet, CompositeSnippet | ||
|
||
# Exceptions | ||
|
||
class Parser: | ||
parameter_chars = "!@#$%:~" | ||
in_operator = '>' | ||
out_operator = '<' | ||
and_operator = '&' | ||
|
||
class IllegalCharacter(Exception): | ||
pass | ||
@staticmethod | ||
def parse(snippet_text): | ||
stack = [] | ||
current_match = [] | ||
last_operator = Parser.in_operator | ||
|
||
# Lexer | ||
for c in snippet_text + "\0": | ||
if c in Parser.parameter_chars or c in ["\0", Parser.in_operator, Parser.out_operator, Parser.and_operator]: | ||
if last_operator == Parser.in_operator: | ||
stack.append(SimpleSnippet("".join(current_match))) | ||
else: | ||
current_snippet = stack.pop() | ||
stack.append( | ||
CompositeSnippet(current_snippet, last_operator, SimpleSnippet("".join(current_match)))) | ||
|
||
# List of token names. | ||
last_operator = c | ||
current_match.clear() | ||
else: | ||
current_match.append(c) | ||
|
||
if c == Parser.and_operator: | ||
last_operator = Parser.in_operator | ||
Parser.merge_stack(stack) | ||
|
||
tokens = ( | ||
'SNIPPET', | ||
'LEFT_OPERATOR', | ||
'RIGHT_OPERATOR', | ||
) | ||
if c == Parser.out_operator: | ||
last_operator = Parser.in_operator | ||
|
||
# Starting letters of operators. | ||
left = '!@#' | ||
right = '$%:' | ||
for _ in range(2): | ||
Parser.merge_stack(stack) | ||
|
||
# Regular expression rules for tokens | ||
t_SNIPPET = r'[a-zA-Z_0-9]+' | ||
t_LEFT_OPERATOR = r'[{0}][{0}{1}]*'.format(left, right) | ||
t_RIGHT_OPERATOR = r'[{1}][{0}{1}]*'.format(left, right) | ||
while len(stack) > 1: | ||
Parser.merge_stack(stack) | ||
|
||
return stack.pop() | ||
|
||
# Error handling rule | ||
def t_error(t): | ||
raise IllegalCharacter(t.value[0]) | ||
@staticmethod | ||
def merge_stack(stack): | ||
last_tree = stack.pop() | ||
if last_tree != SimpleSnippet(''): | ||
next_tree = stack.pop() | ||
stack.append(CompositeSnippet(next_tree, Parser.in_operator, last_tree)) | ||
|
||
# Build the lexer | ||
|
||
|
||
lexer = lex.lex() | ||
|
||
# Parser | ||
|
||
# Set up precedence. | ||
precedence = ( | ||
('left', 'LEFT_OPERATOR'), | ||
('right', 'RIGHT_OPERATOR'), | ||
) | ||
|
||
|
||
# Grammar rules. | ||
def p_expression_plus(p): | ||
""" | ||
expression : expression LEFT_OPERATOR expression | ||
| expression RIGHT_OPERATOR expression | ||
""" | ||
p[0] = CompositeSnippet(p[1], p[2], p[3]) | ||
|
||
|
||
def p_expression_minus(p): | ||
""" | ||
expression : SNIPPET | ||
""" | ||
p[0] = SimpleSnippet(p[1]) | ||
|
||
|
||
# Build the parser | ||
parser = yacc.yacc() | ||
parser = Parser() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,70 @@ | ||
from lib2to3.pgen2 import parse | ||
from unittest import TestCase | ||
|
||
from homotopy.parser import parser, IllegalCharacter | ||
from homotopy.parser import parser | ||
from homotopy.syntax_tree import SimpleSnippet, CompositeSnippet | ||
|
||
|
||
class TestParser(TestCase): | ||
def test_basic(self): | ||
self.assertEqual(parser.parse('asd'), SimpleSnippet('asd')) | ||
self.assertEqual(parser.parse(''), SimpleSnippet('')) | ||
|
||
with self.assertRaises(IllegalCharacter): | ||
parser.parse("☼") | ||
self.assertEqual(parser.parse('asd'), SimpleSnippet('asd')) | ||
self.assertEqual(parser.parse('asd#'), CompositeSnippet(SimpleSnippet('asd'), '#', SimpleSnippet(''))) | ||
self.assertEqual(parser.parse('asd!#1'), | ||
CompositeSnippet( | ||
CompositeSnippet(SimpleSnippet('asd'), '!', | ||
SimpleSnippet('')), '#', | ||
SimpleSnippet('1')) | ||
) | ||
|
||
def test_left_associativity(self): | ||
left = '!@#' | ||
for l in left: | ||
def test_parameters(self): | ||
operators = '!@#$%:~' | ||
for l in operators: | ||
self.assertEqual(parser.parse('first{0}second{0}third'.format(l)), | ||
CompositeSnippet( | ||
CompositeSnippet(SimpleSnippet('first'), l, | ||
SimpleSnippet('second')), l, | ||
SimpleSnippet('third')) | ||
) | ||
|
||
def test_right_associativity(self): | ||
right = '$%:' | ||
for r in right: | ||
self.assertEqual(parser.parse('first{0}second{0}third'.format(r)), | ||
CompositeSnippet(SimpleSnippet('first'), r, | ||
CompositeSnippet(SimpleSnippet('second'), r, | ||
SimpleSnippet('third')) | ||
) | ||
) | ||
def test_into(self): | ||
self.assertEqual(parser.parse('asd>dsa'), CompositeSnippet(SimpleSnippet('asd'), '>', SimpleSnippet('dsa'))) | ||
self.assertEqual(parser.parse('asd>dsa#2'), | ||
CompositeSnippet(SimpleSnippet('asd'), '>', | ||
CompositeSnippet(SimpleSnippet('dsa'), '#', | ||
SimpleSnippet('2'))) | ||
) | ||
|
||
self.assertEqual(parser.parse('for>if&if'), | ||
CompositeSnippet( | ||
CompositeSnippet(SimpleSnippet('for'), '>', | ||
SimpleSnippet('if')), '>', | ||
SimpleSnippet('if')) | ||
) | ||
|
||
self.assertEqual(parser.parse('for>if>if<if'), | ||
CompositeSnippet( | ||
CompositeSnippet(SimpleSnippet('for'), '>', | ||
CompositeSnippet(SimpleSnippet('if'), '>', SimpleSnippet('if'))), '>', | ||
SimpleSnippet('if')) | ||
) | ||
|
||
self.assertEqual(parser.parse('for>if>if>if<<if'), | ||
CompositeSnippet( | ||
CompositeSnippet(SimpleSnippet('for'), '>', | ||
CompositeSnippet(SimpleSnippet('if'), '>', | ||
CompositeSnippet( | ||
SimpleSnippet('if'), '>', | ||
SimpleSnippet('if')))), '>', | ||
SimpleSnippet('if')) | ||
) | ||
|
||
self.assertEqual(parser.parse('for>if#5>if<if'), | ||
CompositeSnippet( | ||
CompositeSnippet(SimpleSnippet('for'), '>', | ||
CompositeSnippet( | ||
CompositeSnippet(SimpleSnippet('if'), '#', SimpleSnippet('5')), '>', | ||
SimpleSnippet('if'))), '>', | ||
SimpleSnippet('if')) | ||
) |