From 3a9f9405a12161b3d8ac19a59e224566e263a0c8 Mon Sep 17 00:00:00 2001 From: Jaroslav Tulach Date: Thu, 4 Apr 2019 05:33:36 +0200 Subject: [PATCH] Parser based on parser combinators and partial evaluation --- .../org/apidesign/language/self/PELexer.java | 66 ++ .../org/apidesign/language/self/PEParser.java | 586 ++++++++++++++++++ .../apidesign/language/self/SelfParser.java | 348 +++++++++-- .../language/self/SelfParserTest.java | 147 ++++- 4 files changed, 1104 insertions(+), 43 deletions(-) create mode 100644 src/main/java/org/apidesign/language/self/PELexer.java create mode 100644 src/main/java/org/apidesign/language/self/PEParser.java diff --git a/src/main/java/org/apidesign/language/self/PELexer.java b/src/main/java/org/apidesign/language/self/PELexer.java new file mode 100644 index 0000000..9f1a98e --- /dev/null +++ b/src/main/java/org/apidesign/language/self/PELexer.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package org.apidesign.language.self; + +import com.oracle.truffle.api.profiles.ConditionProfile; +import org.netbeans.api.lexer.Token; +import org.netbeans.api.lexer.TokenId; + +interface PELexer { + + public Object[] asArgumentsArray(); + + public Token peek(ConditionProfile seenEof); + + public String position(); + + public void resetStackPointer(int pointer); + + public Token nextToken(ConditionProfile seenEof); + + public int getStackPointer(); + + public String tokenNames(TokenId token); + + public default String tokenNames(Token token) { + return tokenNames(token.id()); + } +} diff --git a/src/main/java/org/apidesign/language/self/PEParser.java b/src/main/java/org/apidesign/language/self/PEParser.java new file mode 100644 index 0000000..a5c17a3 --- /dev/null +++ b/src/main/java/org/apidesign/language/self/PEParser.java @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package org.apidesign.language.self; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Optional; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.function.Supplier; + +import com.oracle.truffle.api.CallTarget; +import com.oracle.truffle.api.CompilerAsserts; +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; +import com.oracle.truffle.api.Truffle; +import com.oracle.truffle.api.frame.VirtualFrame; +import com.oracle.truffle.api.nodes.DirectCallNode; +import com.oracle.truffle.api.nodes.ExplodeLoop; +import com.oracle.truffle.api.nodes.ExplodeLoop.LoopExplosionKind; +import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.nodes.RootNode; +import com.oracle.truffle.api.profiles.ConditionProfile; +import static org.apidesign.language.self.Alternative.error; +import org.netbeans.api.lexer.Token; +import org.netbeans.api.lexer.TokenId; + +abstract class Element extends Node { + + @CompilationFinal protected long firstA; + @CompilationFinal protected long firstB; + @CompilationFinal protected int singleToken = -1; + + protected abstract void createFirstSet(Element setHolder, HashSet> rulesAdded); + + public abstract void initialize(); + + public abstract T consume(PELexer lexer); + + public final boolean canStartWith(Token token) { + int id = token.id().ordinal(); + if (singleToken == -1L) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + if (Long.bitCount(firstA) + Long.bitCount(firstB) == 1) { + // if the "first" set consists of a single token, it can be checked more efficiently + if (firstA == 0) { + singleToken = Long.numberOfTrailingZeros(firstB) + 64; + } else { + singleToken = Long.numberOfTrailingZeros(firstA); + } + } else { + singleToken = 0; + } + } + if (singleToken != 0) { + return id == singleToken; + } + + if (id < 64) { + assert id > 0; + return (firstA & (1L << id)) != 0; + } else { 
+ assert id < 128; + return (firstB & (1L << (id - 64))) != 0; + } + } +} + +class RuleRootNode extends RootNode { + + @Child private Rule rule; + + RuleRootNode(Rule rule) { + super(null); + this.rule = rule; + } + + @Override + public Object execute(VirtualFrame frame) { + PELexer lexer = (PELexer) frame.getArguments()[0]; + return rule.element.consume(lexer); + } + + @Override + public String getName() { + return "parser rule " + rule.getName(); + } + + @Override + public String toString() { + return getName(); + } +} + +final class CallRule extends Element { + + private final Rule rule; + @Child DirectCallNode call; + + CallRule(Rule rule) { + this.rule = rule; + } + + @Override + protected void createFirstSet(Element setHolder, HashSet> rulesAdded) { + rule.element.createFirstSet(setHolder, rulesAdded); + } + + @Override + public void initialize() { + rule.initialize(); + } + + @SuppressWarnings("unchecked") + @Override + public T consume(PELexer lexer) { + if (call == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + call = insert(Truffle.getRuntime().createDirectCallNode(rule.getCallTarget())); + } + if (PEParser.PEPARSER_DIRECT_CALL) { + return rule.element.consume(lexer); + } else { + return (T) call.call(lexer.asArgumentsArray()); // do not create a new array every time + } + } +} + +final class Rule extends Element { + + private final String name; + @Child Element element; + CallTarget target; + + Rule(String name) { + this.name = name; + } + + public CallTarget getCallTarget() { + if (target == null) { + target = Truffle.getRuntime().createCallTarget(new RuleRootNode(this)); + } + return target; + } + + public void define(Element newElement) { + this.element = newElement; + } + + @Override + protected void createFirstSet(Element setHolder, HashSet> rulesAdded) { + if (!rulesAdded.contains(this)) { + rulesAdded.add(this); + if (firstA != 0 || firstB != 0) { + setHolder.firstA |= firstA; + setHolder.firstB |= firstB; + } else { + if 
(element != null) { + element.createFirstSet(setHolder, rulesAdded); + } + } + } + } + + void initializeRule() { + CompilerAsserts.neverPartOfCompilation(); + createFirstSet(this, new HashSet<>()); + } + + @Override + public void initialize() { + // do nothing - already initialized + } + + public String getName() { + return name; + } + + static int level = 0; + + @Override + public T consume(PELexer lexer) { + throw new IllegalStateException(getRootNode().getName()); + } +} + +abstract class SequenceBase extends Element { + + protected abstract Element[] elements(); + + @Override + protected void createFirstSet(Element setHolder, HashSet> rulesAdded) { + int i = 0; + Element[] elements = elements(); + while (i < elements.length && elements[i] instanceof OptionalElement) { + // add all optional prefixes + ((OptionalElement) elements[i]).element.createFirstSet(setHolder, rulesAdded); + i++; + } + assert i < elements.length : "non-optional element needed in sequence"; + // add the first non-optional element + elements[i].createFirstSet(setHolder, rulesAdded); + } + + @Override + public void initialize() { + for (Element element : elements()) { + element.initialize(); + } + } +} + +final class Sequence2 extends SequenceBase { + @Child private Element a; + @Child private Element b; + private final BiFunction action; + + Sequence2(BiFunction action, Element a, Element b) { + this.action = action; + this.a = a; + this.b = b; + } + + @Override + protected Element[] elements() { + return new Element[]{a, b}; + } + + @Override + public T consume(PELexer lexer) { + final A valueA = a.consume(lexer); + final B valueB = b.consume(lexer); + return action.apply(valueA, valueB); + } +} + +final class Sequence3 extends SequenceBase { + @Child private Element a; + @Child private Element b; + @Child private Element c; + private final PEParser.Function3 action; + + Sequence3(PEParser.Function3 action, Element a, Element b, Element c) { + this.action = action; + this.a = a; + this.b = 
b; + this.c = c; + } + + @Override + protected Element[] elements() { + return new Element[]{a, b, c}; + } + + @Override + public T consume(PELexer lexer) { + final A valueA = a.consume(lexer); + final B valueB = b.consume(lexer); + final C valueC = c.consume(lexer); + return action.apply(valueA, valueB, valueC); + } +} + +final class Sequence4 extends SequenceBase { + @Child private Element a; + @Child private Element b; + @Child private Element c; + @Child private Element d; + private final PEParser.Function4 action; + + Sequence4(PEParser.Function4 action, Element a, Element b, Element c, Element d) { + this.action = action; + this.a = a; + this.b = b; + this.c = c; + this.d = d; + } + + @Override + protected Element[] elements() { + return new Element[]{a, b, c}; + } + + @Override + public T consume(PELexer lexer) { + final A valueA = a.consume(lexer); + final B valueB = b.consume(lexer); + final C valueC = c.consume(lexer); + final D valueD = d.consume(lexer); + return action.apply(valueA, valueB, valueC, valueD); + } +} + +final class Alternative extends Element { + @Children private final Element[] options; + private final ConditionProfile seenEof = ConditionProfile.createBinaryProfile(); + + Alternative(Element[] options) { + this.options = options; + } + + @Override + protected void createFirstSet(Element setHolder, HashSet> rulesAdded) { + assert options.length > 0; + + for (Element option : options) { + option.createFirstSet(setHolder, rulesAdded); + } + } + + @Override + public void initialize() { + for (Element element : options) { + element.createFirstSet(element, new HashSet<>()); + element.initialize(); + } + } + + @Override + @ExplodeLoop(kind = LoopExplosionKind.FULL_EXPLODE_UNTIL_RETURN) + public T consume(PELexer lexer) { + Token lookahead = lexer.peek(seenEof); + for (Element element : options) { + if (element.canStartWith(lookahead)) { + // matched + return element.consume(lexer); + } + } + CompilerDirectives.transferToInterpreter(); + throw 
error("no alternative found at " + lexer.position() + " in " + getRootNode().getName()); + } + + static RuntimeException error(String message) { + CompilerAsserts.neverPartOfCompilation(); + throw new RuntimeException(message); + } +} + +final class Repetition extends Element { + @Child private Element element; + private final Supplier createList; + private final BiFunction addToList; + private final Function createResult; + private final ConditionProfile seenEof = ConditionProfile.createBinaryProfile(); + + Repetition(Element element, Supplier createList, BiFunction addToList, Function createResult) { + this.element = element; + this.createList = createList; + this.addToList = addToList; + this.createResult = createResult; + } + + @Override + protected void createFirstSet(Element setHolder, HashSet> rulesAdded) { + throw new IllegalStateException("should not reach here"); + } + + @Override + public void initialize() { + element.createFirstSet(element, new HashSet<>()); + element.initialize(); + } + + @Override + public R consume(PELexer lexer) { + ListT list = createList.get(); + while (true) { + Token lookahead = lexer.peek(seenEof); + if (!element.canStartWith(lookahead)) { + return createResult.apply(list); + } + list = addToList.apply(list, element.consume(lexer)); + } + } +} + +final class OptionalElement extends Element { + @Child Element element; + private final Function hasValueAction; + private final Supplier hasNoValueAction; + private final ConditionProfile seenEof = ConditionProfile.createBinaryProfile(); + + OptionalElement(Element element, Function hasValueAction, Supplier hasNoValueAction) { + this.element = element; + this.hasValueAction = hasValueAction; + this.hasNoValueAction = hasNoValueAction; + } + + @Override + protected void createFirstSet(Element setHolder, HashSet> rulesAdded) { + throw new IllegalStateException("should not reach here"); + } + + @Override + public void initialize() { + element.createFirstSet(element, new HashSet<>()); + 
element.initialize(); + } + + @Override + public R consume(PELexer lexer) { + Token lookahead = lexer.peek(seenEof); + if (element.canStartWith(lookahead)) { + return hasValueAction.apply(element.consume(lexer)); + } + return hasNoValueAction.get(); + } +} + +final class TokenReference extends Element { + private final TID token; + private final PEParser.TokenFunction action; + private final ConditionProfile seenEof = ConditionProfile.createBinaryProfile(); + + TokenReference(TID token, PEParser.TokenFunction action) { + this.token = token; + this.action = action; + } + + @Override + protected void createFirstSet(Element setHolder, HashSet> rulesAdded) { + int id = token.ordinal(); + if (id < 64) { + assert id > 0; + setHolder.firstA |= 1L << id; + } else { + assert id < 128; + setHolder.firstB |= 1L << (id - 64); + } + } + + @Override + public void initialize() { + // nothing to do + } + + @Override + public T consume(PELexer lexer) { + Token tokenId = lexer.peek(seenEof); + Token actualToken; + if ((actualToken = lexer.nextToken(seenEof)).id() != token) { + CompilerDirectives.transferToInterpreter(); + error("expecting " + lexer.tokenNames(token) + ", got " + lexer.tokenNames(actualToken) + " at " + lexer.position()); + } + return action.apply((Token) tokenId); + } +} + +@SuppressWarnings("unchecked") +public final class PEParser { + + static final boolean PEPARSER_DIRECT_CALL = Boolean.getBoolean("PEParser.directcall"); + + private final ArrayList> rules = new ArrayList<>(); + @CompilationFinal private Rule root; + + private static void replaceRules(Element[] elements) { + for (int i = 0; i < elements.length; i++) { + if (elements[i] instanceof Rule) { + elements[i] = new CallRule<>((Rule) elements[i]); + } + } + } + + private static Element replaceRule(Element element) { + if (element instanceof Rule) { + return new CallRule<>((Rule) element); + } else { + return element; + } + } + + public static Element alt(Element... 
options) { + replaceRules(options); + return new Alternative<>(options); + } + + public static Element seq(Element a, Element b, BiFunction action) { + return new Sequence2<>(action, replaceRule(a), replaceRule(b)); + } + + public static Element seq(Element a, Element b, Element c, Function3 action) { + return new Sequence3<>(action, replaceRule(a), replaceRule(b), replaceRule(c)); + } + + public static Element seq(Element a, Element b, Element c, Element d, Function4 action) { + return new Sequence4<>(action, replaceRule(a), replaceRule(b), replaceRule(c), replaceRule(d)); + } + + public static Element rep(Element element, Supplier createList, BiFunction addToList, Function createResult) { + return new Repetition<>(replaceRule(element), createList, addToList, createResult); + } + + public static Element> opt(Element element) { + return new OptionalElement<>(replaceRule(element), v -> Optional.of(v), () -> Optional.empty()); + } + + public static Element> ref(T id) { + return ref(id, (t) -> t); + } + + public static Element ref(T id, TokenFunction action) { + return new TokenReference<>(id, action); + } + + public Rule rule(String name) { + Rule rule = new Rule<>(name); + rules.add(rule); + return rule; + } + + public interface Function3 { + R apply(A a, B b, C c); + } + + public interface Function4 { + R apply(A a, B b, C c, D d); + } + + public interface Function5 { + R apply(A a, B b, C c, D d, E e); + } + + public interface TokenFunction { + R apply(Token token); + } + + PEParser() { + // private constructor + } + + final void initialize(Rule newRoot) { + this.root = newRoot; + for (Rule rule : rules) { + rule.initializeRule(); + } + for (Rule rule : rules) { + if (rule.element != null) { + rule.element.initialize(); + } + } + + } + + public Object parse(PELexer lexer) { + return root.getCallTarget().call(lexer.asArgumentsArray()); + } +} diff --git a/src/main/java/org/apidesign/language/self/SelfParser.java 
b/src/main/java/org/apidesign/language/self/SelfParser.java index 8d080f7..a7161a1 100644 --- a/src/main/java/org/apidesign/language/self/SelfParser.java +++ b/src/main/java/org/apidesign/language/self/SelfParser.java @@ -1,11 +1,18 @@ package org.apidesign.language.self; +import com.oracle.truffle.api.profiles.ConditionProfile; import com.oracle.truffle.api.source.Source; +import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.function.Consumer; +import static org.apidesign.language.self.PEParser.*; +import org.apidesign.language.self.SelfLexer.BasicNode; +import org.apidesign.language.self.SelfLexer.ListItem; import org.netbeans.api.lexer.Language; import org.netbeans.api.lexer.Token; import org.netbeans.api.lexer.TokenHierarchy; @@ -18,50 +25,227 @@ import org.netbeans.spi.lexer.TokenFactory; final class SelfParser { + private static final PEParser PARSER; + static { + PARSER = new PEParser(); + // create the rules + Rule program = PARSER.rule("program"); + Rule statement = PARSER.rule("statement"); + Rule objectLiteral = PARSER.rule("object"); + Rule exprlist = PARSER.rule("exprlist"); + Rule constant = PARSER.rule("constant"); + Rule unaryLevel = PARSER.rule("unaryLevel"); + Rule binaryLevel = PARSER.rule("binaryLevel"); + Rule keywordLevel = PARSER.rule("keywordLevel"); + Rule expression = PARSER.rule("expression"); + + program.define(seq(statement, rep(statement, ListItem::empty, ListItem::new, ListItem::self), + (l, r) -> new SelfLexer.BasicNode("program", SelfLexer.concat(l, r)))); + + Element> slotId = alt( + ref(SelfTokenId.IDENTIFIER), + seq(ref(SelfTokenId.KEYWORD_LOWERCASE), alt( + seq( + ref(SelfTokenId.IDENTIFIER), rep( + seq(ref(SelfTokenId.KEYWORD), ref(SelfTokenId.IDENTIFIER), (key, id) -> { + return key; + }), + ListItem::empty, ListItem::new, ListItem::self + ), (id, rest) -> { + return id; + }) +// 
rep(ref(SelfTokenId.KEYWORD)) + ), (key, alt) -> { + return key; + }), + seq(ref(SelfTokenId.OPERATOR), opt(ref(SelfTokenId.IDENTIFIER)), (op, id) -> op) + ); + + Element slot = alt( + seq( + slotId, alt(ref(SelfTokenId.EQUAL), ref(SelfTokenId.ARROW)), alt(constant, ref(SelfTokenId.IDENTIFIER), statement), + (a, b, c) -> { + boolean mutable = b.id() != SelfTokenId.EQUAL; + return new Slot(a.text(), mutable, c); + } + ), + ref(SelfTokenId.ARGUMENT, (t) -> Slot.argument(t.text())) + ); + + final Element dotAndSlot = seq(ref(SelfTokenId.DOT), slot, (dot, slot1) -> { + return slot1; + }); + Element> extraSlots = rep(dotAndSlot, () -> new ArrayList(), (l, v) -> { + l.add(v); + return l; + }, (t) -> t); + + Element> slots = alt( + ref(SelfTokenId.BAR, t -> Collections.emptyList()), + seq(slot, extraSlots, ref(SelfTokenId.BAR), (t, m, u) -> { + m.add(0, t); + return m; + }) + ); + + Element objectStatement = seq( + ref(SelfTokenId.LPAREN), alt( + seq(ref(SelfTokenId.BAR), slots, opt(exprlist), (bar, slts, expr) -> { + return slts; + }), + seq(exprlist, ref(SelfTokenId.RPAREN), (expr, rparen) -> Collections.emptyList()), + ref(SelfTokenId.RPAREN, (rparen) -> Collections.emptyList()) + ), + (t, u) -> { + return new SelfLexer.BasicNode("()") { + @Override + void print(Consumer registrar) { + Map obj = new HashMap<>(); + for (Slot s : u) { + obj.put(s.id.toString(), s.valueToString()); + } + registrar.accept(obj); + } + }; + } + ); + objectLiteral.define(objectStatement); + statement.define(alt(objectLiteral)); + + constant.define(alt(ref(SelfTokenId.SELF), ref(SelfTokenId.STRING), ref(SelfTokenId.NUMBER), objectLiteral)); + + Element unaryExprHead = alt(constant, ref(SelfTokenId.IDENTIFIER)); + Element unaryExprTail = rep(ref(SelfTokenId.IDENTIFIER), ListItem::empty, ListItem::new, ListItem::self); + unaryLevel.define(seq(unaryExprHead, unaryExprTail, (t, u) -> { + return null; + })); + + Element binaryExpr = alt( + seq(ref(SelfTokenId.OPERATOR), unaryLevel, (t, u) -> { + 
return null; + }), + seq(unaryLevel, ref(SelfTokenId.OPERATOR), unaryLevel, (u1, t, u2) -> { + return null; + }) + ); + binaryLevel.define(binaryExpr); + + Element keywordSeq = seq(ref(SelfTokenId.KEYWORD_LOWERCASE), binaryLevel, rep( + seq(ref(SelfTokenId.KEYWORD), keywordLevel, (arg0, arg1) -> { + return null; + }), + ListItem::empty, ListItem::new, ListItem::self + ), (a, b, c) -> { + return null; + }); + keywordLevel.define(keywordSeq); + expression.define(alt(keywordLevel, binaryLevel)); + exprlist.define(seq(expression, rep(seq(ref(SelfTokenId.DOT), expression, (arg0, arg1) -> { + return null; + }), ListItem::empty, ListItem::new, ListItem::self), (arg0, arg1) -> { + return null; + })); + PARSER.initialize(program); + } + public static void parse(Source s, Consumer registrar) { TokenSequence seq = TokenHierarchy.create(s.getCharacters(), SelfTokenId.language()).tokenSequence(SelfTokenId.language()); - while (seq.moveNext()) { - final SelfTokenId id = seq.token().id(); - if (id == SelfTokenId.LPAREN) { - parseObject(seq, registrar); + class SeqLexer implements PELexer { + private final Object[] self = new Object[] { this }; + { + nextTokenMove(); + } + + @Override + public Object[] asArgumentsArray() { + return self; + } + + @Override + public Token peek(ConditionProfile seenEof) { + return seq.token(); + } + + @Override + public String position() { + return "at: " + seq.offset() + ": " + seq.subSequence(seq.offset()).toString(); + } + + @Override + public void resetStackPointer(int pointer) { + seq.move(pointer); + seq.moveNext(); + } + + @Override + public Token nextToken(ConditionProfile seenEof) { + Token token = peek(seenEof); + nextTokenMove(); + return token; + } + + private void nextTokenMove() { + while (seq.moveNext()) { + final Token lookahead = seq.token(); + if (lookahead.id() != SelfTokenId.WHITESPACE) { + break; + } + } + } + + @Override + public int getStackPointer() { + return seq.offset(); + } + + @Override + public String tokenNames(TokenId 
id) { + return "token " + id; + } + + @Override + public String toString() { + return position(); } } + BasicNode bn = (BasicNode) PARSER.parse(new SeqLexer()); + bn.print(registrar); } - static boolean parseObject(TokenSequence seq, Consumer registrar) { - assert seq.token().id() == SelfTokenId.LPAREN; - if (!seq.moveNext()) { - return false; + private static final class Slot { + + private static Slot argument(CharSequence text) { + return new Slot(text, false, null); } - if (seq.token().id() == SelfTokenId.BAR) { - for (;;) { - if (!seq.moveNext()) { - return false; - } - if (seq.token().id() == SelfTokenId.BAR) { - break; - } - } + + private final CharSequence id; + private final boolean mutable; + private final Object value; + + public Slot(CharSequence id, boolean mutable, Object value) { + this.id = id; + this.mutable = mutable; + this.value = value; } - for (;;) { - if (seq.token().id() == SelfTokenId.RPAREN) { - registrar.accept(new Object()); - break; - } - if (!seq.moveNext()) { - return false; + + private Object valueToString() { + if (value instanceof Token) { + return ((Token)value).text().toString(); } + return value.toString(); } - return true; } } enum SelfTokenId implements TokenId { WHITESPACE(null, "whitespace"), - IDENTIFIER(null, null), - KEYWORD(null, null), - ARGUMENT(null, null), + IDENTIFIER(null, "identifier"), + SELF(null, "identifier"), + RESEND(null, "identifier"), + KEYWORD_LOWERCASE(null, "identifier"), + KEYWORD(null, "identifier"), + ARGUMENT(null, "identifier"), OPERATOR(null, null), NUMBER(null, "number"), STRING(null, "string"), @@ -70,6 +254,9 @@ enum SelfTokenId implements TokenId { LPAREN("(", "separator"), RPAREN(")", "separator"), BAR("|", "separator"), + DOT(".", "separator"), + EQUAL("=", "separator"), + ARROW("<-", "separator"), ERROR(null, "error"); @@ -144,9 +331,11 @@ public Token nextToken() { case ')': return token(SelfTokenId.RPAREN); + case '.': + return token(SelfTokenId.DOT); + case '0': case '1': case '2': case 
'3': case '4': case '5': case '6': case '7': case '8': case '9': - case '.': return finishIntOrFloatLiteral(ch); case '!': case '@': case '#': case '$': case '%': @@ -172,8 +361,12 @@ public Token nextToken() { } } input.backup(1); - if (justOne && ch == '|') { - return token(SelfTokenId.BAR); + if (justOne) { + switch (ch) { + case '|': return token(SelfTokenId.BAR); + case '=': return token(SelfTokenId.EQUAL); + case '.': return token(SelfTokenId.DOT); + } } return token(SelfTokenId.OPERATOR); } @@ -239,11 +432,7 @@ private Token consumeString(int ch) { ch = input.read(); switch (ch) { case '\\': - if (backslash) { - backslash = false; - } else { - backslash = true; - } + backslash = !backslash; break; case '\'': if (!backslash) { @@ -339,9 +528,9 @@ private Token token(SelfTokenId id) { : tokenFactory.createToken(id); } - private Token consumeIdentifier(int ch) { + private Token consumeIdentifier(int first) { for (;;) { - ch = input.read(); + int ch = input.read(); if (Character.isLetterOrDigit(ch)) { continue; } @@ -350,14 +539,93 @@ private Token consumeIdentifier(int ch) { } SelfTokenId id; if (':' == ch) { - id = SelfTokenId.KEYWORD; + id = Character.isLowerCase(first) ? SelfTokenId.KEYWORD_LOWERCASE : SelfTokenId.KEYWORD; } else { input.backup(1); // backup the extra char (or EOF) - id = SelfTokenId.IDENTIFIER; + switch (input.readText().toString()) { + case "self": id = SelfTokenId.SELF; break; + case "resend": id = SelfTokenId.RESEND; break; + default: id = SelfTokenId.IDENTIFIER; + } } return token(id); } } + static class BasicNode { + private final String name; + private final BasicNode[] children; + + BasicNode(String name, BasicNode... 
children) { + this.name = name; + this.children = children; + } + BasicNode(String name, List children) { + this.name = name; + this.children = children.toArray(new BasicNode[children.size()]); + } + + public void print(int level) { + for (int i = 0; i < level; i++) { + System.out.print(" "); + } + System.out.println(name); + for (BasicNode child : children) { + child.print(level + 1); + } + } + + void print(Consumer registrar) { + for (BasicNode child : children) { + child.print(registrar); + } + } + } + + public static A selectFirst(A a, @SuppressWarnings("unused") B b) { + return a; + } + + public static B selectSecond(@SuppressWarnings("unused") A a, B b) { + return b; + } + + public static BasicNode[] concat(BasicNode first, ListItem rest) { + final int size = ListItem.size(rest); + BasicNode[] result = new BasicNode[size + 1]; + result[0] = first; + for (int i = size; i >= 1; i--) { + result[i + 1] = rest.item; + rest = rest.prev; + } + return result; + } + + public static final class ListItem { + final ListItem prev; + final E item; + + public ListItem(ListItem prev, E item) { + this.prev = prev; + this.item = item; + } + + public static ListItem empty() { + return null; + } + + public static ListItem self(ListItem self) { + return self; + } + + public static int size(ListItem item) { + int cnt = 0; + while (item != null) { + cnt++; + item = item.prev; + } + return cnt; + } + } } diff --git a/src/test/java/org/apidesign/language/self/SelfParserTest.java b/src/test/java/org/apidesign/language/self/SelfParserTest.java index 8eefa76..53439be 100644 --- a/src/test/java/org/apidesign/language/self/SelfParserTest.java +++ b/src/test/java/org/apidesign/language/self/SelfParserTest.java @@ -1,6 +1,7 @@ package org.apidesign.language.self; import com.oracle.truffle.api.source.Source; +import java.util.Map; import java.util.function.Consumer; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -31,7 +32,7 @@ public void 
testLexingTheInput() { @Test public void identifiers() { - String text = " i _IntAdd cloud9 m a_point \n\t\r NotAnIdent"; + String text = " i _IntAdd cloud9 resend m a_point \n\t\r NotAnIdent self"; TokenSequence seq = TokenHierarchy.create(text, SelfTokenId.language()).tokenSequence(SelfTokenId.language()); assertNextToken(SelfTokenId.WHITESPACE, seq); @@ -41,11 +42,15 @@ public void identifiers() { assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.IDENTIFIER, seq).text("cloud9"); assertNextToken(SelfTokenId.WHITESPACE, seq); + assertNextToken(SelfTokenId.RESEND, seq); + assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.IDENTIFIER, seq).text("m"); assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.IDENTIFIER, seq).text("a_point"); assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.ERROR, seq).text("NotAnIdent"); + assertNextToken(SelfTokenId.WHITESPACE, seq); + assertNextToken(SelfTokenId.SELF, seq); /* "self" now lexes as its own SELF token, not a plain identifier */ assertFalse("At the end of input", seq.moveNext()); } @@ -55,7 +60,7 @@ public void keywords() { TokenSequence seq = TokenHierarchy.create(text, SelfTokenId.language()).tokenSequence(SelfTokenId.language()); assertNextToken(SelfTokenId.WHITESPACE, seq); - assertNextToken(SelfTokenId.KEYWORD, seq).text("at:"); + assertNextToken(SelfTokenId.KEYWORD_LOWERCASE, seq).text("at:"); assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.ERROR, seq).text("NoKeyword"); assertNextToken(SelfTokenId.WHITESPACE, seq); @@ -99,12 +104,14 @@ public void operators() { @Test public void numbers() { - String text = "\r123 3.14 1272.34e+15 1e10 1272.34e-15 16r27fe -5"; + String text = "\r123 . 
3.14 1272.34e+15 1e10 1272.34e-15 16r27fe -5"; TokenSequence seq = TokenHierarchy.create(text, SelfTokenId.language()).tokenSequence(SelfTokenId.language()); assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.NUMBER, seq).text("123"); assertNextToken(SelfTokenId.WHITESPACE, seq); + assertNextToken(SelfTokenId.DOT, seq).text("."); /* a lone "." lexes as the DOT token */ + assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.NUMBER, seq).text("3.14"); assertNextToken(SelfTokenId.WHITESPACE, seq); assertNextToken(SelfTokenId.NUMBER, seq).text("1272.34e+15"); @@ -145,6 +152,22 @@ public void comments() { assertFalse("At the end of input", seq.moveNext()); } + @Test + public void parseCodeObject() { + Source s = Source.newBuilder("Self", "( 1 + 2 )", "empty.sf").build(); + class Collect implements Consumer { + Object obj; + @Override + public void accept(Object arg0) { + assertNull("No object yet", obj); + obj = arg0; + } + } + Collect c = new Collect(); + SelfParser.parse(s, c); + + assertNotNull("Object created", c.obj); + } @Test public void parseEmptyObject() { Source s = Source.newBuilder("Self", "()", "empty.sf").build(); @@ -162,6 +185,124 @@ public void accept(Object arg0) { assertNotNull("Object created", c.obj); } + @Test + public void parseEmptyObjectWithSlots() { + Source s = Source.newBuilder("Self", "( | | )", "empty.sf").build(); + class Collect implements Consumer { + Object obj; + @Override + public void accept(Object arg0) { + assertNull("No object yet", obj); + obj = arg0; + } + } + Collect c = new Collect(); + SelfParser.parse(s, c); + + assertNotNull("Object created", c.obj); + } + + @Test + public void parseEmptyObjectWithOneSlot() { + Source s = Source.newBuilder("Self", "( | x = 's' | )", "empty.sf").build(); + class Collect implements Consumer { + Object obj; + @Override + public void accept(Object arg0) { + assertNull("No object yet", obj); + obj = arg0; + } + } + Collect c = new Collect(); + SelfParser.parse(s, c); + + 
assertNotNull("Object created", c.obj); + assertTrue("Instance of hash map: " + c.obj, c.obj instanceof Map); + Map map = (Map) c.obj; + assertEquals("Value of x is s", "'s'", map.get("x")); + } + + @Test + public void parseIdFn() { + Source s = Source.newBuilder("Self", "( | id: n = ( ^n ) | )", "empty.sf").build(); + class Collect implements Consumer { + Object obj; + @Override + public void accept(Object arg0) { + assertNull("No object yet", obj); + obj = arg0; + } + } + Collect c = new Collect(); + SelfParser.parse(s, c); + + assertNotNull("Object created", c.obj); + assertTrue("Instance of hash map: " + c.obj, c.obj instanceof Map); + Map map = (Map) c.obj; + assertNotNull("Value of id is set", map.get("id:")); + } + + @Test + public void parsePlusFn() { + Source s = Source.newBuilder("Self", "( | plus: n = ( n + 1 ) | ) plus: 3", "plus.sf").build(); + class Collect implements Consumer { + Object obj; + @Override + public void accept(Object arg0) { + assertNull("No object yet", obj); + obj = arg0; + } + } + Collect c = new Collect(); + SelfParser.parse(s, c); + + assertNotNull("Object created", c.obj); + assertTrue("Instance of hash map: " + c.obj, c.obj instanceof Map); + Map map = (Map) c.obj; + assertNotNull("Value of plus: is set", map.get("plus:")); /* message fixed: this test checks the "plus:" slot, not "id" */ + } + + @Test + public void parseConstantFn() { + Source s = Source.newBuilder("Self", "( | id: n = 'e' | )", "empty.sf").build(); + class Collect implements Consumer { + Object obj; + @Override + public void accept(Object arg0) { + assertNull("No object yet", obj); + obj = arg0; + } + } + Collect c = new Collect(); + SelfParser.parse(s, c); + + assertNotNull("Object created", c.obj); + assertTrue("Instance of hash map: " + c.obj, c.obj instanceof Map); + Map map = (Map) c.obj; + assertEquals("Value of id is object", "'e'", map.get("id:")); + } + + @Test + public void parseEmptyObjectWithTwoSlots() { + Source s = Source.newBuilder("Self", "( | x = 's' . 
y = 3 | )", "empty.sf").build(); + class Collect implements Consumer { + Object obj; + @Override + public void accept(Object arg0) { + assertNull("No object yet", obj); + obj = arg0; + } + } + Collect c = new Collect(); + SelfParser.parse(s, c); + + assertNotNull("Object created", c.obj); + assertTrue("Instance of hash map: " + c.obj, c.obj instanceof Map); + Map map = (Map) c.obj; + assertEquals("Value of x is s", "'s'", map.get("x")); + assertEquals("Value of y is 3", "3", map.get("y")); /* message fixed: expected value is "3", not "s" */ + } + private TokenHandle assertNextToken(String text, TokenSequence seq) { assertTrue("There is more tokens", seq.moveNext()); Token token = seq.token();