From 4f9a16a9d6280b06ca990255757fbf9aa49e94b8 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 4 Oct 2015 16:08:24 +0500 Subject: [PATCH] Ranges (pegjs/pegjs#30): Implement ranges support. Range syntax: ``` expression| exact | expression| .. | expression|min.. | expression| ..max| expression|min..max| ``` Introduce two new opcodes: * IF_LT min, t, f * IF_GE max, t, f Introduce a new AST node, `repeated`, which contains an expression and the minimum and maximum number of its repetitions. If `node.min.value` is `null` or is not positive, the minimum length is not checked. If `node.max.value` is `null`, the maximum length is not checked. If `node.min` is `null`, the minimum is equal to `node.max` (the exact-repetition case). --- lib/compiler/asts.js | 6 ++ lib/compiler/opcodes.js | 6 +- lib/compiler/passes/generate-bytecode.js | 85 +++++++++++++++++++ lib/compiler/passes/generate-js.js | 8 ++ lib/compiler/passes/inference-match-result.js | 7 ++ .../passes/report-duplicate-labels.js | 1 + .../passes/report-infinite-repetition.js | 19 ++++- lib/compiler/visitor.js | 1 + lib/peg.d.ts | 46 +++++++++- src/parser.pegjs | 33 +++++++ test/types/peg.test-d.ts | 17 +++- 11 files changed, 221 insertions(+), 8 deletions(-) diff --git a/lib/compiler/asts.js b/lib/compiler/asts.js index 73b0ba6f..f7331849 100644 --- a/lib/compiler/asts.js +++ b/lib/compiler/asts.js @@ -43,6 +43,12 @@ const asts = { simple_not: consumesFalse, optional: consumesFalse, zero_or_more: consumesFalse, + repeated(node) { + // Handle exact case + const min = node.min ? node.min : node.max; + + return min.value > 0 ? 
consumes(node.expression) : false; + }, semantic_and: consumesFalse, semantic_not: consumesFalse, diff --git a/lib/compiler/opcodes.js b/lib/compiler/opcodes.js index 58d8e522..a15daa75 100644 --- a/lib/compiler/opcodes.js +++ b/lib/compiler/opcodes.js @@ -26,6 +26,8 @@ const opcodes = { IF: 13, // IF t, f IF_ERROR: 14, // IF_ERROR t, f IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f + IF_LT: 30, // IF_LT min, t, f + IF_GE: 31, // IF_GE max, t, f WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b // Matching @@ -60,7 +62,9 @@ const opcodes = { // sections above are repeated here in order to ensure we don't // reuse them. // - // 30-34 reserved for @mingun + // IF_LT: 30 + // IF_GE: 31 + // 32-34 reserved for @mingun // PUSH_EMPTY_STRING: 35 // PLUCK: 36 diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js index c8709ac4..841436e6 100644 --- a/lib/compiler/passes/generate-bytecode.js +++ b/lib/compiler/passes/generate-bytecode.js @@ -106,6 +106,22 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc // interpret(ip + 3 + t, ip + 3 + t + f); // } // +// [30] IF_LT min, t, f +// +// if (stack.top().length < min) { +// interpret(ip + 3, ip + 3 + t); +// } else { +// interpret(ip + 3 + t, ip + 3 + t + f); +// } +// +// [31] IF_GE max, t, f +// +// if (stack.top().length >= max) { +// interpret(ip + 3, ip + 3 + t); +// } else { +// interpret(ip + 3 + t, ip + 3 + t + f); +// } +// // [16] WHILE_NOT_ERROR b // // while(stack.top() !== FAILED) { @@ -384,6 +400,51 @@ function generateBytecode(ast, options) { ); } + /* eslint capitalized-comments: "off" */ + /** + * @param {number[]} expressionCode Bytecode for parsing repetitions + * @param {import("../../peg").ast.RepeatedBoundary} max Maximum boundary of repetitions. 
+ * If `null`, the maximum boundary is unlimited + * + * @returns {number[]} Bytecode that performs check of the maximum boundary + */ + function buildCheckMax(expressionCode, max) { + if (max.value !== null) { + // Push `peg$FAILED` - this break loop on next iteration, so |result| + // will contains not more then |max| elements. + return buildCondition( + SOMETIMES_MATCH, + [op.IF_GE, max.value], // if (r.length >= max) stack:[ [elem...] ] + [op.PUSH_FAILED], // elem = peg$FAILED; stack:[ [elem...], peg$FAILED ] + expressionCode // else + ); // elem = expr(); stack:[ [elem...], elem ] + } + + return expressionCode; + } + + /* eslint capitalized-comments: "off" */ + /** + * @param {number[]} expressionCode Bytecode for parsing repeated elements + * @param {import("../../peg").ast.RepeatedBoundary} min Minimum boundary of repetitions. + * If `null`, the minimum boundary is zero + * + * @returns {number[]} Bytecode that performs check of the minimum boundary + */ + function buildCheckMin(expressionCode, min) { + return buildSequence( + expressionCode, // result = [elem...]; stack:[ pos, [elem...] ] + buildCondition( + SOMETIMES_MATCH, + [op.IF_LT, min.value], // if (result.length < min) { + [op.POP, op.POP_CURR_POS, // currPos = savedPos; stack:[ ] + // eslint-disable-next-line indent + op.PUSH_FAILED], // result = peg$FAILED; stack:[ peg$FAILED ] + [op.NIP] // } stack:[ [elem...] ] + ) + ); + } + function wrapGenerators(generators) { if (options && options.output === "source-and-map") { Object.entries(generators).forEach(([name, generator]) => { @@ -703,6 +764,30 @@ function generateBytecode(ast, options) { ); }, + repeated(node, context) { + // Handle case when minimum was literally equals to maximum + const min = node.min ? node.min : node.max; + const hasMin = min.value > 0; + const expressionCode = generate(node.expression, { + sp: context.sp + (hasMin ? 2 : 1), + env: cloneEnv(context.env), + action: null, + }); + // Check the high boundary, if it is defined. 
+ const checkMaxCode = buildCheckMax(expressionCode, node.max); + const mainLoopCode = buildSequence( + // If the low boundary present, then backtracking is possible, so save the current pos + hasMin ? [op.PUSH_CURR_POS] : [], // var savedPos = curPos; stack:[ pos ] + [op.PUSH_EMPTY_ARRAY], // var result = []; stack:[ pos, [] ] + expressionCode, // var elem = expr(); stack:[ pos, [], elem ] + buildAppendLoop(checkMaxCode), // while(...)r.push(elem); stack:[ pos, [...], elem|peg$FAILED ] + [op.POP] // stack:[ pos, [elem...] ] (pop elem===`peg$FAILED`) + ); + + // Check the low boundary, if it is defined and not |0|. + return hasMin ? buildCheckMin(mainLoopCode, min) : mainLoopCode; + }, + group(node, context) { return generate(node.expression, { sp: context.sp, diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index d5e4abec..a733f612 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -483,6 +483,14 @@ function generateJS(ast, options) { compileCondition(stack.top() + " !== peg$FAILED", 0); break; + case op.IF_LT: // IF_LT min, t, f + compileCondition(stack.top() + ".length < " + bc[ip + 1], 1); + break; + + case op.IF_GE: // IF_GE max, t, f + compileCondition(stack.top() + ".length >= " + bc[ip + 1], 1); + break; + case op.WHILE_NOT_ERROR: // WHILE_NOT_ERROR b compileLoop(stack.top() + " !== peg$FAILED"); break; diff --git a/lib/compiler/passes/inference-match-result.js b/lib/compiler/passes/inference-match-result.js index 819d649a..d793bb3d 100644 --- a/lib/compiler/passes/inference-match-result.js +++ b/lib/compiler/passes/inference-match-result.js @@ -96,6 +96,13 @@ function inferenceMatchResult(ast) { optional: alwaysMatch, zero_or_more: alwaysMatch, one_or_more: inferenceExpression, + repeated(node) { + const match = inference(node.expression); + // Handle exact case + const min = node.min ? node.min : node.max; + + return (node.match = min.value > 0 ? 
match : ALWAYS_MATCH); + }, group: inferenceExpression, semantic_and: sometimesMatch, semantic_not: sometimesMatch, diff --git a/lib/compiler/passes/report-duplicate-labels.js b/lib/compiler/passes/report-duplicate-labels.js index d79bb866..a836c8dc 100644 --- a/lib/compiler/passes/report-duplicate-labels.js +++ b/lib/compiler/passes/report-duplicate-labels.js @@ -55,6 +55,7 @@ function reportDuplicateLabels(ast, options, session) { optional: checkExpressionWithClonedEnv, zero_or_more: checkExpressionWithClonedEnv, one_or_more: checkExpressionWithClonedEnv, + repeated: checkExpressionWithClonedEnv, group: checkExpressionWithClonedEnv, }); diff --git a/lib/compiler/passes/report-infinite-repetition.js b/lib/compiler/passes/report-infinite-repetition.js index 3606c301..6cd2ce6f 100644 --- a/lib/compiler/passes/report-infinite-repetition.js +++ b/lib/compiler/passes/report-infinite-repetition.js @@ -3,7 +3,7 @@ const asts = require("../asts"); const visitor = require("../visitor"); -// Reports expressions that don't consume any input inside |*| or |+| in the +// Reports expressions that don't consume any input inside |*|, |+| or repeated in the // grammar, which prevents infinite loops in the generated parser. 
function reportInfiniteRepetition(ast, options, session) { const check = visitor.build({ @@ -24,6 +24,23 @@ function reportInfiniteRepetition(ast, options, session) { ); } }, + + repeated(node) { + if (asts.alwaysConsumesOnSuccess(ast, node.expression)) { + return; + } + if (node.max.value === null) { + session.error( + "Possible infinite loop when parsing (unbounded range repetition used with an expression that may not consume any input)", + node.location + ); + } else { + session.warning( + `An expression always match ${node.max.value} times, because it does not consume any input`, + node.location + ); + } + }, }); check(ast); diff --git a/lib/compiler/visitor.js b/lib/compiler/visitor.js index ed970a08..5a78ce0e 100644 --- a/lib/compiler/visitor.js +++ b/lib/compiler/visitor.js @@ -53,6 +53,7 @@ const visitor = { optional: visitExpression, zero_or_more: visitExpression, one_or_more: visitExpression, + repeated: visitExpression, group: visitExpression, semantic_and: visitNop, semantic_not: visitNop, diff --git a/lib/peg.d.ts b/lib/peg.d.ts index d84ca7d7..9d4fae7f 100644 --- a/lib/peg.d.ts +++ b/lib/peg.d.ts @@ -139,6 +139,7 @@ declare namespace ast { | Labeled | Prefixed | Primary + | Repeated | Sequence | Suffixed; @@ -148,6 +149,7 @@ declare namespace ast { | Labeled | Prefixed | Primary + | Repeated | Sequence | Suffixed; @@ -161,7 +163,12 @@ declare namespace ast { interface Action extends CodeBlockExpr<"action"> { expression: ( - Labeled | Prefixed | Primary | Sequence | Suffixed + Labeled + | Prefixed + | Primary + | Repeated + | Sequence + | Suffixed ); } @@ -170,6 +177,7 @@ declare namespace ast { = Labeled | Prefixed | Primary + | Repeated | Suffixed; interface Sequence extends Expr<"sequence"> { @@ -192,12 +200,12 @@ declare namespace ast { */ labelLocation: LocationRange; /** Expression which result will be available in the user code under name `label`. 
*/ - expression: Prefixed | Primary | Suffixed; + expression: Prefixed | Primary | Repeated | Suffixed; } /** Expression with a preceding operator. */ interface Prefixed extends Expr<"simple_and" | "simple_not" | "text"> { - expression: Primary | Suffixed; + expression: Primary | Repeated | Suffixed; } /** Expression with a following operator. */ @@ -205,6 +213,31 @@ declare namespace ast { expression: Primary; } + interface Boundary { + type: T; + location: LocationRange; + } + + interface ConstantBoundary extends Boundary<"constant"> { + /** Repetition count. Always a positive integer. */ + value: number; + } + + type RepeatedBoundary + = ConstantBoundary; + + /** Expression repeated from `min` to `max` times. */ + interface Repeated extends Expr<"repeated"> { + /** + * Minimum count of repetitions. If `null` then exact repetition + * is used and minimum the same as maximum. + */ + min: RepeatedBoundary | null; + /** Maximum count of repetitions. */ + max: RepeatedBoundary; + expression: Primary; + } + type Primary = Any | CharacterClass @@ -673,6 +706,13 @@ export namespace compiler { * @param args Any arguments passed to the `Visitor` */ one_or_more?(node: ast.Suffixed, ...args: any[]): any; + /** + * Default behavior: run visitor on `expression` and return it result + * + * @param node Node, representing repetition of the `expression` specified number of times + * @param args Any arguments passed to the `Visitor` + */ + repeated?(node: ast.Repeated, ...args: any[]): any; /** * Default behavior: run visitor on `expression` and return it result * diff --git a/src/parser.pegjs b/src/parser.pegjs index 30d0635f..56741c24 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -197,6 +197,7 @@ SuffixedExpression location: location() }; } + / RepeatedExpression / PrimaryExpression SuffixedOperator @@ -204,6 +205,35 @@ SuffixedOperator / "*" / "+" +RepeatedExpression + = expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ "|" { + let min = boundaries[0]; 
+ let max = boundaries[1]; + if (max.value === 0) { + error("The maximum count of repetitions of the rule must be > 0", max.location); + } + + return { + type: "repeated", + min, + max, + expression, + location: location(), + }; + } + +Boundaries + = min:Boundary? __ ".." __ max:Boundary? { + return [ + min !== null ? min : { type: "constant", value: 0 }, + max !== null ? max : { type: "constant", value: null }, + ]; + } + / exact:Boundary { return [null, exact]; } + +Boundary + = value:Integer { return { type: "constant", value, location: location() }; } + PrimaryExpression = LiteralMatcher / CharacterClassMatcher @@ -430,6 +460,9 @@ BareCodeBlock Code = $((![{}] SourceCharacter)+ / "{" Code "}")* +Integer + = digits:$DecimalDigit+ { return parseInt(digits, 10); } + // Unicode Character Categories // // Extracted from the following Unicode Character Database file: diff --git a/test/types/peg.test-d.ts b/test/types/peg.test-d.ts index 6426b30d..6e9a56d7 100644 --- a/test/types/peg.test-d.ts +++ b/test/types/peg.test-d.ts @@ -257,6 +257,7 @@ describe("peg.d.ts", () => { peggy.ast.Labeled | peggy.ast.Prefixed | peggy.ast.Primary | + peggy.ast.Repeated | peggy.ast.Sequence | peggy.ast.Suffixed>(node.expression); visit(node.expression); @@ -280,6 +281,7 @@ describe("peg.d.ts", () => { expectType< peggy.ast.Prefixed | peggy.ast.Primary | + peggy.ast.Repeated | peggy.ast.Suffixed>(node.expression); visit(node.expression); }, @@ -289,7 +291,10 @@ describe("peg.d.ts", () => { expectType<"simple_and" | "simple_not" | "text">(node.type); expect(node.type).toBe("text"); expectType(node.location); - expectType(node.expression); + expectType< + peggy.ast.Primary | + peggy.ast.Repeated | + peggy.ast.Suffixed>(node.expression); visit(node.expression); }, simple_and(node) { @@ -298,7 +303,10 @@ describe("peg.d.ts", () => { expectType<"simple_and" | "simple_not" | "text">(node.type); expect(node.type).toBe("simple_and"); expectType(node.location); - expectType(node.expression); + 
expectType< + peggy.ast.Primary | + peggy.ast.Repeated | + peggy.ast.Suffixed>(node.expression); visit(node.expression); }, simple_not(node) { @@ -306,7 +314,10 @@ describe("peg.d.ts", () => { expectType(node); expectType<"simple_and" | "simple_not" | "text">(node.type); expect(node.type).toBe("simple_not"); - expectType(node.expression); + expectType< + peggy.ast.Primary | + peggy.ast.Repeated | + peggy.ast.Suffixed>(node.expression); visit(node.expression); }, optional(node) {