From 2826da9e5bea55cb3f7b258534ddadcb7dd652b1 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 4 Oct 2015 16:38:04 +0500 Subject: [PATCH] Ranges (pegjs/pegjs#30): Add ability for use labels as range boundaries Added two new opcodes: - IF_LT_DYNAMIC: same as IF_LT, but the argument is a reference to the stack variable instead of constant - IF_GE_DYNAMIC: same as IF_GE, but the argument is a reference to the stack variable instead of constant --- lib/compiler/asts.js | 11 +++- lib/compiler/opcodes.js | 6 +- lib/compiler/passes/generate-bytecode.js | 58 +++++++++++++++---- lib/compiler/passes/generate-js.js | 8 +++ lib/compiler/passes/inference-match-result.js | 31 +++++++++- .../passes/report-infinite-repetition.js | 9 ++- lib/peg.d.ts | 8 ++- src/parser.pegjs | 3 +- 8 files changed, 116 insertions(+), 18 deletions(-) diff --git a/lib/compiler/asts.js b/lib/compiler/asts.js index f7331849..a15beaec 100644 --- a/lib/compiler/asts.js +++ b/lib/compiler/asts.js @@ -44,10 +44,17 @@ const asts = { optional: consumesFalse, zero_or_more: consumesFalse, repeated(node) { - // Handle exact case + // If minimum is `null` it is equals to maximum (parsed from `|exact|` syntax) const min = node.min ? node.min : node.max; - return min.value > 0 ? consumes(node.expression) : false; + // If the low boundary is variable then it can be zero. 
+ // Expression, repeated zero times, does not consume any input + // but always matched - so it does not always consumes on success + if (min.type !== "constant" || min.value === 0) { + return false; + } + + return consumes(node.expression); }, semantic_and: consumesFalse, semantic_not: consumesFalse, diff --git a/lib/compiler/opcodes.js b/lib/compiler/opcodes.js index 2596b3ac..f17c2b78 100644 --- a/lib/compiler/opcodes.js +++ b/lib/compiler/opcodes.js @@ -28,6 +28,8 @@ const opcodes = { IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f IF_LT: 30, // IF_LT min, t, f IF_GE: 31, // IF_GE max, t, f + IF_LT_DYNAMIC: 32, // IF_LT_DYNAMIC min, t, f + IF_GE_DYNAMIC: 33, // IF_GE_DYNAMIC max, t, f WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b // Matching @@ -64,7 +66,9 @@ const opcodes = { // // IF_LT: 30 // IF_GE: 31 - // 32-34 reserved for @mingun + // IF_LT_DYNAMIC: 32 + // IF_GE_DYNAMIC: 33 + // 34 reserved for @mingun // PUSH_EMPTY_STRING: 35 // PLUCK: 36 }; diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js index a04e4d36..795a6aa6 100644 --- a/lib/compiler/passes/generate-bytecode.js +++ b/lib/compiler/passes/generate-bytecode.js @@ -122,6 +122,22 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc // interpret(ip + 3 + t, ip + 3 + t + f); // } // +// [32] IF_LT_DYNAMIC min, t, f +// +// if (stack.top().length < stack[min]) { +// interpret(ip + 3, ip + 3 + t); +// } else { +// interpret(ip + 3 + t, ip + 3 + t + f); +// } +// +// [33] IF_GE_DYNAMIC max, t, f +// +// if (stack.top().length >= stack[max]) { +// interpret(ip + 3, ip + 3 + t); +// } else { +// interpret(ip + 3 + t, ip + 3 + t + f); +// } +// // [16] WHILE_NOT_ERROR b // // while(stack.top() !== FAILED) { @@ -376,16 +392,22 @@ function generateBytecode(ast) { * @param {number[]} expressionCode Bytecode for parsing repetitions * @param {import("../../peg").ast.RepeatedBoundary} max Maximum boundary of repetitions. 
* If `null`, the maximum boundary is unlimited + * @param {object} context + * @param {number} sp Pointer to the top of the variable stack * * @returns {number[]} Bytecode that performs check of the maximum boundary */ - function buildCheckMax(expressionCode, max) { + function buildCheckMax(expressionCode, max, context, sp) { if (max.value !== null) { + const checkCode = max.type === "constant" + ? [op.IF_GE, max.value] + : [op.IF_GE_DYNAMIC, sp - context.env[max.value]]; + // Push `peg$FAILED` - this break loop on next iteration, so |result| // will contains not more then |max| elements. return buildCondition( SOMETIMES_MATCH, - [op.IF_GE, max.value], // if (r.length >= max) stack:[ [elem...] ] + checkCode, // if (r.length >= max) stack:[ [elem...] ] [op.PUSH_FAILED], // elem = peg$FAILED; stack:[ [elem...], peg$FAILED ] expressionCode // else ); // elem = expr(); stack:[ [elem...], elem ] @@ -402,12 +424,16 @@ function generateBytecode(ast) { * * @returns {number[]} Bytecode that performs check of the minimum boundary */ - function buildCheckMin(expressionCode, min) { + function buildCheckMin(expressionCode, min, context) { + const checkCode = min.type === "constant" + ? [op.IF_LT, min.value] + : [op.IF_LT_DYNAMIC, context.sp + 2 - context.env[min.value]]; + return buildSequence( expressionCode, // result = [elem...]; stack:[ pos, [elem...] ] buildCondition( SOMETIMES_MATCH, - [op.IF_LT, min.value], // if (result.length < min) { + checkCode, // if (result.length < min) { [op.POP, op.POP_CURR_POS, // currPos = savedPos; stack:[ ] // eslint-disable-next-line indent op.PUSH_FAILED], // result = peg$FAILED; stack:[ peg$FAILED ] @@ -697,25 +723,37 @@ function generateBytecode(ast) { repeated(node, context) { // Handle case when minimum was literally equals to maximum const min = node.min ? 
node.min : node.max; - const hasMin = min.value > 0; + const hasMin = min.type !== "constant" || min.value > 0; + const hasBoundedMax = node.max.type !== "constant" && node.max.value !== null; + const sp = context.sp + (hasMin ? 2 : 1); + const expressionCode = generate(node.expression, { - sp: context.sp + (hasMin ? 2 : 1), + sp, env: cloneEnv(context.env), action: null, }); // Check the high boundary, if it is defined. - const checkMaxCode = buildCheckMax(expressionCode, node.max); + const checkMaxCode = buildCheckMax( + expressionCode, node.max, context, sp + ); + // For dynamic high boundary we need check the first iteration, because the result can be + // empty. Constant boundaries does not require that check, because they are always >=1 + const firstElemCode = hasBoundedMax + ? checkMaxCode + : expressionCode; const mainLoopCode = buildSequence( // If the low boundary present, then backtracking is possible, so save the current pos hasMin ? [op.PUSH_CURR_POS] : [], // var savedPos = curPos; stack:[ pos ] [op.PUSH_EMPTY_ARRAY], // var result = []; stack:[ pos, [] ] - expressionCode, // var elem = expr(); stack:[ pos, [], elem ] + firstElemCode, // var elem = expr(); stack:[ pos, [], elem ] buildAppendLoop(checkMaxCode), // while(...)r.push(elem); stack:[ pos, [...], elem|peg$FAILED ] - [op.POP] // stack:[ pos, [elem...] ] (pop elem===`peg$FAILED`) + [op.POP] // stack:[ pos, [...] ] (pop elem===`peg$FAILED`) ); // Check the low boundary, if it is defined and not |0|. - return hasMin ? buildCheckMin(mainLoopCode, min) : mainLoopCode; + return hasMin + ? 
buildCheckMin(mainLoopCode, min, context) + : mainLoopCode; }, group(node, context) { diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index 8a431608..5e6af926 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -437,6 +437,14 @@ function generateJS(ast, options) { compileCondition(stack.top() + ".length >= " + bc[ip + 1], 1); break; + case op.IF_LT_DYNAMIC: // IF_LT_DYNAMIC min, t, f + compileCondition(stack.top() + ".length < (" + stack.index(bc[ip + 1]) + "|0)", 1); + break; + + case op.IF_GE_DYNAMIC: // IF_GE_DYNAMIC max, t, f + compileCondition(stack.top() + ".length >= (" + stack.index(bc[ip + 1]) + "|0)", 1); + break; + case op.WHILE_NOT_ERROR: // WHILE_NOT_ERROR b compileLoop(stack.top() + " !== peg$FAILED"); break; diff --git a/lib/compiler/passes/inference-match-result.js b/lib/compiler/passes/inference-match-result.js index d793bb3d..577e3aef 100644 --- a/lib/compiler/passes/inference-match-result.js +++ b/lib/compiler/passes/inference-match-result.js @@ -98,10 +98,37 @@ function inferenceMatchResult(ast) { one_or_more: inferenceExpression, repeated(node) { const match = inference(node.expression); - // Handle exact case + // If minimum is `null` it is equal to the maximum (parsed from `|exact|` syntax) const min = node.min ? node.min : node.max; - return (node.match = min.value > 0 ? 
match : ALWAYS_MATCH); + // If any boundary is variable, it can be negative, and in that case the + // node does not match, but it may match with some other values + if (min.type !== "constant" || node.max.type !== "constant") { + return (node.match = SOMETIMES_MATCH); + } + // Now both boundaries are constants + // If the upper boundary is zero or minimum exceeds maximum, + // matching is impossible + if (node.max.value === 0 + || node.max.value !== null && min.value > node.max.value + ) { + return (node.match = NEVER_MATCH); + } + + if (match === NEVER_MATCH) { + // If an expression always fails, a range will also always fail + // (with one exception: a never-matching expression repeated + // zero times always matches and returns an empty array). + return (node.match = min.value === 0 ? ALWAYS_MATCH : NEVER_MATCH); + } + if (match === ALWAYS_MATCH) { + return (node.match = ALWAYS_MATCH); + } + + // Here the expression sometimes matches. If it must be repeated at least once, + // the whole range sometimes matches; otherwise it always succeeds (at least + // an empty array is guaranteed) + return (node.match = min.value === 0 ? ALWAYS_MATCH : SOMETIMES_MATCH); }, group: inferenceExpression, semantic_and: sometimesMatch, diff --git a/lib/compiler/passes/report-infinite-repetition.js b/lib/compiler/passes/report-infinite-repetition.js index 6cd2ce6f..0620c578 100644 --- a/lib/compiler/passes/report-infinite-repetition.js +++ b/lib/compiler/passes/report-infinite-repetition.js @@ -35,8 +35,15 @@ function reportInfiniteRepetition(ast, options, session) { node.location ); } else { + // If minimum is `null` it is equal to the maximum (parsed from `|exact|` syntax) + const min = node.min ? 
node.min : node.max; + + // Because the high boundary is defined, infinite repetition is not possible, + // but the grammar will waste CPU time session.warning( - `An expression always match ${node.max.value} times, because it does not consume any input`, + min.type === "constant" && node.max.type === "constant" + ? `An expression may not consume any input and may always match ${node.max.value} times` + : "An expression may not consume any input and may always match with a maximum repetition count", node.location ); } diff --git a/lib/peg.d.ts b/lib/peg.d.ts index 43f1ff05..dcbadf88 100644 --- a/lib/peg.d.ts +++ b/lib/peg.d.ts @@ -205,8 +205,14 @@ declare namespace ast { value: number; } + interface VariableBoundary extends Boundary<"variable"> { + /** Repetition count - name of the label of one of the preceding expressions. */ + value: string; + } + type RepeatedBoundary - = ConstantBoundary; + = ConstantBoundary + | VariableBoundary; /** Expression repeated from `min` to `max` times. */ interface Repeated extends Expr<"repeated"> { diff --git a/src/parser.pegjs b/src/parser.pegjs index cfc5ce88..895e94c9 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -209,7 +209,7 @@ RepeatedExpression = expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ "|" { let min = boundaries[0]; let max = boundaries[1]; - if (max.value === 0) { + if (max.type === "constant" && max.value === 0) { error("The maximum count of repetitions of the rule must be > 0", max.location); } @@ -233,6 +233,7 @@ Boundaries Boundary = value:Integer { return { type: "constant", value, location: location() }; } + / value:IdentifierName { return { type: "variable", value: value[0], location: location() }; } PrimaryExpression = LiteralMatcher