From ad0a7ce9af7a7a9254ad315c7c8c188b8efc2b54 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 24 Oct 2021 02:42:05 +0500 Subject: [PATCH] Ranges (pegjs/pegjs#30): Add ability to use code blocks as range boundaries --- lib/compiler/passes/generate-bytecode.js | 90 ++++++++++++++++++++---- lib/compiler/passes/generate-js.js | 13 ++-- lib/peg.d.ts | 10 ++- src/parser.pegjs | 8 +++ 4 files changed, 98 insertions(+), 23 deletions(-) diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js index 795a6aa6..f43401da 100644 --- a/lib/compiler/passes/generate-bytecode.js +++ b/lib/compiler/passes/generate-bytecode.js @@ -387,21 +387,59 @@ function generateBytecode(ast) { ); } + /** + * + * @param {import("../../peg").ast.RepeatedBoundary} boundary + * @param {{ [label: string]: number}} env Mapping of label names to stack positions + * @param {number} sp Number of the first free slot in the stack + * + * @returns {{ pre: number[], post: number[], sp: number}} + * Bytecode that should be added before and after parsing and new + * first free slot in the stack + */ + function buildRangeCall(boundary, env, sp, offset) { + switch (boundary.type) { + case "constant": + return { pre: [], post: [], sp }; + case "variable": + boundary.sp = offset + sp - env[boundary.value]; + return { pre: [], post: [], sp }; + case "function": { + boundary.sp = offset; + + const functionIndex = addFunctionConst( + true, + Object.keys(env), + { code: boundary.value, codeLocation: boundary.codeLocation } + ); + + return { + pre: buildCall(functionIndex, 0, env, sp), + post: [op.NIP], + // +1 for the function result + sp: sp + 1, + }; + } + + // istanbul ignore next Because we never generate invalid boundary type we cannot reach this branch + default: + throw new Error(`Unknown boundary type "${boundary.type}" for the "repeated" node`); + } + } + /* eslint capitalized-comments: "off" */ /** * @param {number[]} expressionCode Bytecode for parsing repetitions * 
@param {import("../../peg").ast.RepeatedBoundary} max Maximum boundary of repetitions. * If `null`, the maximum boundary is unlimited - * @param {object} context - * @param {number} sp Pointer to the top of the variable stack * * @returns {number[]} Bytecode that performs check of the maximum boundary */ - function buildCheckMax(expressionCode, max, context, sp) { + function buildCheckMax(expressionCode, max) { if (max.value !== null) { const checkCode = max.type === "constant" ? [op.IF_GE, max.value] - : [op.IF_GE_DYNAMIC, sp - context.env[max.value]]; + : [op.IF_GE_DYNAMIC, max.sp]; // Push `peg$FAILED` - this break loop on next iteration, so |result| // will contains not more then |max| elements. @@ -424,10 +462,10 @@ function generateBytecode(ast) { * * @returns {number[]} Bytecode that performs check of the minimum boundary */ - function buildCheckMin(expressionCode, min, context) { + function buildCheckMin(expressionCode, min) { const checkCode = min.type === "constant" ? [op.IF_LT, min.value] - : [op.IF_LT_DYNAMIC, context.sp + 2 - context.env[min.value]]; + : [op.IF_LT_DYNAMIC, min.sp]; return buildSequence( expressionCode, // result = [elem...]; stack:[ pos, [elem...] ] @@ -725,17 +763,33 @@ function generateBytecode(ast) { const min = node.min ? node.min : node.max; const hasMin = min.type !== "constant" || min.value > 0; const hasBoundedMax = node.max.type !== "constant" && node.max.value !== null; - const sp = context.sp + (hasMin ? 2 : 1); + + // +1 for the result slot with an array + // +1 if we have a non-constant (i.e. potentially non-zero) or non-zero minimum, + // for the position saved before the match (needed for backtracking) + const offset = hasMin ? 2 : 1; + + // Do not generate a function for the "minimum" if the grammar uses the `exact` syntax + const minCode = node.min + ? 
buildRangeCall( + node.min, + context.env, + context.sp, + // +1 for the result slot with an array + // +1 for the saved position + // +1 if we have a "function" maximum, because its result occupies an additional slot in the stack + 2 + (node.max.type === "function" ? 1 : 0) + ) + : { pre: [], post: [], sp: context.sp }; + const maxCode = buildRangeCall(node.max, context.env, minCode.sp, offset); const expressionCode = generate(node.expression, { - sp, + sp: maxCode.sp + offset, env: cloneEnv(context.env), action: null, }); // Check the high boundary, if it is defined. - const checkMaxCode = buildCheckMax( - expressionCode, node.max, context, sp - ); + const checkMaxCode = buildCheckMax(expressionCode, node.max); // For dynamic high boundary we need check the first iteration, because the result can be // empty. Constant boundaries does not require that check, because they are always >=1 const firstElemCode = hasBoundedMax @@ -750,10 +804,16 @@ function generateBytecode(ast) { [op.POP] // stack:[ pos, [...] ] (pop elem===`peg$FAILED`) ); - // Check the low boundary, if it is defined and not |0|. - return hasMin - ? buildCheckMin(mainLoopCode, min, context) - : mainLoopCode; + return buildSequence( + minCode.pre, + maxCode.pre, + // Check the low boundary, if it is defined and not |0|. + hasMin + ? 
buildCheckMin(mainLoopCode, min) + : mainLoopCode, + maxCode.post, + minCode.post + ); }, group(node, context) { diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js index 92a74fce..82b6ecbd 100644 --- a/lib/compiler/passes/generate-js.js +++ b/lib/compiler/passes/generate-js.js @@ -312,18 +312,14 @@ function generateJS(ast, options) { parts.push("}"); } - function compileCall() { - const baseLength = 4; + function compileCall(baseLength) { const paramsLength = bc[ip + baseLength - 1]; - const value = f(bc[ip + 1]) + "(" + return f(bc[ip + 1]) + "(" + bc.slice(ip + baseLength, ip + baseLength + paramsLength).map( p => stack.index(p) ).join(", ") + ")"; - stack.pop(bc[ip + 2]); - parts.push(stack.push(value)); - ip += baseLength + paramsLength; } while (ip < end) { @@ -524,7 +520,10 @@ function generateJS(ast, options) { break; case op.CALL: // CALL f, n, pc, p1, p2, ..., pN - compileCall(); + value = compileCall(4); + stack.pop(bc[ip + 2]); + parts.push(stack.push(value)); + ip += 4 + bc[ip + 3]; break; case op.RULE: // RULE r diff --git a/lib/peg.d.ts b/lib/peg.d.ts index 70e4d0eb..aa71c3bc 100644 --- a/lib/peg.d.ts +++ b/lib/peg.d.ts @@ -210,9 +210,17 @@ declare namespace ast { value: string; } + interface FunctionBoundary extends Boundary<"function"> { + /** The code from the grammar. */ + value: string; + /** Span that covers all code between `{` and `}`. */ + codeLocation: LocationRange; + } + type RepeatedBoundary = ConstantBoundary - | VariableBoundary; + | VariableBoundary + | FunctionBoundary; /** Expression repeated from `min` to `max` times. 
*/ interface Repeated extends Expr<"repeated"> { diff --git a/src/parser.pegjs b/src/parser.pegjs index 895e94c9..25683376 100644 --- a/src/parser.pegjs +++ b/src/parser.pegjs @@ -234,6 +234,14 @@ Boundaries Boundary = value:Integer { return { type: "constant", value, location: location() }; } / value:IdentifierName { return { type: "variable", value: value[0], location: location() }; } + / value:CodeBlock { + return { + type: "function", + value: value[0], + codeLocation: value[1], + location: location(), + }; + } PrimaryExpression = LiteralMatcher