From d37f1003d46856096792e232f6dbfe5a1dcf6c78 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 24 Oct 2021 02:42:38 +0500 Subject: [PATCH] Ranges (pegjs/pegjs#30): Add testcases for ranges with function boundaries and regenerate parser --- lib/parser.js | 103 +++++++----- .../generated-parser-behavior.spec.js | 100 ++++++++++++ .../compiler/passes/generate-bytecode.spec.js | 152 ++++++++++++++++++ test/unit/parser.spec.js | 43 ++++- 4 files changed, 352 insertions(+), 46 deletions(-) diff --git a/lib/parser.js b/lib/parser.js index 5c530fa9..564f71a3 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -479,7 +479,15 @@ function peg$parse(input, options) { var peg$f15 = function(exact) { return [null, exact]; }; var peg$f16 = function(value) { return { type: "constant", value, location: location() }; }; var peg$f17 = function(value) { return { type: "variable", value: value[0], location: location() }; }; - var peg$f18 = function(expression) { + var peg$f18 = function(value) { + return { + type: "function", + value: value[0], + codeLocation: value[1], + location: location(), + }; + }; + var peg$f19 = function(expression) { // The purpose of the "group" AST node is just to isolate label scope. We // don't need to put it around nodes that can't contain any labels or // nodes that already isolate label scope themselves. This leaves us with @@ -488,10 +496,10 @@ function peg$parse(input, options) { ? { type: "group", expression, location: location() } : expression; }; - var peg$f19 = function(name) { + var peg$f20 = function(name) { return { type: "rule_ref", name: name[0], location: location() }; }; - var peg$f20 = function(operator, code) { + var peg$f21 = function(operator, code) { return { type: OPS_TO_SEMANTIC_PREDICATE_TYPES[operator], code: code[0], @@ -499,10 +507,10 @@ function peg$parse(input, options) { location: location() }; }; - var peg$f21 = function(head, tail) { + var peg$f22 = function(head, tail) { return [head + tail.join(""), location()]; }; - var peg$f22 = function(value, ignoreCase) { + var peg$f23 = function(value, ignoreCase) { return { type: "literal", value, @@ -510,9 +518,9 @@ function peg$parse(input, options) { location: location() }; }; - var peg$f23 = function(chars) { return chars.join(""); }; var peg$f24 = function(chars) { return chars.join(""); }; - var peg$f25 = function(inverted, parts, ignoreCase) { + var peg$f25 = function(chars) { return chars.join(""); }; + var peg$f26 = function(inverted, parts, ignoreCase) { return { type: "class", parts: parts.filter(part => part !== ""), @@ -521,7 +529,7 @@ function peg$parse(input, options) { location: location() }; }; - var peg$f26 = function(begin, end) { + var peg$f27 = function(begin, end) { if (begin.charCodeAt(0) > end.charCodeAt(0)) { error( "Invalid character range: " + text() + "." @@ -530,23 +538,23 @@ function peg$parse(input, options) { return [begin, end]; }; - var peg$f27 = function() { return ""; }; - var peg$f28 = function() { return "\0"; }; - var peg$f29 = function() { return "\b"; }; - var peg$f30 = function() { return "\f"; }; - var peg$f31 = function() { return "\n"; }; - var peg$f32 = function() { return "\r"; }; - var peg$f33 = function() { return "\t"; }; - var peg$f34 = function() { return "\v"; }; - var peg$f35 = function(digits) { + var peg$f28 = function() { return ""; }; + var peg$f29 = function() { return "\0"; }; + var peg$f30 = function() { return "\b"; }; + var peg$f31 = function() { return "\f"; }; + var peg$f32 = function() { return "\n"; }; + var peg$f33 = function() { return "\r"; }; + var peg$f34 = function() { return "\t"; }; + var peg$f35 = function() { return "\v"; }; + var peg$f36 = function(digits) { return String.fromCharCode(parseInt(digits, 16)); }; - var peg$f36 = function(digits) { + var peg$f37 = function(digits) { return String.fromCharCode(parseInt(digits, 16)); }; - var peg$f37 = function() { return { type: "any", location: location() }; }; - var peg$f38 = function(code) { return [code, location()]; }; - var peg$f39 = function(digits) { return parseInt(digits, 10); }; + var peg$f38 = function() { return { type: "any", location: location() }; }; + var peg$f39 = function(code) { return [code, location()]; }; + var peg$f40 = function(digits) { return parseInt(digits, 10); }; var peg$currPos = 0; var peg$savedPos = 0; var peg$posDetailsCache = [{ line: 1, column: 1 }]; @@ -1348,6 +1356,15 @@ function peg$parse(input, options) { s1 = peg$f17(s1); } s0 = s1; + if (s0 === peg$FAILED) { + s0 = peg$currPos; + s1 = peg$parseCodeBlock(); + if (s1 !== peg$FAILED) { + peg$savedPos = s0; + s1 = peg$f18(s1); + } + s0 = s1; + } } return s0; @@ -1388,7 +1405,7 @@ function peg$parse(input, options) { } if (s5 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f18(s3); + s0 = peg$f19(s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1456,7 +1473,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f19(s1); + s0 = peg$f20(s1); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1479,7 +1496,7 @@ function peg$parse(input, options) { s3 = peg$parseCodeBlock(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f20(s1, s3); + s0 = peg$f21(s1, s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -1974,7 +1991,7 @@ function peg$parse(input, options) { s3 = peg$parseIdentifierPart(); } peg$savedPos = s0; - s0 = peg$f21(s1, s2); + s0 = peg$f22(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2123,7 +2140,7 @@ function peg$parse(input, options) { s2 = null; } peg$savedPos = s0; - s0 = peg$f22(s1, s2); + s0 = peg$f23(s1, s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2165,7 +2182,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f23(s2); + s0 = peg$f24(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2199,7 +2216,7 @@ function peg$parse(input, options) { } if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f24(s2); + s0 = peg$f25(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2432,7 +2449,7 @@ function peg$parse(input, options) { s5 = null; } peg$savedPos = s0; - s0 = peg$f25(s2, s3, s5); + s0 = peg$f26(s2, s3, s5); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2467,7 +2484,7 @@ function peg$parse(input, options) { s3 = peg$parseClassCharacter(); if (s3 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f26(s1, s3); + s0 = peg$f27(s1, s3); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2579,7 +2596,7 @@ function peg$parse(input, options) { s2 = peg$parseLineTerminatorSequence(); if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f27(); + s0 = peg$f28(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2618,7 +2635,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f28(); + s0 = peg$f29(); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2686,7 +2703,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f29(); + s1 = peg$f30(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2700,7 +2717,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f30(); + s1 = peg$f31(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2714,7 +2731,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f31(); + s1 = peg$f32(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2728,7 +2745,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f32(); + s1 = peg$f33(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2742,7 +2759,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f33(); + s1 = peg$f34(); } s0 = s1; if (s0 === peg$FAILED) { @@ -2756,7 +2773,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f34(); + s1 = peg$f35(); } s0 = s1; } @@ -2875,7 +2892,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f35(s2); + s0 = peg$f36(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2935,7 +2952,7 @@ function peg$parse(input, options) { } if (s2 !== peg$FAILED) { peg$savedPos = s0; - s0 = peg$f36(s2); + s0 = peg$f37(s2); } else { peg$currPos = s0; s0 = peg$FAILED; @@ -2989,7 +3006,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f37(); + s1 = peg$f38(); } s0 = s1; @@ -3042,7 +3059,7 @@ function peg$parse(input, options) { s0 = peg$currPos; s1 = peg$parseCode(); peg$savedPos = s0; - s1 = peg$f38(s1); + s1 = peg$f39(s1); s0 = s1; return s0; @@ -3279,7 +3296,7 @@ function peg$parse(input, options) { } if (s1 !== peg$FAILED) { peg$savedPos = s0; - s1 = peg$f39(s1); + s1 = peg$f40(s1); } s0 = s1; diff --git a/test/behavior/generated-parser-behavior.spec.js b/test/behavior/generated-parser-behavior.spec.js index 350bc031..275edcef 100644 --- a/test/behavior/generated-parser-behavior.spec.js +++ b/test/behavior/generated-parser-behavior.spec.js @@ -1379,6 +1379,106 @@ describe("generated parser behavior", () => { } }); }); + + describe("with function boundaries", () => { + it("|{min}.. | matches correctly", () => { + const parser = peg.generate("start = 'a'|{ return 2; }..|", options); + + expect(parser).to.failToParse(""); + expect(parser).to.failToParse("a"); + expect(parser).to.parse("aa", ["a", "a"]); + expect(parser).to.parse("aaa", ["a", "a", "a"]); + }); + + it("| ..{max}| matches correctly", () => { + const parser = peg.generate("start = 'a'|..{ return 2; }|", options); + + expect(parser).to.parse("", []); + expect(parser).to.parse("a", ["a"]); + expect(parser).to.parse("aa", ["a", "a"]); + expect(parser).to.failToParse("aaa"); + }); + + it("|{min}..{max}| matches correctly", () => { + const parser = peg.generate("start = 'a'|{ return 2; }..{ return 3; }|", options); + + expect(parser).to.failToParse(""); + expect(parser).to.failToParse("a"); + expect(parser).to.parse("aa", ["a", "a"]); + expect(parser).to.parse("aaa", ["a", "a", "a"]); + expect(parser).to.failToParse("aaaa"); + }); + + it("|{val}..{val}| matches correctly", () => { + const parser = peg.generate("start = 'a'|{ return 2; }..{ return 2; }|", options); + + expect(parser).to.failToParse(""); + expect(parser).to.failToParse("a"); + expect(parser).to.parse("aa", ["a", "a"]); + expect(parser).to.failToParse("aaa"); + }); + + it("| {exact} | matches correctly", () => { + const parser = peg.generate("start = 'a'|{ return 2; }|", options); + + expect(parser).to.failToParse(""); + expect(parser).to.failToParse("a"); + expect(parser).to.parse("aa", ["a", "a"]); + expect(parser).to.failToParse("aaa"); + }); + + describe("function called only once", () => { + it("in |{min}.. |", () => { + const parser = peg.generate(` + { let min = 0; } + start = 'a'|{ ++min; return 2; }..| { return min; } + `, options); + + // Always one check - after the loop + expect(parser).to.parse("aa", 1); + expect(parser).to.parse("aaa", 1); + }); + + it("in | ..{max}|", () => { + const parser = peg.generate(` + { let max = 0; } + start = 'a'|..{ ++max; return 3; }| { return max; } + `, options); + + expect(parser).to.parse("aa", 1); + expect(parser).to.parse("aaa", 1); + }); + + it("in |{min}..{max}|", () => { + const parser = peg.generate(` + { + let min = 0; + let max = 0; + } + start = 'a'| + { ++min; return 0; } + .. + { ++max; return 3; } + | { return [min, max]; } + `, options); + + expect(parser).to.parse("", [1, 1]); + expect(parser).to.parse("a", [1, 1]); + expect(parser).to.parse("aa", [1, 1]); + expect(parser).to.parse("aaa", [1, 1]); + }); + + it("in | {exact} |", () => { + const parser = peg.generate(` + { let count = 0; } + start = 'a'|{ ++count; return options.exact; }| { return count; } + `, options); + + expect(parser).to.parse("", 1, { exact: 0 }); + expect(parser).to.parse("aa", 1, { exact: 2 }); + }); + }); + }); }); }); diff --git a/test/unit/compiler/passes/generate-bytecode.spec.js b/test/unit/compiler/passes/generate-bytecode.spec.js index b72ff21f..83c282b7 100644 --- a/test/unit/compiler/passes/generate-bytecode.spec.js +++ b/test/unit/compiler/passes/generate-bytecode.spec.js @@ -977,6 +977,158 @@ describe("compiler pass |generateBytecode|", () => { }); }); }); + + describe("with function boundaries", () => { + describe("| ..x| (edge case -- no min boundary)", () => { + const grammar = "start = 'a'| ..{return 42;}|"; + + it("generates correct bytecode", () => { + expect(pass).to.changeAST(grammar, bytecodeDetails([ + 26, 0, 0, 0, // CALL <0>, pop 0, args [] + + 4, // PUSH_EMPTY_ARRAY + 33, 1, 1, 8, // IF_GE_DYNAMIC <1> + 3, // * PUSH_FAILED + 18, 0, 2, 2, 22, 0, 23, 0, // * + 16, 14, // WHILE_NOT_ERROR + 10, // * APPEND + 33, 1, 1, 8, // IF_GE_DYNAMIC <1> + 3, // * PUSH_FAILED + 18, 0, 2, 2, 22, 0, 23, 0, // * + 6, // POP + + 9, // NIP + ])); + }); + + it("defines correct constants", () => { + expect(pass).to.changeAST(grammar, constsDetails( + ["a"], + [], + [{ type: "literal", value: "a", ignoreCase: false }], + [{ predicate: true, params: [], body: "return 42;" }] + )); + }); + }); + + describe("|x.. | (edge case -- no max boundary)", () => { + const grammar = "start = 'a'|{return 42;}.. |"; + + it("generates correct bytecode", () => { + expect(pass).to.changeAST(grammar, bytecodeDetails([ + 26, 0, 0, 0, // CALL <0>, pop 0, args [] + + 5, // PUSH_CURR_POS + 4, // PUSH_EMPTY_ARRAY + 18, 0, 2, 2, 22, 0, 23, 0, // + 16, 9, // WHILE_NOT_ERROR + 10, // * APPEND + 18, 0, 2, 2, 22, 0, 23, 0, // + 6, // POP + + 32, 2, 3, 1, // IF_LT_DYNAMIC <2> + 6, // * POP + 7, // POP_CURR_POS + 3, // PUSH_FAILED + 9, // * NIP + + 9, // NIP + ])); + }); + + it("defines correct constants", () => { + expect(pass).to.changeAST(grammar, constsDetails( + ["a"], + [], + [{ type: "literal", value: "a", ignoreCase: false }], + [{ predicate: true, params: [], body: "return 42;" }] + )); + }); + }); + + describe("|x..y|", () => { + const grammar = "start = 'a'|{return 41;}..{return 43;}|"; + + it("generates correct bytecode", () => { + expect(pass).to.changeAST(grammar, bytecodeDetails([ + 26, 0, 0, 0, // CALL <0>, pop 0, args [] + 26, 1, 0, 0, // CALL <1>, pop 0, args [] + + 5, // PUSH_CURR_POS + 4, // PUSH_EMPTY_ARRAY + 33, 2, 1, 8, // IF_GE_DYNAMIC <2> + 3, // * PUSH_FAILED + 18, 0, 2, 2, 22, 0, 23, 0, // * + 16, 14, // WHILE_NOT_ERROR + 10, // * APPEND + 33, 2, 1, 8, // IF_GE_DYNAMIC <2> + 3, // * PUSH_FAILED + 18, 0, 2, 2, 22, 0, 23, 0, // * + 6, // POP + + 32, 3, 3, 1, // IF_LT_DYNAMIC <3> + 6, // * POP + 7, // POP_CURR_POS + 3, // PUSH_FAILED + 9, // * NIP + + 9, // NIP + 9, // NIP + ])); + }); + + it("defines correct constants", () => { + expect(pass).to.changeAST(grammar, constsDetails( + ["a"], + [], + [{ type: "literal", value: "a", ignoreCase: false }], + [ + { predicate: true, params: [], body: "return 41;" }, + { predicate: true, params: [], body: "return 43;" }, + ] + )); + }); + }); + + describe("|exact| (edge case -- exact repetitions)", () => { + const grammar = "start = 'a'|{return 42;}|"; + + it("generates correct bytecode", () => { + expect(pass).to.changeAST(grammar, bytecodeDetails([ + 26, 0, 0, 0, // CALL <0>, pop 0, args [] + + 5, // PUSH_CURR_POS + 4, // PUSH_EMPTY_ARRAY + 33, 2, 1, 8, // IF_GE_DYNAMIC <2> + 3, // * PUSH_FAILED + 18, 0, 2, 2, 22, 0, 23, 0, // * + 16, 14, // WHILE_NOT_ERROR + 10, // * APPEND + 33, 2, 1, 8, // IF_GE_DYNAMIC <2> + 3, // * PUSH_FAILED + 18, 0, 2, 2, 22, 0, 23, 0, // * + 6, // POP + + 32, 2, 3, 1, // IF_LT_DYNAMIC <2> + 6, // * POP + 7, // POP_CURR_POS + 3, // PUSH_FAILED + 9, // * NIP + + 9, // NIP + ])); + }); + + it("defines correct constants", () => { + expect(pass).to.changeAST(grammar, constsDetails( + ["a"], + [], + [{ type: "literal", value: "a", ignoreCase: false }], + [{ predicate: true, params: [], body: "return 42;" }] + )); + }); + }); + }); }); }); diff --git a/test/unit/parser.spec.js b/test/unit/parser.spec.js index 4b5099ca..2f46c2fb 100644 --- a/test/unit/parser.spec.js +++ b/test/unit/parser.spec.js @@ -101,14 +101,17 @@ describe("Peggy grammar parser", () => { return oneRuleGrammar({ type: "rule_ref", name }); } - function repeatedGrammar(min, max) { + function repeatedGrammar(min, max, type = "variable") { return oneRuleGrammar({ type: "repeated", - min: { type: typeof min === "string" ? "variable" : "constant", value: min }, - max: { type: typeof max === "string" ? "variable" : "constant", value: max }, + min: { type: typeof min === "string" ? type : "constant", value: min }, + max: { type: typeof max === "string" ? type : "constant", value: max }, expression: literalAbcd, }); } + function repeatedGrammar2(min, max) { + return repeatedGrammar(min, max, "function"); + } const trivialGrammar = literalGrammar("abcd", false); const twoRuleGrammar = { @@ -185,8 +188,10 @@ describe("Peggy grammar parser", () => { repeated(node) { if (node.min) { delete node.min.location; + delete node.min.codeLocation; } delete node.max.location; + delete node.max.codeLocation; delete node.location; strip(node.expression); }, @@ -472,6 +477,26 @@ describe("Peggy grammar parser", () => { expect("start = 'abcd'|exact\n|").to.parseAs(grammar); }); + it("with function boundaries", () => { + let grammar = repeatedGrammar2("min", "max"); + expect("start = 'abcd'|{min}..{max}| ").to.parseAs(grammar); + expect("start = 'abcd'\n|{min}..{max}|").to.parseAs(grammar); + expect("start = 'abcd'|\n{min}..{max}|").to.parseAs(grammar); + expect("start = 'abcd'|{min}\n..{max}|").to.parseAs(grammar); + expect("start = 'abcd'|{min}..\n{max}|").to.parseAs(grammar); + expect("start = 'abcd'|{min}..{max}\n|").to.parseAs(grammar); + + grammar = oneRuleGrammar({ + type: "repeated", + min: null, + max: { type: "function", value: "exact" }, + expression: literalAbcd, + }); + expect("start = 'abcd'\n|{exact}|").to.parseAs(grammar); + expect("start = 'abcd'|\n{exact}|").to.parseAs(grammar); + expect("start = 'abcd'|{exact}\n|").to.parseAs(grammar); + }); + it("with mixed boundaries", () => { let grammar = repeatedGrammar(2, "max"); expect("start = 'abcd'|2..max| ").to.parseAs(grammar); @@ -530,6 +555,18 @@ describe("Peggy grammar parser", () => { })); }); + it("with function boundaries", () => { + expect("start = 'abcd'|{min}.. |").to.parseAs(repeatedGrammar2("min", null)); + expect("start = 'abcd'| ..{max}|").to.parseAs(repeatedGrammar2(0, "max")); + expect("start = 'abcd'|{min}..{max}|").to.parseAs(repeatedGrammar2("min", "max")); + expect("start = 'abcd'|{exact}| ").to.parseAs(oneRuleGrammar({ + type: "repeated", + min: null, + max: { type: "function", value: "exact" }, + expression: literalAbcd, + })); + }); + it("with mixed boundaries", () => { expect("start = 'abcd'|2..max|").to.parseAs(repeatedGrammar(2, "max")); expect("start = 'abcd'|min..3|").to.parseAs(repeatedGrammar("min", 3));