From 98fa3df10b33374f3ccbc31ec3c35bbb6cd04ac3 Mon Sep 17 00:00:00 2001 From: Victor Petrovykh Date: Tue, 2 Oct 2018 20:57:33 -0400 Subject: [PATCH] Bugfix for an apparent infinite loop in grammar. This is a case of semantically equivalent regexes not being computationally equivalent. The issue also relates to how negated character classes (e.g. `[^abc]`) are treated. In the end I had to replace something of the form `(A | B)*` with `A* (BA*)*` to avoid grinding the regex engine to a halt. Issue #150. --- grammars/MagicPython.cson | 10 +- grammars/MagicPython.tmLanguage | 10 +- grammars/src/MagicPython.syntax.yaml | 10 +- test/atom-spec/python-spec.js | 144 +++++++++++++++++++++++++++ test/strings/bug1.py | 47 +++++++++ test/strings/bug2.py | 35 +++++++ 6 files changed, 238 insertions(+), 18 deletions(-) create mode 100644 test/strings/bug1.py create mode 100644 test/strings/bug2.py diff --git a/grammars/MagicPython.cson b/grammars/MagicPython.cson index fbcc389f..bb53f04c 100644 --- a/grammars/MagicPython.cson +++ b/grammars/MagicPython.cson @@ -942,7 +942,7 @@ repository: {{ | }} | (?: { - \\w*? (\\.[[:alpha:]_]\\w*? | \\[[^\\]'"]+\\])*? + \\w* (\\.[[:alpha:]_]\\w* | \\[[^\\]'"]+\\])* (![rsa])? ( : \\w? [<>=^]? [-+ ]? \\#? \\d* ,? (\\.\\d+)? [bcdeEfFgGnosxX%]? )? @@ -964,13 +964,11 @@ repository: (?x) ( { - \\w*? (\\.[[:alpha:]_]\\w*? | \\[[^\\]'"]+\\])*? + \\w* (\\.[[:alpha:]_]\\w* | \\[[^\\]'"]+\\])* (![rsa])? (:) - ( - [^'"{}\\n]+? - | - \\{ [^'"}\\n]*? \\} + [^'"{}\\n]* (?: + \\{ [^'"}\\n]*? \\} [^'"{}\\n]* )* } ) diff --git a/grammars/MagicPython.tmLanguage b/grammars/MagicPython.tmLanguage index 6f1edd62..90e33014 100644 --- a/grammars/MagicPython.tmLanguage +++ b/grammars/MagicPython.tmLanguage @@ -1444,7 +1444,7 @@ {{ | }} | (?: { - \w*? (\.[[:alpha:]_]\w*? | \[[^\]'"]+\])*? + \w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])* (![rsa])? ( : \w? [<>=^]? [-+ ]? \#? \d* ,? (\.\d+)? [bcdeEfFgGnosxX%]? )? @@ -1477,13 +1477,11 @@ (?x) ( { - \w*? (\.[[:alpha:]_]\w*? 
| \[[^\]'"]+\])*? + \w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])* (![rsa])? (:) - ( - [^'"{}\n]+? - | - \{ [^'"}\n]*? \} + [^'"{}\n]* (?: + \{ [^'"}\n]*? \} [^'"{}\n]* )* } ) diff --git a/grammars/src/MagicPython.syntax.yaml b/grammars/src/MagicPython.syntax.yaml index 556d0213..b2dc33f9 100644 --- a/grammars/src/MagicPython.syntax.yaml +++ b/grammars/src/MagicPython.syntax.yaml @@ -791,7 +791,7 @@ repository: {{ | }} | (?: { - \w*? (\.[[:alpha:]_]\w*? | \[[^\]'"]+\])*? + \w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])* (![rsa])? ( : \w? [<>=^]? [-+ ]? \#? \d* ,? (\.\d+)? [bcdeEfFgGnosxX%]? )? @@ -818,13 +818,11 @@ repository: (?x) ( { - \w*? (\.[[:alpha:]_]\w*? | \[[^\]'"]+\])*? + \w* (\.[[:alpha:]_]\w* | \[[^\]'"]+\])* (![rsa])? (:) - ( - [^'"{}\n]+? - | - \{ [^'"}\n]*? \} + [^'"{}\n]* (?: + \{ [^'"}\n]*? \} [^'"{}\n]* )* } ) diff --git a/test/atom-spec/python-spec.js b/test/atom-spec/python-spec.js index 64447b31..ebf9ccd7 100644 --- a/test/atom-spec/python-spec.js +++ b/test/atom-spec/python-spec.js @@ -12853,6 +12853,150 @@ describe("Grammar Tests", function() { expect(tokens[3][13].scopes).toEqual(["source.python","comment.line.number-sign.python"]); }); + it("test/strings/bug1.py", + function() { + tokens = grammar.tokenizeLines("# issue 150\nrecord = {\n \"a\": {k: str(v) for k, v in foo if \"\"}\n}") + expect(tokens[0][0].value).toBe("#"); + expect(tokens[0][0].scopes).toEqual(["source.python","comment.line.number-sign.python","punctuation.definition.comment.python"]); + expect(tokens[0][1].value).toBe(" issue 150"); + expect(tokens[0][1].scopes).toEqual(["source.python","comment.line.number-sign.python"]); + expect(tokens[1][0].value).toBe("record"); + expect(tokens[1][0].scopes).toEqual(["source.python"]); + expect(tokens[1][1].value).toBe(" "); + expect(tokens[1][1].scopes).toEqual(["source.python"]); + expect(tokens[1][2].value).toBe("="); + expect(tokens[1][2].scopes).toEqual(["source.python","keyword.operator.assignment.python"]); + 
expect(tokens[1][3].value).toBe(" "); + expect(tokens[1][3].scopes).toEqual(["source.python"]); + expect(tokens[1][4].value).toBe("{"); + expect(tokens[1][4].scopes).toEqual(["source.python","punctuation.definition.dict.begin.python"]); + expect(tokens[2][0].value).toBe(" "); + expect(tokens[2][0].scopes).toEqual(["source.python"]); + expect(tokens[2][1].value).toBe("\""); + expect(tokens[2][1].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]); + expect(tokens[2][2].value).toBe("a"); + expect(tokens[2][2].scopes).toEqual(["source.python","string.quoted.single.python"]); + expect(tokens[2][3].value).toBe("\""); + expect(tokens[2][3].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]); + expect(tokens[2][4].value).toBe(":"); + expect(tokens[2][4].scopes).toEqual(["source.python","punctuation.separator.dict.python"]); + expect(tokens[2][5].value).toBe(" "); + expect(tokens[2][5].scopes).toEqual(["source.python"]); + expect(tokens[2][6].value).toBe("{"); + expect(tokens[2][6].scopes).toEqual(["source.python","punctuation.definition.dict.begin.python"]); + expect(tokens[2][7].value).toBe("k"); + expect(tokens[2][7].scopes).toEqual(["source.python"]); + expect(tokens[2][8].value).toBe(":"); + expect(tokens[2][8].scopes).toEqual(["source.python","punctuation.separator.dict.python"]); + expect(tokens[2][9].value).toBe(" "); + expect(tokens[2][9].scopes).toEqual(["source.python"]); + expect(tokens[2][10].value).toBe("str"); + expect(tokens[2][10].scopes).toEqual(["source.python","meta.function-call.python","support.type.python"]); + expect(tokens[2][11].value).toBe("("); + expect(tokens[2][11].scopes).toEqual(["source.python","meta.function-call.python","punctuation.definition.arguments.begin.python"]); + expect(tokens[2][12].value).toBe("v"); + 
expect(tokens[2][12].scopes).toEqual(["source.python","meta.function-call.python","meta.function-call.arguments.python"]); + expect(tokens[2][13].value).toBe(")"); + expect(tokens[2][13].scopes).toEqual(["source.python","meta.function-call.python","punctuation.definition.arguments.end.python"]); + expect(tokens[2][14].value).toBe(" "); + expect(tokens[2][14].scopes).toEqual(["source.python"]); + expect(tokens[2][15].value).toBe("for"); + expect(tokens[2][15].scopes).toEqual(["source.python","keyword.control.flow.python"]); + expect(tokens[2][16].value).toBe(" "); + expect(tokens[2][16].scopes).toEqual(["source.python"]); + expect(tokens[2][17].value).toBe("k"); + expect(tokens[2][17].scopes).toEqual(["source.python"]); + expect(tokens[2][18].value).toBe(","); + expect(tokens[2][18].scopes).toEqual(["source.python","punctuation.separator.element.python"]); + expect(tokens[2][19].value).toBe(" "); + expect(tokens[2][19].scopes).toEqual(["source.python"]); + expect(tokens[2][20].value).toBe("v"); + expect(tokens[2][20].scopes).toEqual(["source.python"]); + expect(tokens[2][21].value).toBe(" "); + expect(tokens[2][21].scopes).toEqual(["source.python"]); + expect(tokens[2][22].value).toBe("in"); + expect(tokens[2][22].scopes).toEqual(["source.python","keyword.operator.logical.python"]); + expect(tokens[2][23].value).toBe(" "); + expect(tokens[2][23].scopes).toEqual(["source.python"]); + expect(tokens[2][24].value).toBe("foo"); + expect(tokens[2][24].scopes).toEqual(["source.python"]); + expect(tokens[2][25].value).toBe(" "); + expect(tokens[2][25].scopes).toEqual(["source.python"]); + expect(tokens[2][26].value).toBe("if"); + expect(tokens[2][26].scopes).toEqual(["source.python","keyword.control.flow.python"]); + expect(tokens[2][27].value).toBe(" "); + expect(tokens[2][27].scopes).toEqual(["source.python"]); + expect(tokens[2][28].value).toBe("\""); + 
expect(tokens[2][28].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]); + expect(tokens[2][29].value).toBe("\""); + expect(tokens[2][29].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]); + expect(tokens[2][30].value).toBe("}"); + expect(tokens[2][30].scopes).toEqual(["source.python","punctuation.definition.dict.end.python"]); + expect(tokens[3][0].value).toBe("}"); + expect(tokens[3][0].scopes).toEqual(["source.python","punctuation.definition.dict.end.python"]); + }); + + it("test/strings/bug2.py", + function() { + tokens = grammar.tokenizeLines("# issue 150\ncmd = \"git-clang-format --style=\\\"{{BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, \" \\\n \"AlignConsecutiveAssignments: true}}\\\" {COMMIT_SHA} -- ./**/*.proto > {OUTPUT}\".format(") + expect(tokens[0][0].value).toBe("#"); + expect(tokens[0][0].scopes).toEqual(["source.python","comment.line.number-sign.python","punctuation.definition.comment.python"]); + expect(tokens[0][1].value).toBe(" issue 150"); + expect(tokens[0][1].scopes).toEqual(["source.python","comment.line.number-sign.python"]); + expect(tokens[1][0].value).toBe("cmd"); + expect(tokens[1][0].scopes).toEqual(["source.python"]); + expect(tokens[1][1].value).toBe(" "); + expect(tokens[1][1].scopes).toEqual(["source.python"]); + expect(tokens[1][2].value).toBe("="); + expect(tokens[1][2].scopes).toEqual(["source.python","keyword.operator.assignment.python"]); + expect(tokens[1][3].value).toBe(" "); + expect(tokens[1][3].scopes).toEqual(["source.python"]); + expect(tokens[1][4].value).toBe("\""); + expect(tokens[1][4].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]); + expect(tokens[1][5].value).toBe("git-clang-format --style="); + expect(tokens[1][5].scopes).toEqual(["source.python","string.quoted.single.python"]); + expect(tokens[1][6].value).toBe("\\\""); + 
expect(tokens[1][6].scopes).toEqual(["source.python","string.quoted.single.python","constant.character.escape.python"]); + expect(tokens[1][7].value).toBe("{{"); + expect(tokens[1][7].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]); + expect(tokens[1][8].value).toBe("BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, "); + expect(tokens[1][8].scopes).toEqual(["source.python","string.quoted.single.python"]); + expect(tokens[1][9].value).toBe("\""); + expect(tokens[1][9].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]); + expect(tokens[1][10].value).toBe(" "); + expect(tokens[1][10].scopes).toEqual(["source.python"]); + expect(tokens[1][11].value).toBe("\\"); + expect(tokens[1][11].scopes).toEqual(["source.python","punctuation.separator.continuation.line.python"]); + expect(tokens[1][12].value).toBe(""); + expect(tokens[1][12].scopes).toEqual(["source.python"]); + expect(tokens[2][0].value).toBe(" "); + expect(tokens[2][0].scopes).toEqual(["source.python"]); + expect(tokens[2][1].value).toBe("\""); + expect(tokens[2][1].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.begin.python"]); + expect(tokens[2][2].value).toBe("AlignConsecutiveAssignments: true"); + expect(tokens[2][2].scopes).toEqual(["source.python","string.quoted.single.python"]); + expect(tokens[2][3].value).toBe("}}"); + expect(tokens[2][3].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]); + expect(tokens[2][4].value).toBe("\\\""); + expect(tokens[2][4].scopes).toEqual(["source.python","string.quoted.single.python","constant.character.escape.python"]); + expect(tokens[2][5].value).toBe(" "); + expect(tokens[2][5].scopes).toEqual(["source.python","string.quoted.single.python"]); + 
expect(tokens[2][6].value).toBe("{COMMIT_SHA}"); + expect(tokens[2][6].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]); + expect(tokens[2][7].value).toBe(" -- ./**/*.proto > "); + expect(tokens[2][7].scopes).toEqual(["source.python","string.quoted.single.python"]); + expect(tokens[2][8].value).toBe("{OUTPUT}"); + expect(tokens[2][8].scopes).toEqual(["source.python","string.quoted.single.python","meta.format.brace.python","constant.character.format.placeholder.other.python"]); + expect(tokens[2][9].value).toBe("\""); + expect(tokens[2][9].scopes).toEqual(["source.python","string.quoted.single.python","punctuation.definition.string.end.python"]); + expect(tokens[2][10].value).toBe("."); + expect(tokens[2][10].scopes).toEqual(["source.python","punctuation.separator.period.python"]); + expect(tokens[2][11].value).toBe("format"); + expect(tokens[2][11].scopes).toEqual(["source.python","meta.function-call.python","meta.function-call.generic.python"]); + expect(tokens[2][12].value).toBe("("); + expect(tokens[2][12].scopes).toEqual(["source.python","meta.function-call.python","punctuation.definition.arguments.begin.python"]); + }); + it("test/strings/bytes1.py", function() { tokens = grammar.tokenizeLines("a = b\"\"\"\nmultiline \"binary\" string \\\n\n \\xf1 \\u1234aaaa \\U1234aaaa\n\n \\N{BLACK SPADE SUIT}\n\"\"\"") diff --git a/test/strings/bug1.py b/test/strings/bug1.py new file mode 100644 index 00000000..9b43c6de --- /dev/null +++ b/test/strings/bug1.py @@ -0,0 +1,47 @@ +# issue 150 +record = { + "a": {k: str(v) for k, v in foo if ""} +} + + + + +# : comment.line.number-sign.python, punctuation.definition.comment.python, source.python + issue 150 : comment.line.number-sign.python, source.python +record : source.python + : source.python += : keyword.operator.assignment.python, source.python + : source.python +{ : punctuation.definition.dict.begin.python, source.python + : 
source.python +" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python +a : source.python, string.quoted.single.python +" : punctuation.definition.string.end.python, source.python, string.quoted.single.python +: : punctuation.separator.dict.python, source.python + : source.python +{ : punctuation.definition.dict.begin.python, source.python +k : source.python +: : punctuation.separator.dict.python, source.python + : source.python +str : meta.function-call.python, source.python, support.type.python +( : meta.function-call.python, punctuation.definition.arguments.begin.python, source.python +v : meta.function-call.arguments.python, meta.function-call.python, source.python +) : meta.function-call.python, punctuation.definition.arguments.end.python, source.python + : source.python +for : keyword.control.flow.python, source.python + : source.python +k : source.python +, : punctuation.separator.element.python, source.python + : source.python +v : source.python + : source.python +in : keyword.operator.logical.python, source.python + : source.python +foo : source.python + : source.python +if : keyword.control.flow.python, source.python + : source.python +" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python +" : punctuation.definition.string.end.python, source.python, string.quoted.single.python +} : punctuation.definition.dict.end.python, source.python +} : punctuation.definition.dict.end.python, source.python diff --git a/test/strings/bug2.py b/test/strings/bug2.py new file mode 100644 index 00000000..0ba2bf8f --- /dev/null +++ b/test/strings/bug2.py @@ -0,0 +1,35 @@ +# issue 150 +cmd = "git-clang-format --style=\"{{BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, " \ + "AlignConsecutiveAssignments: true}}\" {COMMIT_SHA} -- ./**/*.proto > {OUTPUT}".format( + + + + +# : comment.line.number-sign.python, punctuation.definition.comment.python, source.python + issue 150 : 
comment.line.number-sign.python, source.python +cmd : source.python + : source.python += : keyword.operator.assignment.python, source.python + : source.python +" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python +git-clang-format --style= : source.python, string.quoted.single.python +\" : constant.character.escape.python, source.python, string.quoted.single.python +{{ : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python +BasedOnStyle: Google, ColumnLimit: 100, IndentWidth: 2, : source.python, string.quoted.single.python +" : punctuation.definition.string.end.python, source.python, string.quoted.single.python + : source.python +\ : punctuation.separator.continuation.line.python, source.python + : source.python + : source.python +" : punctuation.definition.string.begin.python, source.python, string.quoted.single.python +AlignConsecutiveAssignments: true : source.python, string.quoted.single.python +}} : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python +\" : constant.character.escape.python, source.python, string.quoted.single.python + : source.python, string.quoted.single.python +{COMMIT_SHA} : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python + -- ./**/*.proto > : source.python, string.quoted.single.python +{OUTPUT} : constant.character.format.placeholder.other.python, meta.format.brace.python, source.python, string.quoted.single.python +" : punctuation.definition.string.end.python, source.python, string.quoted.single.python +. : punctuation.separator.period.python, source.python +format : meta.function-call.generic.python, meta.function-call.python, source.python +( : meta.function-call.python, punctuation.definition.arguments.begin.python, source.python