-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
greedy.js
108 lines (96 loc) · 2.16 KB
/
greedy.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"use strict";
const { assert } = require('chai');
const PrismLoader = require('../helper/prism-loader');
const TestCase = require('../helper/test-case');
const TokenStreamTransformer = require('../helper/token-stream-transformer');
/**
 * Tokenizes `code` with the given grammar and asserts on the simplified result.
 *
 * The grammar is registered under the language id `test` on a Prism instance
 * obtained from `PrismLoader.createEmptyPrism()`. The token stream returned by
 * `TestCase.tokenize` is passed through `TokenStreamTransformer.simplify` and
 * compared against `expected` with `assert.deepStrictEqual`.
 *
 * @param {object} options
 * @param {object} options.grammar Grammar to register as the `test` language.
 * @param {string} options.code Source text to tokenize.
 * @param {Array} options.expected Expected simplified token stream.
 * @returns {void}
 */
function testTokens({ grammar, code, expected }) {
	const languageId = 'test';

	const prismInstance = PrismLoader.createEmptyPrism();
	prismInstance.languages[languageId] = grammar;

	const tokenStream = TestCase.tokenize(prismInstance, code, languageId);
	const actual = TokenStreamTransformer.simplify(tokenStream);

	assert.deepStrictEqual(actual, expected);
}
// Token-stream tests for grammars that contain at least one `greedy` pattern.
describe('Greedy matching', function () {

	it('should correctly handle tokens with the same name', function () {
		// Both patterns produce `comment` tokens, but only the block-comment
		// pattern is greedy. The first line of `code` is a line comment whose
		// text contains a block-comment opener.
		const lineComment = /\/\/.*/;
		const blockComment = {
			pattern: /\/\*[\s\S]*?(?:\*\/|$)/,
			greedy: true
		};

		testTokens({
			grammar: {
				'comment': [lineComment, blockComment]
			},
			code: '// /*\n/* comment */',
			expected: [
				['comment', '// /*'],
				['comment', '/* comment */']
			]
		});
	});

	it('should support patterns with top-level alternatives that do not contain the lookbehind group', function () {
		const grammar = {
			'a': /'[^']*'/,
			'b': {
				// Two top-level alternatives: `foo`, and `(^|[^\\])"[^"]*"`.
				// Only the second one contains the lookbehind group.
				pattern: /foo|(^|[^\\])"[^"]*"/,
				lookbehind: true,
				greedy: true
			}
		};

		testTokens({
			grammar,
			code: 'foo "bar" \'baz\'',
			expected: [
				['b', 'foo'],
				['b', '"bar"'],
				['a', "'baz'"]
			]
		});
	});

	it('should correctly rematch tokens', function () {
		// `a` is the only non-greedy pattern; `b` and `c` are greedy.
		const singleQuoted = {
			pattern: /'[^'\r\n]*'/,
		};
		const doubleQuoted = {
			pattern: /"[^"\r\n]*"/,
			greedy: true,
		};
		const angleBracketed = {
			pattern: /<[^>\r\n]*>/,
			greedy: true,
		};

		testTokens({
			grammar: {
				'a': singleQuoted,
				'b': doubleQuoted,
				'c': angleBracketed
			},
			code: `<'> '' ''\n<"> "" ""`,
			expected: [
				['c', "<'>"],
				" '",
				['a', "' '"],
				"'\n",
				['c', '<">'],
				['b', '""'],
				['b', '""'],
			]
		});
	});

	it('should always match tokens against the whole text', function () {
		// Regression test for a bug where greedy tokens were matched like
		// non-greedy ones if the token stream ended on a string.
		const grammar = {
			'a': /a/,
			'b': {
				pattern: /^b/,
				greedy: true
			}
		};

		testTokens({
			grammar,
			code: 'bab',
			expected: [
				['b', 'b'],
				['a', 'a'],
				'b'
			]
		});
	});

});