tests/core/greedy.js

"use strict";

const { assert } = require('chai');
const PrismLoader = require('../helper/prism-loader');
const TestCase = require('../helper/test-case');
const TokenStreamTransformer = require('../helper/token-stream-transformer');


/**
 * Tokenizes `code` with `grammar` and asserts that the simplified token stream
 * is deeply equal to `expected`.
 *
 * The grammar is registered under the language id `test` on the Prism instance
 * returned by `PrismLoader.createEmptyPrism()`, so every call works on its own
 * instance.
 *
 * @param {object} options
 * @param {object} options.grammar The grammar assigned to `Prism.languages.test`.
 * @param {string} options.code The text handed to `TestCase.tokenize`.
 * @param {Array} options.expected The value the output of `TokenStreamTransformer.simplify` is compared against.
 */
function testTokens({ grammar, code, expected }) {
	const LANGUAGE_ID = 'test';

	const prismInstance = PrismLoader.createEmptyPrism();
	prismInstance.languages[LANGUAGE_ID] = grammar;

	const tokenStream = TestCase.tokenize(prismInstance, code, LANGUAGE_ID);
	const actual = TokenStreamTransformer.simplify(tokenStream);

	assert.deepStrictEqual(actual, expected);
}

describe('Greedy matching', function () {

	it('should correctly handle tokens with the same name', function () {
		// Two patterns share the token name `comment`; only the block comment pattern is greedy.
		const grammar = {
			'comment': [
				/\/\/.*/,
				{
					pattern: /\/\*[\s\S]*?(?:\*\/|$)/,
					greedy: true
				}
			]
		};
		// The line comment on the first line itself contains a `/*`.
		const code = '// /*\n/* comment */';
		const expected = [
			["comment", "// /*"],
			["comment", "/* comment */"]
		];

		testTokens({ grammar, code, expected });
	});

	it('should support patterns with top-level alternatives that do not contain the lookbehind group', function () {
		const grammar = {
			'a': /'[^']*'/,
			'b': {
				// This pattern has 2 top-level alternatives:  foo  and  (^|[^\\])"[^"]*"
				// Only the second alternative contains the lookbehind group.
				pattern: /foo|(^|[^\\])"[^"]*"/,
				lookbehind: true,
				greedy: true
			}
		};
		const code = 'foo "bar" \'baz\'';
		const expected = [
			["b", "foo"],
			["b", "\"bar\""],
			["a", "'baz'"]
		];

		testTokens({ grammar, code, expected });
	});

	it('should correctly rematch tokens', function () {
		// `a` is a non-greedy pattern, whereas `b` and `c` are both greedy.
		const grammar = {
			'a': {
				pattern: /'[^'\r\n]*'/,
			},
			'b': {
				pattern: /"[^"\r\n]*"/,
				greedy: true,
			},
			'c': {
				pattern: /<[^>\r\n]*>/,
				greedy: true,
			}
		};
		// Both lines have the same shape; the first uses the quotes of `a`, the second those of `b`.
		const code = `<'> '' ''\n<"> "" ""`;
		const expected = [
			// first line
			["c", "<'>"],
			" '",
			["a", "' '"],
			"'\n",

			// second line
			["c", "<\">"],
			["b", "\"\""],
			["b", "\"\""],
		];

		testTokens({ grammar, code, expected });
	});

	it('should always match tokens against the whole text', function () {
		// This is to test for a bug where greedy tokens were matched like non-greedy ones if the token stream ended
		// on a string.
		const grammar = {
			'a': /a/,
			'b': {
				// Anchored with `^`; only the leading `b` of the code is expected to become a token.
				pattern: /^b/,
				greedy: true
			}
		};
		const code = 'bab';
		const expected = [
			["b", "b"],
			["a", "a"],
			"b"
		];

		testTokens({ grammar, code, expected });
	});

});