From 4f050440f373c6423902acbdd93b41fc1b921fae Mon Sep 17 00:00:00 2001 From: Dmitry Soshnikov Date: Mon, 24 Apr 2017 15:43:55 -0700 Subject: [PATCH] Implement 're' shorthand with single-escape policy --- README.md | 101 +++++++++---- .../fixtures/integration/expected-subset.js | 2 +- __tests__/fixtures/integration/expected.js | 2 +- __tests__/fixtures/re/expected.js | 3 + __tests__/fixtures/re/input.js | 10 ++ __tests__/fixtures/re/options.json | 3 + __tests__/fixtures/x-flag/expected.js | 2 +- __tests__/modern-regexp-test.js | 10 +- index.js | 134 +++++++++++++++--- 9 files changed, 217 insertions(+), 50 deletions(-) create mode 100644 __tests__/fixtures/re/expected.js create mode 100644 __tests__/fixtures/re/input.js create mode 100644 __tests__/fixtures/re/options.json diff --git a/README.md b/README.md index 0f9e816..1e3fcaa 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,9 @@ Enables modern RegExp features in JavaScript. - [Named capturing groups](#named-capturing-groups) - [Extended x-flag](#extended-x-flag) - [Plugin options](#plugin-options) - - [`includeRuntime` option](#includeruntime-option) - [`features` option](#features-option) + - [`useRe` option](#usere-option) + - [`useRuntime` option](#useruntime-option) - [Usage](#usage) - [Via `.babelrc`](#via-babelrc) - [Via CLI](#via-cli) @@ -99,18 +100,85 @@ new RegExp(` Translated into: ```js -new RegExp('(\\d{4})-(\\d{2})-(\\d{2})', ''); +/(\d{4})-(\d{2})-(\d{2})/; ``` ## Plugin options The plugin supports the following options. -### `includeRuntime` option +### `features` option + +This options allows choosing which specific transformations to apply. Available features are: + +- `dotAll` +- `namedCapturingGroups` +- `xFlag` + +which can be specified as an extra object for the plugin: + +```json +{ + "plugins": ["transform-modern-regexp", { + "features": [ + "namedCapturingGroups", + "xFlag" + ] + }] +} +``` + +> NOTE: if omitted, all features are used by default. 
+ +### `useRe` option + +This option enables a convenient `re` shorthand, which allows using multiline regexes with _single escape for meta-characters_ (just like in regular expression literals). + +Taking the example of the date regexp using the standard `RegExp` constructor: + +```js +new RegExp(` + + # A regular expression for date. + + (?<year>\\d{4})- # year part of a date + (?<month>\\d{2})- # month part of a date + (?<day>\\d{2}) # day part of a date + +`, 'x'); +``` + +we see inconvenient double-escaping of `\\d` (and similarly for other meta-characters). The `re` shorthand allows using single escaping: -> NOTE: the `includeRuntime` option is not implemented yet. Track [issue #3](https://github.com/DmitrySoshnikov/babel-plugin-transform-modern-regexp/issues/3) for details. +```js +re`/ + + # A regular expression for date. + + (?<year>\d{4})- # year part of a date + (?<month>\d{2})- # month part of a date + (?<day>\d{2}) # day part of a date + +/x`; +``` + +As we can see, `re` accepts a regexp in the _literal notation_, which unifies the usage format. + +In both cases it's translated to a simple regexp literal, so there is no runtime overhead: -> NOTE: `includeRuntime` is not required: if e.g. named groups are used mostly for readability, the `includeRuntime` can be omitted. If you need to access actual group names on the matched results, the runtime support should be used. +```js +/(\d{4})-(\d{2})-(\d{2})/ +``` + +> NOTE: it supports only template string literals, you can't use expressions there. Be careful also with `/${4}/` -- this is treated as a template literal expression, and should be written as `/\${4}/` instead. + +> NOTE: `\\1` backreferences should still be escaped with _double backslashes_. This is because template literals do not allow `\1`, treating it as an octal escape sequence. + +### `useRuntime` option + +> NOTE: the `useRuntime` option is not implemented yet. Track [issue #3](https://github.com/DmitrySoshnikov/babel-plugin-transform-modern-regexp/issues/3) for details.
+ +> NOTE: `useRuntime` is not required: if e.g. named groups are used mostly for readability, the `useRuntime` can be omitted. If you need to access actual group names on the matched results, the runtime support should be used. This option enables usage of a supporting runtime for the transformed regexes. The `RegExpTree` class is a thin wrapper on top of a native regexp, and has identical API. @@ -140,29 +208,6 @@ const result = re.exec('2017-04-17'); console.log(result.groups.year); // 2017 ``` -### `features` option - -This options allows choosing which specific transformations to apply. Available features are: - -- `dotAll` -- `namedCapturingGroups` -- `xFlag` - -which can be specified as an extra object for the plugin: - -```json -{ - "plugins": ["transform-modern-regexp", { - "features": [ - "namedCapturingGroups", - "xFlag" - ] - }] -} -``` - -> NOTE: if omitted, all features are used by default. - ## Usage ### Via `.babelrc` diff --git a/__tests__/fixtures/integration/expected-subset.js b/__tests__/fixtures/integration/expected-subset.js index a00ad4c..414b633 100644 --- a/__tests__/fixtures/integration/expected-subset.js +++ b/__tests__/fixtures/integration/expected-subset.js @@ -1 +1 @@ -const re = new RegExp('(.)+\\1\\1', 'su'); \ No newline at end of file +const re = /(.)+\1\1/su; \ No newline at end of file diff --git a/__tests__/fixtures/integration/expected.js b/__tests__/fixtures/integration/expected.js index d2f97ca..8c064d7 100644 --- a/__tests__/fixtures/integration/expected.js +++ b/__tests__/fixtures/integration/expected.js @@ -1 +1 @@ -const re = new RegExp('([\\0-\\u{10FFFF}])+\\1\\1', 'u'); \ No newline at end of file +const re = /([\0-\u{10FFFF}])+\1\1/u; \ No newline at end of file diff --git a/__tests__/fixtures/re/expected.js b/__tests__/fixtures/re/expected.js new file mode 100644 index 0000000..eb63678 --- /dev/null +++ b/__tests__/fixtures/re/expected.js @@ -0,0 +1,3 @@ +const dateRe = /(\d{4})-(\d{2})-(\d{2})/; + +const otherRe = 
/(x)\1\1/; \ No newline at end of file diff --git a/__tests__/fixtures/re/input.js b/__tests__/fixtures/re/input.js new file mode 100644 index 0000000..c26a5c0 --- /dev/null +++ b/__tests__/fixtures/re/input.js @@ -0,0 +1,10 @@ +const dateRe = re`/ + # A regular expression for date. + + (?\d{4})- # year part of a date + (?\d{2})- # month part of a date + (?\d{2}) # day part of a date + +/x`; + +const otherRe = re`/(?x)\\1\k/`; \ No newline at end of file diff --git a/__tests__/fixtures/re/options.json b/__tests__/fixtures/re/options.json new file mode 100644 index 0000000..b6242f1 --- /dev/null +++ b/__tests__/fixtures/re/options.json @@ -0,0 +1,3 @@ +{ + "useRe": true +} \ No newline at end of file diff --git a/__tests__/fixtures/x-flag/expected.js b/__tests__/fixtures/x-flag/expected.js index 208b257..3934632 100644 --- a/__tests__/fixtures/x-flag/expected.js +++ b/__tests__/fixtures/x-flag/expected.js @@ -1 +1 @@ -const re = new RegExp('(\\d{4})-(\\d{2})-(\\d{2})', ''); \ No newline at end of file +const re = /(\d{4})-(\d{2})-(\d{2})/; \ No newline at end of file diff --git a/__tests__/modern-regexp-test.js b/__tests__/modern-regexp-test.js index e8a79ab..e9a8eab 100644 --- a/__tests__/modern-regexp-test.js +++ b/__tests__/modern-regexp-test.js @@ -19,9 +19,17 @@ describe('modern-regexp-test', () => { const fixtureDir = path.join(fixturesDir, caseName); const inputPath = path.join(fixtureDir, 'input.js'); + let options = {}; + + const optionsFile = path.join(fixtureDir, 'options.json'); + + if (fs.existsSync(optionsFile)) { + options = require(optionsFile); + } + const actual = transformFileSync(inputPath, { 'plugins': [ - plugin + [plugin, options] ] }).code; diff --git a/index.js b/index.js index 0497788..5e9ea2a 100644 --- a/index.js +++ b/index.js @@ -29,7 +29,20 @@ const regexpTree = require('regexp-tree'); * * /(\d{4})-(\d{2})-(\d{2})/ * - * Note: if `includeRuntime` option is passed, this is transalted into: + * 
------------------------------------------------------------------ + * 1. The `features` option. + * + * The `features` option allows specifying specific regexp features + * to be applied. Available are: + * + * - `dotAll` - enables handling of `s` flag + * - `namedCapturingGroups` - enables handling of named groups + * - `xFlag` - enables handling of `x` flag + * + * ------------------------------------------------------------------ + * 2. The `useRuntime` option. + * + * Note: if `useRuntime` option is passed, this is translated into: * * const RegExpTree = require('regexp-tree-runtime'); * @@ -52,53 +65,129 @@ const regexpTree = require('regexp-tree'); * In case of using runtime, it should be included as a dependency in your * package.json. * - * If group names are used mostly for readability, `includeRuntime` may be + * If group names are used mostly for readability, `useRuntime` may be * omitted. + * + * ------------------------------------------------------------------ + * 3. The `re` shorthand (`useRe` option) + * + * The `useRe` option enables usage of the re`...` pattern. This handles + * the global `re` function, where regular expressions can be used with + * single escaping. + * + * Using simple `RegExp` (note double escape `\\d` as per JS strings): + * + * new RegExp(` + * + * (?<year>\\d{2})- + * (?<month>\\d{2})- + * (?<day>\\d{2}) + * + * `, 'x'); + * + * vs. using `re` (note single escape for `\d`): + * + * re`/ + * + * (?<year>\d{2})- + * (?<month>\d{2})- + * (?<day>\d{2}) + * + * /x` */ module.exports = ({types: t}) => { + + /** + * Creates a `RegExpLiteral` node.
+ */ + function toRegExpLiteral(raw) { + const slashIndex = raw.lastIndexOf('/'); + + const pattern = raw.slice(1, slashIndex); + const flags = raw.slice(slashIndex + 1); + + const re = t.regExpLiteral( + pattern, + flags, + ); + + re.extra = { + raw, + }; + + return re; + } + return { pre(state) { - if (state.opts.includeRuntime) { - throw new Error(`includeRuntime is not implemented yet.`); + if (state.opts.useRuntime) { + throw new Error(`useRuntime is not implemented yet.`); } }, visitor: { - // Handle `/foo/i`. + /** + * Handle `/foo/i`. + */ RegExpLiteral({node}, state) { Object.assign(node, getTranslatedData(node.extra.raw, state)); }, - // Handle `new RegExp('foo', 'i')`. - NewExpression({node}, state) { + /** + * Handle re`/<pattern>/<flags>` pattern. + * Translate to `/doubleEscape(<pattern>)/<flags>` + */ + TaggedTemplateExpression(path, state) { + const {node} = path; + + if (!state.opts.useRe || !isReTemplate(node)) { + return; + } + + let re = node.quasi.quasis[0].value.raw; + + // Handle \\\\1 -> \\1. In templates \\1 should be used instead of + // \1 since \1 is treated as an octal number, which is not allowed + // in template strings. + re = re.replace(/\\\\(\d+)/g, '\\$1'); + + path.replaceWith(toRegExpLiteral(re)); + }, + + /** + * Handle `new RegExp(<pattern>, <flags>)`.
+ * + * Translate to /<pattern>/<flags> + */ + NewExpression(path, state) { + const {node} = path; + if (!isNewRegExp(node)) { return; } - let origPattern; + let pattern; if (node.arguments[0].type === 'StringLiteral') { - origPattern = node.arguments[0].value; + pattern = node.arguments[0].value; } else if (node.arguments[0].type === 'TemplateLiteral') { - origPattern = node.arguments[0].quasis[0].value.cooked; + pattern = node.arguments[0].quasis[0].value.cooked; } - let origFlags = ''; + let flags = ''; if (node.arguments[1]) { if (node.arguments[1].type === 'StringLiteral') { - origFlags = node.arguments[1].value; + flags = node.arguments[1].value; } else if (node.arguments[1].type === 'TemplateLiteral') { - origFlags = node.arguments[1].quasis[0].value.cooked; + flags = node.arguments[1].quasis[0].value.cooked; } } - const origRe = `/${origPattern}/${origFlags}`; - const {pattern, flags} = getTranslatedData(origRe, state); + const re = `/${pattern}/${flags}`; - node.arguments[0] = t.stringLiteral(pattern); - node.arguments[1] = t.stringLiteral(flags); + path.replaceWith(toRegExpLiteral(re)); } }, }; @@ -136,4 +225,13 @@ function isNewRegExp(node) { node.arguments[0].quasis.length === 1) ) ); -} \ No newline at end of file +} + +function isReTemplate(node) { + return ( + node.tag.type === 'Identifier' && + node.tag.name === 're' && + node.quasi.type === 'TemplateLiteral' && + node.quasi.quasis.length === 1 + ) +}