diff --git a/README.md b/README.md index 9633893f..3507d68c 100644 --- a/README.md +++ b/README.md @@ -13,10 +13,7 @@ written in pure javascript, no dependencies CodeFactor - - -BCH compliance - + Known Vulnerabilities @@ -73,6 +70,7 @@ written in pure javascript, no dependencies * [Basic usage](#basic-usage) * [React usage](#react-usage) * [Vue 2 usage](#vue2-usage) +* [Parse Options](#parse-options) * [Presets](#presets) * [Create your own preset](#create-preset) * [HTML Preset](#html-preset) @@ -111,7 +109,7 @@ import {render} from 'react-dom' import bbobReactRender from '@bbob/react/es/render' import presetReact from '@bbob/preset-react' -const options = { onlyAllowTags: ['i'], enableEscapeTags: true } +const options = { onlyAllowTags: ['i'], enableEscapeTags: true, contextFreeTags: ['code'] } const content = bbobReactRender(`[i]Text[/i]`, presetReact(), options) console.log(render({content})); // Text @@ -156,6 +154,47 @@ Vue.use(VueBbob); ``` More examples available in examples folder +### Parse options + +#### onlyAllowTags + +Parse only the tags listed in `onlyAllowTags`; any other tag is left as plain text + +```js +import bbobHTML from '@bbob/html' +import presetHTML5 from '@bbob/preset-html5' + +const processed = bbobHTML(`[i][b]Text[/b][/i]`, presetHTML5(), { onlyAllowTags: ['i'] }) + +console.log(processed); // [b]Text[/b] +``` + +#### contextFreeTags + +Enable context-free mode: tags nested inside the given tags are not parsed and are kept as plain text + +```js +import bbobHTML from '@bbob/html' +import presetHTML5 from '@bbob/preset-html5' + +const processed = bbobHTML(`[b]Text[/b][code][b]Text[/b][/code]`, presetHTML5(), { contextFreeTags: ['code'] }) + +console.log(processed); // Text
[b]Text[/b]
+``` + +#### enableEscapeTags + +Enable backslash escaping, so that escaped brackets such as `\[b\]` are output as literal text instead of being parsed as tags + +```js +import bbobHTML from '@bbob/html' +import presetHTML5 from '@bbob/preset-html5' + +const processed = bbobHTML(`[b]Text[/b]'\\[b\\]Text\\[/b\\]'`, presetHTML5(), { enableEscapeTags: true }) + +console.log(processed); // Text[b]Text[/b] +``` + ### Presets @@ -330,7 +369,7 @@ Tested on Node v12.18.3 | regex/parser | 6.02 ops/sec ±2.77% | (20 runs sampled) | | ya-bbcode | 10.70 ops/sec ±1.94% | (31 runs sampled) | | xbbcode/parser | 107 ops/sec ±2.29% | (69 runs sampled) | -| @bbob/parser | 137 ops/sec ±1.11% | (78 runs sampled) | +| @bbob/parser | 140 ops/sec ±1.11% | (78 runs sampled) | Developed with <3 using JetBrains diff --git a/package-lock.json b/package-lock.json index 72bd2a47..9bca20b4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,8 +13,8 @@ "@rollup/plugin-node-resolve": "15.0.1", "@rollup/plugin-replace": "5.0.1", "@size-limit/preset-small-lib": "6.0.1", - "@swc/cli": "0.1.57", - "@swc/core": "1.3.16", + "@swc/cli": "^0.1.57", + "@swc/core": "^1.3.16", "@swc/jest": "0.2.23", "bundlesize2": "0.0.31", "cross-env": "7.0.3", @@ -7326,14 +7326,20 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001283", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001283.tgz", - "integrity": "sha512-9RoKo841j1GQFSJz/nCXOj0sD7tHBtlowjYlrqIUS812x9/emfBLBt6IyMz1zIaYc/eRL8Cs6HPUVi2Hzq4sIg==", + "version": "1.0.30001441", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001441.tgz", + "integrity": "sha512-OyxRR4Vof59I3yGWXws6i908EtGbMzVUi3ganaZQHmydk1iwDhRnvaPG2WaR0KcqrDFKrxVZHULT396LEPhXfg==", "dev": true, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - } + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + } + ] }, "node_modules/chalk": { 
"version": "4.1.2", @@ -23840,9 +23846,9 @@ } }, "caniuse-lite": { - "version": "1.0.30001283", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001283.tgz", - "integrity": "sha512-9RoKo841j1GQFSJz/nCXOj0sD7tHBtlowjYlrqIUS812x9/emfBLBt6IyMz1zIaYc/eRL8Cs6HPUVi2Hzq4sIg==", + "version": "1.0.30001441", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001441.tgz", + "integrity": "sha512-OyxRR4Vof59I3yGWXws6i908EtGbMzVUi3ganaZQHmydk1iwDhRnvaPG2WaR0KcqrDFKrxVZHULT396LEPhXfg==", "dev": true }, "chalk": { diff --git a/packages/bbob-parser/package-lock.json b/packages/bbob-parser/package-lock.json index 68d258d2..520b8f01 100644 --- a/packages/bbob-parser/package-lock.json +++ b/packages/bbob-parser/package-lock.json @@ -12,19 +12,17 @@ "@bbob/plugin-helper": "^2.8.3" } }, - "../bbob-plugin-helper": { - "name": "@bbob/plugin-helper", - "version": "2.8.2", - "license": "MIT" - }, "node_modules/@bbob/plugin-helper": { - "resolved": "../bbob-plugin-helper", - "link": true + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/@bbob/plugin-helper/-/plugin-helper-2.8.3.tgz", + "integrity": "sha512-i1vVQZ7Ja5x6OLVyAXpwbTX/Id++wVJkve8q+wDhRHylW5/MJQqB6B6pZdGuFbyA5AQvUw2us8bsW0h4iZsDew==" } }, "dependencies": { "@bbob/plugin-helper": { - "version": "file:../bbob-plugin-helper" + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/@bbob/plugin-helper/-/plugin-helper-2.8.3.tgz", + "integrity": "sha512-i1vVQZ7Ja5x6OLVyAXpwbTX/Id++wVJkve8q+wDhRHylW5/MJQqB6B6pZdGuFbyA5AQvUw2us8bsW0h4iZsDew==" } } } diff --git a/packages/bbob-parser/src/Token.js b/packages/bbob-parser/src/Token.js index a47b6c17..c9e2dbee 100644 --- a/packages/bbob-parser/src/Token.js +++ b/packages/bbob-parser/src/Token.js @@ -171,5 +171,6 @@ export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME; export const TYPE_ATTR_VALUE = TOKEN_TYPE_ATTR_VALUE; export const TYPE_SPACE = TOKEN_TYPE_SPACE; export const TYPE_NEW_LINE = TOKEN_TYPE_NEW_LINE; + export 
{ Token }; export default Token; diff --git a/packages/bbob-parser/src/lexer.js b/packages/bbob-parser/src/lexer.js index 142728c4..ef1b6013 100644 --- a/packages/bbob-parser/src/lexer.js +++ b/packages/bbob-parser/src/lexer.js @@ -58,16 +58,17 @@ function createLexer(buffer, options = {}) { let tokenIndex = -1; let stateMode = STATE_WORD; let tagMode = TAG_STATE_NAME; + let contextFreeTag = ''; const tokens = new Array(Math.floor(buffer.length)); const openTag = options.openTag || OPEN_BRAKET; const closeTag = options.closeTag || CLOSE_BRAKET; const escapeTags = !!options.enableEscapeTags; + const contextFreeTags = options.contextFreeTags || []; const onToken = options.onToken || (() => { }); const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM]; const NOT_CHAR_TOKENS = [ - // ...(options.enableEscapeTags ? [BACKSLASH] : []), openTag, SPACE, TAB, N, ]; const WHITESPACES = [SPACE, TAB]; @@ -86,6 +87,16 @@ function createLexer(buffer, options = {}) { const unq = (val) => unquote(trimChar(val, QUOTEMARK)); + const checkContextFreeMode = (name, isClosingTag) => { + if (contextFreeTag !== '' && isClosingTag) { + contextFreeTag = ''; + } + + if (contextFreeTag === '' && contextFreeTags.includes(name)) { + contextFreeTag = name; + } + }; + const chars = createCharGrabber(buffer, { onSkip }); /** @@ -177,6 +188,7 @@ function createLexer(buffer, options = {}) { const name = tagChars.grabWhile(validName); emitToken(TYPE_TAG, name); + checkContextFreeMode(name); tagChars.skip(); @@ -192,41 +204,37 @@ function createLexer(buffer, options = {}) { function stateTag() { const currChar = chars.getCurr(); + const nextChar = chars.getNext(); - if (currChar === openTag) { - const nextChar = chars.getNext(); - - chars.skip(); - - // detect case where we have '[My word [tag][/tag]' or we have '[My last line word' - const substr = chars.substrUntilChar(closeTag); - const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0; + 
chars.skip(); - if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) { - emitToken(TYPE_WORD, currChar); + // detect case where we have '[My word [tag][/tag]' or we have '[My last line word' + const substr = chars.substrUntilChar(closeTag); + const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0; - return STATE_WORD; - } + if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) { + emitToken(TYPE_WORD, currChar); - // [myTag ] - const isNoAttrsInTag = substr.indexOf(EQ) === -1; - // [/myTag] - const isClosingTag = substr[0] === SLASH; + return STATE_WORD; + } - if (isNoAttrsInTag || isClosingTag) { - const name = chars.grabWhile((char) => char !== closeTag); + // [myTag ] + const isNoAttrsInTag = substr.indexOf(EQ) === -1; + // [/myTag] + const isClosingTag = substr[0] === SLASH; - chars.skip(); // skip closeTag + if (isNoAttrsInTag || isClosingTag) { + const name = chars.grabWhile((char) => char !== closeTag); - emitToken(TYPE_TAG, name); + chars.skip(); // skip closeTag - return STATE_WORD; - } + emitToken(TYPE_TAG, name); + checkContextFreeMode(name, isClosingTag); - return STATE_TAG_ATTRS; + return STATE_WORD; } - return STATE_WORD; + return STATE_TAG_ATTRS; } function stateAttrs() { @@ -259,13 +267,24 @@ function createLexer(buffer, options = {}) { } if (isWhiteSpace(chars.getCurr())) { - emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace)); + const word = chars.grabWhile(isWhiteSpace); + + emitToken(TYPE_SPACE, word); return STATE_WORD; } if (chars.getCurr() === openTag) { - if (chars.includes(closeTag)) { + if (contextFreeTag) { + const fullTagLen = openTag.length + SLASH.length + contextFreeTag.length; + const fullTagName = `${openTag}${SLASH}${contextFreeTag}`; + const foundTag = chars.grabN(fullTagLen); + const isEndContextFreeMode = foundTag === fullTagName; + + if (isEndContextFreeMode) { + return STATE_TAG; + } + } else if (chars.includes(closeTag)) { return STATE_TAG; } @@ -298,12 +317,16 @@ function 
createLexer(buffer, options = {}) { const isChar = (char) => isCharToken(char) && !isEscapeChar(char); - emitToken(TYPE_WORD, chars.grabWhile(isChar)); + const word = chars.grabWhile(isChar); + + emitToken(TYPE_WORD, word); return STATE_WORD; } - emitToken(TYPE_WORD, chars.grabWhile(isCharToken)); + const word = chars.grabWhile(isCharToken); + + emitToken(TYPE_WORD, word); return STATE_WORD; } @@ -320,10 +343,8 @@ function createLexer(buffer, options = {}) { stateMode = stateAttrs(); break; case STATE_WORD: - stateMode = stateWord(); - break; default: - stateMode = STATE_WORD; + stateMode = stateWord(); break; } } diff --git a/packages/bbob-parser/src/parse.js b/packages/bbob-parser/src/parse.js index 016c9d9a..9c84a5c4 100644 --- a/packages/bbob-parser/src/parse.js +++ b/packages/bbob-parser/src/parse.js @@ -10,9 +10,10 @@ import { createList } from './utils'; * @param {Object} opts * @param {Function} opts.createTokenizer * @param {Array} opts.onlyAllowTags + * @param {Array} opts.contextFreeTags + * @param {Boolean} opts.enableEscapeTags * @param {String} opts.openTag * @param {String} opts.closeTag - * @param {Boolean} opts.enableEscapeTags * @return {Array} */ const parse = (input, opts = {}) => { @@ -258,9 +259,10 @@ const parse = (input, opts = {}) => { tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, { onToken, - onlyAllowTags: options.onlyAllowTags, openTag, closeTag, + onlyAllowTags: options.onlyAllowTags, + contextFreeTags: options.contextFreeTags, enableEscapeTags: options.enableEscapeTags, }); diff --git a/packages/bbob-parser/src/utils.js b/packages/bbob-parser/src/utils.js index 70b50103..c3b68f60 100644 --- a/packages/bbob-parser/src/utils.js +++ b/packages/bbob-parser/src/utils.js @@ -13,7 +13,7 @@ function CharGrabber(source, options) { const { pos } = cursor; const idx = source.indexOf(char, pos); - return idx >= 0 ? source.substr(pos, idx - pos) : ''; + return idx >= 0 ? 
source.substring(pos, idx) : ''; }; const includes = (val) => source.indexOf(val, cursor.pos) >= 0; const hasNext = () => cursor.len > cursor.pos; @@ -25,7 +25,8 @@ function CharGrabber(source, options) { options.onSkip(); } }; - const rest = () => source.substr(cursor.pos); + const rest = () => source.substring(cursor.pos); + const grabN = (num = 0) => source.substring(cursor.pos, cursor.pos + num); const curr = () => source[cursor.pos]; const prev = () => { const prevPos = cursor.pos - 1; @@ -48,7 +49,7 @@ function CharGrabber(source, options) { } } - return source.substr(start, cursor.pos - start); + return source.substring(start, cursor.pos); }; /** * @type {skip} @@ -88,6 +89,11 @@ function CharGrabber(source, options) { * @return {String} */ this.grabWhile = grabWhile; + /** + * @param {Number} num + * @return {String} + */ + this.grabN = grabN; /** * Grabs rest of string until it find a char * @param {String} char diff --git a/packages/bbob-parser/test/index.test.js b/packages/bbob-parser/test/index.test.js new file mode 100644 index 00000000..cbc20189 --- /dev/null +++ b/packages/bbob-parser/test/index.test.js @@ -0,0 +1,9 @@ +import { TagNode } from "../src/index"; + +describe('index', () => { + test('tag with content and params', () => { + const tagNode = TagNode.create('test', {test: 1}, ['Hello']); + + expect(String(tagNode)).toBe('[test test="1"]Hello[/test]'); + }); +}) diff --git a/packages/bbob-parser/test/lexer.test.js b/packages/bbob-parser/test/lexer.test.js index c038f801..21e947f5 100644 --- a/packages/bbob-parser/test/lexer.test.js +++ b/packages/bbob-parser/test/lexer.test.js @@ -14,6 +14,7 @@ const TYPE_NAMES = Object.fromEntries(Object.keys(TYPE).map(key => [TYPE[key], k const tokenize = input => (createLexer(input).tokenize()); const tokenizeEscape = input => (createLexer(input, { enableEscapeTags: true }).tokenize()); +const tokenizeContextFreeTags = (input, tags = []) => (createLexer(input, { contextFreeTags: tags }).tokenize()); 
describe('lexer', () => { expect.extend({ @@ -463,6 +464,24 @@ describe('lexer', () => { expect(tokens).toBeMantchOutput(output); }); + test('context free tag [code]', () => { + const input = '[code] [b]some string[/b][/code]' + const tokens = tokenizeContextFreeTags(input, ['code']); + const output = [ + [TYPE.TAG, 'code', 0, 0], + [TYPE.SPACE, ' ', 0, 0], + [TYPE.WORD, '[', 0, 0], + [TYPE.WORD, 'b]some', 0, 0], + [TYPE.SPACE, ' ', 0, 0], + [TYPE.WORD, 'string', 0, 0], + [TYPE.WORD, '[', 0, 0], + [TYPE.WORD, '/b]', 0, 0], + [TYPE.TAG, '/code', 0, 0], + ] + + expect(tokens).toBeMantchOutput(output); + }) + test('bad closed tag with escaped backslash', () => { const input = `[b]test[\\b]`; const tokens = tokenizeEscape(input); diff --git a/packages/bbob-parser/test/parse.test.js b/packages/bbob-parser/test/parse.test.js index bc981772..494496dc 100644 --- a/packages/bbob-parser/test/parse.test.js +++ b/packages/bbob-parser/test/parse.test.js @@ -25,6 +25,23 @@ describe('Parser', () => { expectOutput(ast, output); }); + test('parse paired tags tokens 2', () => { + const ast = parse('[bar]Foo Bar[/bar]'); + const output = [ + { + tag: 'bar', + attrs: {}, + content: [ + 'Foo', + ' ', + 'Bar', + ], + }, + ]; + + expectOutput(ast, output); + }); + describe('onlyAllowTags', () => { test('parse only allowed tags', () => { const ast = parse('[h1 name=value]Foo [Bar] [/h1]', { @@ -126,6 +143,31 @@ describe('Parser', () => { }); }) + describe('contextFreeTags', () => { + test('context free tag [code]', () => { + const ast = parse('[code] [b]some string[/b][/code]', { + contextFreeTags: ['code'] + }); + const output = [ + { + tag: 'code', + attrs: {}, + content: [ + ' ', + '[', + 'b]some', + ' ', + 'string', + '[', + '/b]' + ] + } + ] + + expectOutput(ast, output); + }) + }) + test('parse inconsistent tags', () => { const ast = parse('[h1 name=value]Foo [Bar] /h1]'); const output = [ diff --git a/packages/bbob-plugin-helper/src/char.js b/packages/bbob-plugin-helper/src/char.js 
index a50e4560..ec2e63d3 100644 --- a/packages/bbob-plugin-helper/src/char.js +++ b/packages/bbob-plugin-helper/src/char.js @@ -13,23 +13,16 @@ const CLOSE_BRAKET = ']'; const SLASH = '/'; const BACKSLASH = '\\'; -const PLACEHOLDER_SPACE_TAB = ' '; -const PLACEHOLDER_SPACE = ' '; - -// const getChar = String.fromCharCode; - export { N, F, R, - TAB, EQ, - QUOTEMARK, + TAB, SPACE, - OPEN_BRAKET, - CLOSE_BRAKET, SLASH, - PLACEHOLDER_SPACE_TAB, - PLACEHOLDER_SPACE, BACKSLASH, + QUOTEMARK, + OPEN_BRAKET, + CLOSE_BRAKET, };