Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(parser): context free tag mode #165

Merged
merged 4 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
51 changes: 45 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@ written in pure javascript, no dependencies
</a>
<a href="https://www.codefactor.io/repository/github/jilizart/bbob">
<img src="https://www.codefactor.io/repository/github/jilizart/bbob/badge" alt="CodeFactor">
</a>
<a href="https://bettercodehub.com/">
<img src="https://bettercodehub.com/edge/badge/JiLiZART/bbob?branch=master" alt="BCH compliance">
</a>
</a>
<a href="https://snyk.io/test/github/JiLiZART/bbob?targetFile=package.json">
<img src="https://snyk.io/test/github/JiLiZART/bbob/badge.svg?targetFile=package.json" alt="Known Vulnerabilities">
</a>
Expand Down Expand Up @@ -73,6 +70,7 @@ written in pure javascript, no dependencies
* [Basic usage](#basic-usage)
* [React usage](#react-usage)
* [Vue 2 usage](#vue2-usage)
* [Parse Options](#parse-options)
* [Presets](#presets)
* [Create your own preset](#create-preset)
* [HTML Preset](#html-preset)
Expand Down Expand Up @@ -111,7 +109,7 @@ import {render} from 'react-dom'
import bbobReactRender from '@bbob/react/es/render'
import presetReact from '@bbob/preset-react'

const options = { onlyAllowTags: ['i'], enableEscapeTags: true }
const options = { onlyAllowTags: ['i'], enableEscapeTags: true, contextFreeTags: ['code'] }
const content = bbobReactRender(`[i]Text[/i]`, presetReact(), options)

console.log(render(<span>{content}</span>)); // <span><span style="font-style: italic;">Text</span></span>
Expand Down Expand Up @@ -156,6 +154,47 @@ Vue.use(VueBbob);
```
More examples available in <a href="https://github.com/JiLiZART/BBob/tree/master/examples">examples folder</a>

### Parse options <a name="parse-options"></a>

#### onlyAllowTags

Parse only the tags listed in `onlyAllowTags`; every other tag is left in the output as plain text

```js
import bbobHTML from '@bbob/html'
import presetHTML5 from '@bbob/preset-html5'

const processed = bbobHTML(`[i][b]Text[/b][/i]`, presetHTML5(), { onlyAllowTags: ['i'] })

console.log(processed); // <span style="font-style: italic;">[b]Text[/b]</span>
```

#### contextFreeTags

Enable context-free mode for the given tags: any tags nested inside them are not parsed and are kept as plain text

```js
import bbobHTML from '@bbob/html'
import presetHTML5 from '@bbob/preset-html5'

const processed = bbobHTML(`[b]Text[/b][code][b]Text[/b][/code]`, presetHTML5(), { contextFreeTags: ['code'] })

console.log(processed); // <span style="font-weight: bold;">Text</span><pre>[b]Text[/b]</pre>
```

#### enableEscapeTags

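Enable backslash escaping of tag brackets (`\[` and `\]`), so that escaped tags are output as literal text instead of being parsed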

```js
import bbobHTML from '@bbob/html'
import presetHTML5 from '@bbob/preset-html5'

const processed = bbobHTML(`[b]Text[/b]\\[b\\]Text\\[/b\\]`, presetHTML5(), { enableEscapeTags: true })

console.log(processed); // <span style="font-weight: bold;">Text</span>[b]Text[/b]
```


### Presets <a name="presets"></a>

Expand Down Expand Up @@ -330,7 +369,7 @@ Tested on Node v12.18.3
| regex/parser | 6.02 ops/sec ±2.77% | (20 runs sampled) |
| ya-bbcode | 10.70 ops/sec ±1.94% | (31 runs sampled) |
| xbbcode/parser | 107 ops/sec ±2.29% | (69 runs sampled) |
| @bbob/parser | 137 ops/sec ±1.11% | (78 runs sampled) |
| @bbob/parser | 140 ops/sec ±1.11% | (78 runs sampled) |


Developed with <3 using JetBrains
30 changes: 18 additions & 12 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 6 additions & 8 deletions packages/bbob-parser/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/bbob-parser/src/Token.js
Original file line number Diff line number Diff line change
Expand Up @@ -171,5 +171,6 @@ export const TYPE_ATTR_NAME = TOKEN_TYPE_ATTR_NAME;
export const TYPE_ATTR_VALUE = TOKEN_TYPE_ATTR_VALUE;
export const TYPE_SPACE = TOKEN_TYPE_SPACE;
export const TYPE_NEW_LINE = TOKEN_TYPE_NEW_LINE;

export { Token };
export default Token;
85 changes: 53 additions & 32 deletions packages/bbob-parser/src/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,17 @@ function createLexer(buffer, options = {}) {
let tokenIndex = -1;
let stateMode = STATE_WORD;
let tagMode = TAG_STATE_NAME;
let contextFreeTag = '';
const tokens = new Array(Math.floor(buffer.length));
const openTag = options.openTag || OPEN_BRAKET;
const closeTag = options.closeTag || CLOSE_BRAKET;
const escapeTags = !!options.enableEscapeTags;
const contextFreeTags = options.contextFreeTags || [];
const onToken = options.onToken || (() => {
});

const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
const NOT_CHAR_TOKENS = [
// ...(options.enableEscapeTags ? [BACKSLASH] : []),
openTag, SPACE, TAB, N,
];
const WHITESPACES = [SPACE, TAB];
Expand All @@ -86,6 +87,16 @@ function createLexer(buffer, options = {}) {

const unq = (val) => unquote(trimChar(val, QUOTEMARK));

/**
 * Updates the lexer's context free state after a tag token was emitted.
 *
 * A closing tag seen while a context free tag is active clears the active
 * tag; afterwards, if no context free tag is active and `name` is one of the
 * configured `contextFreeTags`, `name` becomes the active context free tag.
 *
 * @param {String} name tag name as it was emitted
 * @param {Boolean} [isClosingTag] truthy when the emitted tag is a closing tag
 */
const checkContextFreeMode = (name, isClosingTag) => {
const isInsideContextFreeTag = contextFreeTag !== '';

if (isInsideContextFreeTag && isClosingTag) {
contextFreeTag = '';
}

// still inside a context free tag, nothing can be opened
if (contextFreeTag !== '') {
return;
}

if (contextFreeTags.includes(name)) {
contextFreeTag = name;
}
};

const chars = createCharGrabber(buffer, { onSkip });

/**
Expand Down Expand Up @@ -177,6 +188,7 @@ function createLexer(buffer, options = {}) {
const name = tagChars.grabWhile(validName);

emitToken(TYPE_TAG, name);
checkContextFreeMode(name);

tagChars.skip();

Expand All @@ -192,41 +204,37 @@ function createLexer(buffer, options = {}) {

function stateTag() {
const currChar = chars.getCurr();
const nextChar = chars.getNext();

if (currChar === openTag) {
const nextChar = chars.getNext();

chars.skip();

// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = chars.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
chars.skip();

if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
emitToken(TYPE_WORD, currChar);
// detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
const substr = chars.substrUntilChar(closeTag);
const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;

return STATE_WORD;
}
if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
emitToken(TYPE_WORD, currChar);

// [myTag ]
const isNoAttrsInTag = substr.indexOf(EQ) === -1;
// [/myTag]
const isClosingTag = substr[0] === SLASH;
return STATE_WORD;
}

if (isNoAttrsInTag || isClosingTag) {
const name = chars.grabWhile((char) => char !== closeTag);
// [myTag ]
const isNoAttrsInTag = substr.indexOf(EQ) === -1;
// [/myTag]
const isClosingTag = substr[0] === SLASH;

chars.skip(); // skip closeTag
if (isNoAttrsInTag || isClosingTag) {
const name = chars.grabWhile((char) => char !== closeTag);

emitToken(TYPE_TAG, name);
chars.skip(); // skip closeTag

return STATE_WORD;
}
emitToken(TYPE_TAG, name);
checkContextFreeMode(name, isClosingTag);

return STATE_TAG_ATTRS;
return STATE_WORD;
}

return STATE_WORD;
return STATE_TAG_ATTRS;
}

function stateAttrs() {
Expand Down Expand Up @@ -259,13 +267,24 @@ function createLexer(buffer, options = {}) {
}

if (isWhiteSpace(chars.getCurr())) {
emitToken(TYPE_SPACE, chars.grabWhile(isWhiteSpace));
const word = chars.grabWhile(isWhiteSpace);

emitToken(TYPE_SPACE, word);

return STATE_WORD;
}

if (chars.getCurr() === openTag) {
if (chars.includes(closeTag)) {
if (contextFreeTag) {
const fullTagLen = openTag.length + SLASH.length + contextFreeTag.length;
const fullTagName = `${openTag}${SLASH}${contextFreeTag}`;
const foundTag = chars.grabN(fullTagLen);
const isEndContextFreeMode = foundTag === fullTagName;

if (isEndContextFreeMode) {
return STATE_TAG;
}
} else if (chars.includes(closeTag)) {
return STATE_TAG;
}

Expand Down Expand Up @@ -298,12 +317,16 @@ function createLexer(buffer, options = {}) {

const isChar = (char) => isCharToken(char) && !isEscapeChar(char);

emitToken(TYPE_WORD, chars.grabWhile(isChar));
const word = chars.grabWhile(isChar);

emitToken(TYPE_WORD, word);

return STATE_WORD;
}

emitToken(TYPE_WORD, chars.grabWhile(isCharToken));
const word = chars.grabWhile(isCharToken);

emitToken(TYPE_WORD, word);

return STATE_WORD;
}
Expand All @@ -320,10 +343,8 @@ function createLexer(buffer, options = {}) {
stateMode = stateAttrs();
break;
case STATE_WORD:
stateMode = stateWord();
break;
default:
stateMode = STATE_WORD;
stateMode = stateWord();
break;
}
}
Expand Down
6 changes: 4 additions & 2 deletions packages/bbob-parser/src/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ import { createList } from './utils';
* @param {Object} opts
* @param {Function} opts.createTokenizer
* @param {Array<string>} opts.onlyAllowTags
* @param {Array<string>} opts.contextFreeTags
* @param {Boolean} opts.enableEscapeTags
* @param {String} opts.openTag
* @param {String} opts.closeTag
* @param {Boolean} opts.enableEscapeTags
* @return {Array}
*/
const parse = (input, opts = {}) => {
Expand Down Expand Up @@ -258,9 +259,10 @@ const parse = (input, opts = {}) => {

tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, {
onToken,
onlyAllowTags: options.onlyAllowTags,
openTag,
closeTag,
onlyAllowTags: options.onlyAllowTags,
contextFreeTags: options.contextFreeTags,
enableEscapeTags: options.enableEscapeTags,
});

Expand Down
12 changes: 9 additions & 3 deletions packages/bbob-parser/src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ function CharGrabber(source, options) {
const { pos } = cursor;
const idx = source.indexOf(char, pos);

return idx >= 0 ? source.substr(pos, idx - pos) : '';
return idx >= 0 ? source.substring(pos, idx) : '';
};
const includes = (val) => source.indexOf(val, cursor.pos) >= 0;
const hasNext = () => cursor.len > cursor.pos;
Expand All @@ -25,7 +25,8 @@ function CharGrabber(source, options) {
options.onSkip();
}
};
const rest = () => source.substr(cursor.pos);
const rest = () => source.substring(cursor.pos);
/**
 * Reads up to `num` characters of the source starting at the current cursor
 * position. The cursor itself is not moved.
 * @param {Number} num
 * @return {String}
 */
const grabN = (num = 0) => {
const from = cursor.pos;
const to = from + num;

return source.substring(from, to);
};
const curr = () => source[cursor.pos];
const prev = () => {
const prevPos = cursor.pos - 1;
Expand All @@ -48,7 +49,7 @@ function CharGrabber(source, options) {
}
}

return source.substr(start, cursor.pos - start);
return source.substring(start, cursor.pos);
};
/**
* @type {skip}
Expand Down Expand Up @@ -88,6 +89,11 @@ function CharGrabber(source, options) {
* @return {String}
*/
this.grabWhile = grabWhile;
/**
* @param {Number} num
* @return {String}
*/
this.grabN = grabN;
/**
* Grabs rest of string until it find a char
* @param {String} char
Expand Down