Skip to content

Commit

Permalink
Remove xmlMode reliance
Browse files Browse the repository at this point in the history
Markbind relies on xmlMode parsing of htmlparser2 to parse 'custom self
closing tags' such as '<panel />', and avoid the auto lower case
conversion of attribute names.

This introduces very tight coupling between the order and variations of
this option across html rendering and parsing calls.

In addition, xmlMode is not compliant with various html specs, such as
opening tags being able to act as closing tags in certain cases, or
more common cases like erroneously expanding <br> into <br>...</br>.

Let's use htmlparser2's recognizeSelfClosing tag and
lowerCaseAttributeName options to do the same instead.
  • Loading branch information
ang-zeyu committed Jun 12, 2020
1 parent ada4d76 commit 63d9f82
Show file tree
Hide file tree
Showing 19 changed files with 53 additions and 41 deletions.
15 changes: 7 additions & 8 deletions src/Page.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const cheerio = require('cheerio');
const cheerio = require('cheerio'); require('./lib/markbind/src/patches/htmlparser2');
const fm = require('fastmatter');
const fs = require('fs-extra-promise');
const htmlBeautify = require('js-beautify').html;
Expand Down Expand Up @@ -53,7 +53,6 @@ const {
TEMP_DROPDOWN_PLACEHOLDER_CLASS,
} = require('./constants');

cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag
cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities

class Page {
Expand Down Expand Up @@ -703,7 +702,7 @@ class Page {
const siteNavHtml = md.render(navigationElements.length === 0
? siteNavMappedData.replace(SITE_NAV_EMPTY_LINE_REGEX, '\n')
: navigationElements.html().replace(SITE_NAV_EMPTY_LINE_REGEX, '\n'));
const $nav = cheerio.load(siteNavHtml, { xmlMode: false });
const $nav = cheerio.load(siteNavHtml);

// Add anchor classes and highlight current page's anchor, if any.
const currentPageHtmlPath = this.src.replace(/\.(md|mbd)$/, '.html');
Expand Down Expand Up @@ -832,7 +831,7 @@ class Page {
*/
buildPageNav() {
if (this.isPageNavigationSpecifierValid()) {
const $ = cheerio.load(this.content, { xmlMode: false });
const $ = cheerio.load(this.content);
this.navigableHeadings = {};
this.collectNavigableHeadings($(`#${CONTENT_WRAPPER_ID}`).html());
const pageNavTitleHtml = this.generatePageNavTitleHtml();
Expand Down Expand Up @@ -877,7 +876,7 @@ class Page {
const headFileMappedData = this.variablePreprocessor.renderSiteVariables(this.sourcePath,
headFileContent).trim();
// Split top and bottom contents
const $ = cheerio.load(headFileMappedData, { xmlMode: false });
const $ = cheerio.load(headFileMappedData);
if ($('head-top').length) {
collectedTopContent.push($('head-top').html().trim().replace(/\n\s*\n/g, '\n')
.replace(/\n/g, '\n '));
Expand Down Expand Up @@ -906,7 +905,7 @@ class Page {
}

collectPageSection(section) {
const $ = cheerio.load(this.content, { xmlMode: false });
const $ = cheerio.load(this.content);
const pageSection = $(section);
if (pageSection.length === 0) {
return;
Expand Down Expand Up @@ -1075,7 +1074,7 @@ class Page {
}

if (domTagSourcesMap) {
const $ = cheerio.load(content, { xmlMode: true });
const $ = cheerio.load(content);

domTagSourcesMap.forEach(([tagName, attrName]) => {
if (!_.isString(tagName) || !_.isString(attrName)) {
Expand Down Expand Up @@ -1154,7 +1153,7 @@ class Page {
* @return String html of the element, with the attribute's asset resolved
*/
getResolvedAssetElement(assetElementHtml, tagName, attrName, plugin, pluginName) {
const $ = cheerio.load(assetElementHtml, { xmlMode: false });
const $ = cheerio.load(assetElementHtml);
const el = $(`${tagName}[${attrName}]`);

el.attr(attrName, (i, assetPath) => {
Expand Down
2 changes: 1 addition & 1 deletion src/Site.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const cheerio = require('cheerio');
const cheerio = require('cheerio'); require('./lib/markbind/src/patches/htmlparser2');
const fs = require('fs-extra-promise');
const ghpages = require('gh-pages');
const ignore = require('ignore');
Expand Down
15 changes: 4 additions & 11 deletions src/lib/markbind/src/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ _.isEmpty = require('lodash/isEmpty');
const md = require('./lib/markdown-it');
const utils = require('./utils');

cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag
cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities

class Parser {
Expand All @@ -41,7 +40,7 @@ class Parser {
}

static processDynamicResources(context, html, config) {
const $ = cheerio.load(html, { xmlMode: false });
const $ = cheerio.load(html);

function getAbsoluteResourcePath(elem, relativeResourcePath) {
const firstParent = elem.closest('div[data-included-from], span[data-included-from]');
Expand Down Expand Up @@ -83,7 +82,7 @@ class Parser {
}

static unwrapIncludeSrc(html) {
const $ = cheerio.load(html, { xmlMode: false });
const $ = cheerio.load(html);
$('div[data-included-from], span[data-included-from]').each(function () {
$(this).replaceWith($(this).contents());
});
Expand Down Expand Up @@ -124,15 +123,11 @@ class Parser {
switch (node.name) {
case 'md':
node.name = 'span';
cheerio.prototype.options.xmlMode = false;
node.children = cheerio.parseHTML(md.renderInline(cheerio.html(node.children)), true);
cheerio.prototype.options.xmlMode = true;
break;
case 'markdown':
node.name = 'div';
cheerio.prototype.options.xmlMode = false;
node.children = cheerio.parseHTML(md.render(cheerio.html(node.children)), true);
cheerio.prototype.options.xmlMode = true;
break;
default:
break;
Expand Down Expand Up @@ -199,7 +194,7 @@ class Parser {
});
resolve(cheerio.html(nodes));
});
const parser = new htmlparser.Parser(handler, { xmlMode: true });
const parser = new htmlparser.Parser(handler);

const renderedContent = this.variablePreprocessor.renderPage(file, content, additionalVariables);

Expand Down Expand Up @@ -238,11 +233,9 @@ class Parser {
nodes.forEach((d) => {
this._trimNodes(d);
});
cheerio.prototype.options.xmlMode = false;
resolve(cheerio.html(nodes));
cheerio.prototype.options.xmlMode = true;
});
const parser = new htmlparser.Parser(handler, { xmlMode: true });
const parser = new htmlparser.Parser(handler);
const fileExt = utils.getExt(filePath);
if (utils.isMarkdownFileExt(fileExt)) {
const renderedContent = md.render(content);
Expand Down
1 change: 0 additions & 1 deletion src/lib/markbind/src/parsers/componentParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ const {
ATTRIB_CWF,
} = require('../constants');

cheerio.prototype.options.xmlMode = true; // Enable xml mode for self-closing tag
cheerio.prototype.options.decodeEntities = false; // Don't escape HTML entities

/*
Expand Down
21 changes: 20 additions & 1 deletion src/lib/markbind/src/patches/htmlparser2.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,23 @@
const { Tokenizer } = require('htmlparser2');
/*
* Four behaviours of htmlparser2 are patched here, the first 2 being 'convenience' patches
* to avoid repeated passing of lowerCaseAttributeNames and recognizeSelfClosing options.
* 1. Defaulting to self closing tag recognition
* 2. Disabling automatic attribute name conversion to lower case
* 3. Recognising text in markdown inline code / code blocks as raw text
* 4. Ability to inject/whitelist certain tags to be parsed like script/style tags do. (special tags)
*/

const { Tokenizer, Parser } = require('htmlparser2');

// Self closing tag recognition patch
Parser.prototype.onselfclosingtag = function () {
this._closeCurrentTag();
};

// Disable automatic lower case conversion
Parser.prototype.onattribname = function (name) {
this._attribname = name;
};

const MARKDOWN = Symbol('MARKDOWN');

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/algolia.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function buildAlgoliaInitScript(pluginContext) {
}

function addNoIndexClasses(content) {
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
const noIndexSelectors = [
'dropdown',
'b-modal',
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/codeBlockCopyButtons.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ const copyCodeBlockScript = `<script>
module.exports = {
getScripts: () => [copyCodeBlockScript],
postRender: (content) => {
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
const codeBlockSelector = 'pre';
const buttonHTML = getButtonHTML();
$(codeBlockSelector).each((i, codeBlock) => {
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/default/markbind-plugin-anchors.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const CSS_FILE_NAME = 'markbind-plugin-anchors.css';
module.exports = {
getLinks: (content, pluginContext, frontMatter, utils) => [utils.buildStylesheet(CSS_FILE_NAME)],
postRender: (content) => {
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
$(':header').each((i, heading) => {
if ($(heading).attr('id')) {
$(heading).append(
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/default/markbind-plugin-footnotes-popovers.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const { parseComponents } = require('../../lib/markbind/src/parsers/componentPar

module.exports = {
postRender: (content) => {
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
let popoversHtml = '';
$('li.footnote-item').each((index, li) => {
const id = `pop:footnote${index + 1}`;
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/default/markbind-plugin-plantuml.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ module.exports = {
// Clear <puml> tags processed before for live reload
processedDiagrams.clear();
// Processes all <puml> tags
const $ = cheerio.load(content, { xmlMode: true });
const $ = cheerio.load(content);
$('puml').each((i, tag) => {
tag.name = 'pic';
const { cwf } = tag.attribs;
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/default/markbind-plugin-shorthandSyntax.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ function convertPanelHeadings($) {
*/
module.exports = {
postRender: (content) => {
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
convertPanelHeadings($);
return $.html();
},
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/filterTags.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ function filterTags(tags, content) {
if (!tags) {
return content;
}
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
const tagOperations = tags.map(tag => ({
// Trim leading + or -, replace * with .*, add ^ and $
tagExp: `^${escapeRegExp(tag.replace(/^(\+|-)/g, '')).replace(/\\\*/, '.*')}$`,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Also, the boilerplate file name (e.g. `inside.md`) and the file that it is suppo

This file should behaves as if it is in the `requirements` folder:

<panel header="Tested with the folllowing include" src="NonFunctionalRequirements.md" >
<panel header="Tested with the folllowing include" src="NonFunctionalRequirements.md" />
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module.exports = {
preRender: (content, pluginContext) =>
content.replace('Markbind Plugin Pre-render Placeholder', `${pluginContext.pre}`),
postRender: (content, pluginContext) => {
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
$('#test-markbind-plugin').append(`${pluginContext.post}`);
return $.html();
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module.exports = {
preRender: (content, pluginContext) =>
content.replace('Markbind Plugin Pre-render Placeholder', `${pluginContext.pre}`),
postRender: (content, pluginContext) => {
const $ = cheerio.load(content, { xmlMode: false });
const $ = cheerio.load(content);
$('#test-markbind-plugin').append(`${pluginContext.post}`);
return $.html();
},
Expand Down
12 changes: 7 additions & 5 deletions test/functional/test_site/expected/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ <h1 id="panel-without-heading-with-keyword">Panel without heading with keyword<a
<h1 id="keyword-should-be-tagged-to-this-heading-not-the-panel-heading">Keyword should be tagged to this heading, not the panel heading<a class="fa fa-anchor" href="#keyword-should-be-tagged-to-this-heading-not-the-panel-heading" onclick="event.stopPropagation()"></a></h1>
<p><span class="keyword">panel keyword</span></p>
</panel>
<p></p>
<p><strong>Unexpanded panel with heading with keyword</strong>
<panel id="panel-with-heading-with-keyword"><template slot="_header">
<h1 id="panel-with-heading-with-keyword">Panel with heading with keyword<a class="fa fa-anchor" href="#panel-with-heading-with-keyword" onclick="event.stopPropagation()"></a></h1>
Expand Down Expand Up @@ -356,11 +357,11 @@ <h1 id="path-within-the-boilerplate-folder-is-separately-specified">Path within
<p>Like static include, pages within the site should be able to use files located in folders within boilerplate.</p>
<p>Also, the boilerplate file name (e.g. <code v-pre="">inside.md</code>) and the file that it is supposed to act as (<code v-pre="">notInside.md</code>) can be different.</p>
<p>This file should behaves as if it is in the <code v-pre="">requirements</code> folder:</p>
<panel src="/test_site/requirements/NonFunctionalRequirements._include_.html"><template slot="_header">
<p>Tested with the folllowing include</p>
</template>
<p></p>
</panel>
<p>
<panel src="/test_site/requirements/NonFunctionalRequirements._include_.html"><template slot="_header">
<p>Tested with the folllowing include</p>
</template></panel>
</p>
</div>
<p><strong>Nested include</strong></p>
<div>
Expand Down Expand Up @@ -714,6 +715,7 @@ <h6 class="always-index" id="level-6-header-outside-headingsearchindex-with-alwa




<nav id="page-nav" class="navbar navbar-light bg-transparent">
<div class="border-left-grey nav-inner position-sticky slim-scroll">
<a class="navbar-brand page-nav-title" href="#">Testing Page Navigation</a>
Expand Down
2 changes: 2 additions & 0 deletions test/functional/test_site_special_tags/expected/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ <h2 id="so-far-as-to-comply-with-the-commonmark-spec">So far as to comply with t
*/
</p>
</abc>
<p></p>
<hr>
<p>There are two self closing special tags below, which should display nothing, but are present in the output.
There is then one special tag with both and opening and closing tag with some text in it (<code v-pre="">lorem ipsum...</code>).
Expand All @@ -97,6 +98,7 @@ <h2 id="so-far-as-to-comply-with-the-commonmark-spec">So far as to comply with t
alert(x);
}</p>
<p>success!</testtag>
<p></p>
</div>
<hr>
</div>
Expand Down
2 changes: 1 addition & 1 deletion test/unit/parser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ test('includeFile replaces <include src="doesNotExist.md" optional> with empty <

const expected = [
'# Index',
'<div/>',
'<div></div>',
'',
].join('\n');

Expand Down
4 changes: 1 addition & 3 deletions test/unit/parsers/componentParser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ const parseAndVerifyTemplate = (template, expectedTemplate, postParse = false) =
expect(result).toEqual(expectedTemplate);
});

const htmlParser = new htmlparser.Parser(handler, {
xmlMode: true,
});
const htmlParser = new htmlparser.Parser(handler);

htmlParser.parseComplete(template);
};
Expand Down

0 comments on commit 63d9f82

Please sign in to comment.