Permalink
Browse files

[feature] add support for HTML comment and CDATA

  • Loading branch information...
AndreasMadsen committed Mar 22, 2013
1 parent 7641434 commit b4a936e3bc177bb0b5188d48ef68923ed6fca6e0
Showing with 171 additions and 1 deletion.
  1. +30 −1 lib/parse.js
  2. +141 −0 test/simple/document-skiptag-parser.js
View
@@ -52,6 +52,18 @@ function parse(content) {
tag = nextTag(content, pos);
if (tag === null) break;
+ // skip <![CDATA[<greeting>Hello, world!</greeting>]]> and <!-- Hallo, world -->
+ // First do a fast check for < followed by !; if that's the case then this
+ // is likely something that should be handled, and we can spend some resources
+ // on that.
+ if (content[tag.start + 1] === '!') {
+
+ // Update the position and search for the next tag if the tag was
+ // recognized as an HTML comment or a CDATA section
+ pos = skipTag(content, tag.start);
+ if (pos !== null) continue;
+ }
+
// move position to the end of the latest found tag
pos = tag.end;
@@ -99,6 +111,22 @@ function endTag(elem, tag) {
elem.pos.afterend = tag.end - tag.start;
}
// Find where a tag that should be skipped (an HTML comment or an XML CDATA
// section) ends.
//
// content  - the full document string being parsed
// position - index of the `<` that opens the candidate tag
//
// Returns the index at which the closing terminator (`-->` or `]]>`) begins,
// or null when the text at `position` is neither a comment nor a CDATA
// section, or when it is never terminated. Returning null instead of the raw
// -1 from indexOf matters: the caller only checks for null, so a -1 would be
// taken as a valid position and move the scan back before the start of the
// document.
function skipTag(content, position) {
  var terminator;

  // Most likely case is a HTML comment
  if (content.slice(position, position + 4) === '<!--') {
    terminator = '-->';
  }
  // Second most likely case is XML CDATA
  else if (content.slice(position, position + 9) === '<![CDATA[') {
    terminator = ']]>';
  }
  // Anything else starting with `<!` is not skipped here
  else {
    return null;
  }

  var end = content.indexOf(terminator, position);

  // An unterminated comment/CDATA section can not be skipped
  return end === -1 ? null : end;
}
+
function createTag(content, tag, parent) {
// this should have the same order as in copy.js
var elem = {
@@ -302,4 +330,5 @@ function createTag(content, tag, parent) {
}
return elem;
-}
+
+}
@@ -0,0 +1,141 @@
+/**
+ * Copyright (c) 2012 Andreas Madsen
+ * MIT License
+ */
+
+var chai = require('chai');
+var common = require('../common.js');
+var domstream = common.domstream;
+
describe('testing pretag parser', function () {
  var assert = chai.assert;

  // Build the expected root node of a parsed document. `length` is the
  // expected `beforeend` position of the root, `elements` its child nodes.
  function expectedRoot(length, elements) {
    return {
      "isRoot": true,
      "pos": {
        "beforebegin": 0,
        "afterbegin": 0,
        "beforeend": length,
        "afterend": 0
      },
      "childrens": elements
    };
  }

  // Build an expected, attribute-less, non-singleton element node. `pos` is
  // given as [beforebegin, afterbegin, beforeend, afterend].
  function expectedElement(tagname, pos, elements) {
    return {
      "pos": {
        "beforebegin": pos[0],
        "afterbegin": pos[1],
        "beforeend": pos[2],
        "afterend": pos[3]
      },
      "modify": false,
      "singleton": false,
      "tagname": tagname,
      "keys": [],
      "attr": {},
      "childrens": elements
    };
  }

  // Register one parser case: parse `content`, then check both the raw text
  // found between the outer element's tags and the complete resulting tree.
  function parserCase(title, content, innerText, expectedTree) {
    describe(title, function () {
      // The document is parsed while the suite is being defined, not inside
      // the test itself.
      var doc = domstream(content);

      it('the content should be parsed as expected', function () {
        var outer = doc.tree.childrens[0];
        var found = content.slice(outer.pos.afterbegin + 1, outer.pos.beforeend);

        assert.equal(found, innerText);
        common.matchTree(doc.tree, expectedTree);
      });
    });
  }

  // The markup inside a HTML comment must not show up as child elements
  parserCase(
    'when parsing HTML comments',
    '<doc><!-- <b></b> --></doc>',
    '<!-- <b></b> -->',
    expectedRoot(26, [
      expectedElement("doc", [0, 4, 21, 5], [])
    ])
  );

  // The markup inside a CDATA section must not show up as child elements
  parserCase(
    'when parsing XML CDATA',
    '<doc><![CDATA[ <b></b> ]]></doc>',
    '<![CDATA[ <b></b> ]]>',
    expectedRoot(31, [
      expectedElement("doc", [0, 4, 26, 5], [])
    ])
  );

  // An unknown `<!...>` construct is expected to be parsed as a normal element
  parserCase(
    'when parsing something wired',
    '<doc><!wired> </!wired></doc>',
    '<!wired> </!wired>',
    expectedRoot(28, [
      expectedElement("doc", [0, 4, 23, 5], [
        expectedElement("!wired", [5, 7, 14, 8], [])
      ])
    ])
  );
});

0 comments on commit b4a936e

Please sign in to comment.