Skip to content

Commit

Permalink
couple more tests for the html5 parser to demonstrate it can parse ug…
Browse files Browse the repository at this point in the history
…ly and really ugly html
  • Loading branch information
thatcher committed Nov 15, 2009
1 parent 4eabf20 commit d1b4b77
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 70 deletions.
3 changes: 1 addition & 2 deletions dist/env.js
Expand Up @@ -2013,12 +2013,11 @@ __extend__(DOMElement.prototype, {

// serialize Attribute declarations
var attrs = this.attributes.xml;
if (attrs.length > 0) attrs = ""+ attrs;

// serialize this Element
ret += "<" + this.nodeName.toLowerCase() + ns + attrs +">";
ret += this.childNodes.xml;
ret += "</" + this.nodeName.toLowerCase()+">";
ret += "</" + this.nodeName.toLowerCase() + ">";

return ret;
},
Expand Down
3 changes: 1 addition & 2 deletions dist/env.rhino.js
Expand Up @@ -2643,12 +2643,11 @@ __extend__(DOMElement.prototype, {

// serialize Attribute declarations
var attrs = this.attributes.xml;
if (attrs.length > 0) attrs = ""+ attrs;

// serialize this Element
ret += "<" + this.nodeName.toLowerCase() + ns + attrs +">";
ret += this.childNodes.xml;
ret += "</" + this.nodeName.toLowerCase()+">";
ret += "</" + this.nodeName.toLowerCase() + ">";

return ret;
},
Expand Down
3 changes: 1 addition & 2 deletions src/dom/element.js
Expand Up @@ -190,12 +190,11 @@ __extend__(DOMElement.prototype, {

// serialize Attribute declarations
var attrs = this.attributes.xml;
if (attrs.length > 0) attrs = ""+ attrs;

// serialize this Element
ret += "<" + this.nodeName.toLowerCase() + ns + attrs +">";
ret += this.childNodes.xml;
ret += "</" + this.nodeName.toLowerCase()+">";
ret += "</" + this.nodeName.toLowerCase() + ">";

return ret;
},
Expand Down
154 changes: 90 additions & 64 deletions test/unit/parser.js
@@ -1,86 +1,112 @@
// environment mocking for parser
$w = { }
$env = { debug: function() {} }
$openingWindow = $parentWindow = $initTop = null;
module("Html5Parser");

load("src/window/window.js", "src/dom/parser.js", "src/dom/entities.js");
/**
 * Small fluent assertion helper layered over the QUnit globals `equals`
 * and `ok`. Exactly one assertion is recorded per call.
 *
 * Modes, selected by `options.be`:
 *   'equal' - asserts that `options.actual` equals `options.expected`
 *   'safe'  - runs `options.test()` and records a pass if it does not throw
 * Any other value (including a missing `options`) records a failure.
 *
 * @param {String} msg      description of the expectation, used as the
 *                          assertion message
 * @param {Object} options  { be, actual, expected, test, msg }; `msg` here
 *                          optionally overrides the failure message
 * @returns the `this` value the function was invoked with, so that calls can
 *          be chained as `should(...).should(...)` when `should` is a global
 *          invoked in non-strict code
 */
var should = function(msg, options){
    // tolerate a missing options object so it is reported as a failed
    // assertion instead of raising a TypeError
    var opts = options || {};
    try{
        if(opts.be === 'equal'){
            // QUnit signature is equals(actual, expected, message)
            equals(
                opts.actual,
                opts.expected,
                msg
            );
        }else if(opts.be === 'safe'){
            opts.test();
            ok(true, msg);
        }else{
            ok(false, 'unknown test '+opts.be);
        }
    }catch(e){
        // turn any exception into a failed assertion that says which
        // expectation broke and why
        ok(false, (opts.msg||msg||'This test failed.')+' ('+e+')');
    }
    // returned outside of a `finally` block on purpose: a `return` inside
    // `finally` would silently discard an exception thrown by the handler
    //TODO: might as well keep score here
    return this;
};

module("parser");

test("HTML Standard Entities: Spot Check", function() {

expect(3);

test("XML Standard Entities: Spot Check", function() {

expect(2);
var htmlstr =
"<div id='xmlentity'>&lt;Hello&gt;, &quot;W&apos;rld&quot;!</div>",
"<div id='xmlentity' \
style='&lt;Hello&gt;, &quot;W&apos;rld&quot;!'\
>&lt;Hello&gt;, &quot;W&apos;rld&quot;!</div>",
domParser = new DOMParser(),
doc = domParser.parseFromString(htmlstr),
actual,
expected;
doc = domParser.parseFromString(htmlstr);

actual = doc.
getElementById('xmlentity').
childNodes[0].
nodeValue;
expected = '<Hello>, "W\'rld"!';
equals(
actual,
expected,
"parser replaces entities"
);

actual = doc.
getElementById('xmlentity').
innerHTML;
expected = '&lt;Hello&gt;, "W\'rld"!';
equals(
actual,
expected,
"innerHTML serializes back only &amp;, &lt; and &gt; for TextNode"
);
should("Replace entities at nodeValue",{
be:'equal',
actual : doc.
getElementById('xmlentity').
childNodes[0].
nodeValue,
expected : '<Hello>, "W\'rld"!'
}).
should("serialize only &amp;, &lt; and &gt; for TextNode with innerHTML",{
be: 'equal',
actual : doc.
getElementById('xmlentity').
innerHTML,
expected : '&lt;Hello&gt;, "W\'rld"!'
});

});

test("HTML Standard Entities: Spot Check", function() {

htmlstr = "<div id='htmlentity'>&quot; &amp; &lt; &gt; "+
expect(1);
var htmlstr = "<div id='htmlentity'>&quot; &amp; &lt; &gt; "+
"&nbsp; &copy; &reg; &yen; &para; " +
"&Ecirc; &Otilde; &aelig; &divide; &Kappa; &theta; "+
"&bull; &hellip; &trade; &rArr; &sum; &clubs; " +
"&ensp; &mdash;</body></html>";
expected = '" &amp; &lt; &gt; '+
'\xA0 \xA9 \xAE \xA5 \xB6 '+
'\xCA \xD5 \xE6 \xF7 \u039A \u03B8 '+
'\u2022 \u2026 \u2122 \u21D2 \u2211 \u2663 '+
'\u2002 \u2014';

domParser = new DOMParser();
doc = domParser.parseFromString(htmlstr);
actual = doc.
getElementById('htmlentity').
innerHTML;
"&ensp; &mdash;</div>",
domParser = new DOMParser(),
doc = domParser.parseFromString(htmlstr);

equals(
actual,
expected,
"html entities are not serialized back with innerHTML"
);
should("serialize only &amp;, &lt; and &gt; for TextNode with innerHTML",{
be:'equal',
actual:doc.
getElementById('htmlentity').
innerHTML,
expected : '" &amp; &lt; &gt; '+
'\xA0 \xA9 \xAE \xA5 \xB6 '+
'\xCA \xD5 \xE6 \xF7 \u039A \u03B8 '+
'\u2022 \u2026 \u2122 \u21D2 \u2211 \u2663 '+
'\u2002 \u2014'
});

});

test("HTML Serialization Convention", function(){
test("Serialization Conventions", function(){

});

test("Ugly HTML Parsing", function() {

test("Ugly HTML", function() {
expect(1);
//setup
var domParser = new DOMParser(),
html = '<div id="pig"><p>this is a pig... &apos;oink! oink!&apos;</div>',
doc = domParser.parseFromString(html),
expected = '<div id="pig"><p>this is a pig... \'oink! oink!\'</p></div>',
actual = doc.getElementById('pig').xml;

equals(
actual,
expected,
'got expected well formed html'
);
doc = domParser.parseFromString(html);

should('correct the unclosed p tag',{
be:'equal',
actual:doc.
getElementById('pig').
xml,
expected:'<div id="pig"><p>this is a pig... \'oink! oink!\'</p></div>'
});

});

// Smoke test: opening a malformed HTML page must complete without throwing.
test("Really Ugly HTML Parsing", function() {

// one assertion is expected: the single `should` check below
expect(1);

// 'safe' mode runs the callback and records a pass only if it does not throw
should('parse the document without error',{
be:'safe',
test:function(){
// NOTE(review): presumably this drives the HTML parser over the malformed
// fixture page; confirm that window.open loads and parses synchronously
window.open('html/malformed.html');
}
});

});

0 comments on commit d1b4b77

Please sign in to comment.