Skip to content

Commit

Permalink
More forgiving handling of unquoted attributes in html elements.
Browse files Browse the repository at this point in the history
  • Loading branch information
BartJongejan committed Sep 24, 2014
1 parent f6b22c2 commit b520958
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 34 deletions.
4 changes: 4 additions & 0 deletions Changelog
@@ -1,3 +1,7 @@
24 September
Made handling of unquoted attribute values in html elements more forgiving.
Added form feed to the list of white space characters in html/xml parsing.

23 September
xml.c: Fixed bugs in handling close tags inside script (or style) cdata.
Removed allowance for white space between < / and element name.
Expand Down
10 changes: 7 additions & 3 deletions src/bracmat.c
Expand Up @@ -65,10 +65,14 @@ Test coverage:

*/

#define DATUM "23 September 2014"
#define DATUM "24 September 2014"
#define VERSION "6"
#define BUILD "189"
/* 23 September
#define BUILD "190"
/* 24 September
Made handling of unquoted attribute values in html elements more forgiving.
Added form feed to the list of white space characters in html/xml parsing.

23 September
xml.c: Fixed bugs in handling close tags inside script (or style) cdata.
Removed allowance for white space between < / and element name.
(Between / and > white space is still allowed. This is not comme il faut.)
Expand Down
90 changes: 59 additions & 31 deletions src/xml.c
Expand Up @@ -713,9 +713,10 @@ static estate lt(int kar)
/*
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
*/
return tag;
case 's':
Expand Down Expand Up @@ -751,6 +752,7 @@ static estate lt_cdata(int kar)
return tag;
/*
case ' ':
case '\f':
case '\n':
case '\r':
case '\t':
Expand Down Expand Up @@ -802,18 +804,15 @@ static estate scriptOrStyleEndElementL(int kar) /* <sc or <SC or <Sc or <sC or <
isMarkup = 1;
putOperatorChar(' ');
putOperatorChar('(');
/*cbStartMarkUp();*/
putOperatorChar('.');
cbEndElementName();
/*nxput(StaRt,endElementName ? endElementName : ch);
endElementName = NULL;
putOperatorChar('.');*/
cbEndElementName();
putOperatorChar(')');
def = def_pcdata;
tagState = def;
StaRt = ch+1;
return endoftag;
case ' ':
case '\f':
case '\n':
case '\r':
case '\t':
Expand All @@ -834,6 +833,7 @@ static estate lts_cdata(int kar)
switch(kar)
{
case ' ':
case '\f':
case '\n':
case '\r':
case '\t':
Expand Down Expand Up @@ -890,9 +890,10 @@ static estate element(int kar)
return tag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
cbEndElementName();
tagState = atts;
return tag;
Expand Down Expand Up @@ -941,9 +942,10 @@ static estate elementonly(int kar)
return tag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
if(endElementName == NULL)
endElementName = ch;
tagState = gt;
Expand Down Expand Up @@ -978,9 +980,10 @@ static estate gt(int kar)
return endoftag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
return tag;
default:
tagState = def;
Expand All @@ -1002,9 +1005,10 @@ static estate emptytag(int kar)
/*
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
return tag;*/
default:
tagState = def;
Expand All @@ -1027,9 +1031,10 @@ static estate atts(int kar)
return endoftag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
return tag;
case '/':
putOperatorChar(',');
Expand Down Expand Up @@ -1076,9 +1081,10 @@ static estate name(int kar)
return tag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
cbEndAttributeName();
tagState = atts_or_value;
return tag;
Expand Down Expand Up @@ -1115,9 +1121,10 @@ static estate value(int kar)
return notag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
return tag;
case '\'':
tagState = singlequotes;
Expand All @@ -1126,17 +1133,17 @@ static estate value(int kar)
tagState = doublequotes;
return tag;
default:
if(('0' <= kar && kar <= '9') || ('A' <= kar && kar <= 'Z') || ('a' <= kar && kar <= 'z') || (kar & 0x80))
{
/* if(('0' <= kar && kar <= '9') || ('A' <= kar && kar <= 'Z') || ('a' <= kar && kar <= 'z') || (kar & 0x80))
{*/
StaRt = ch;
tagState = invalue;
return tag;
}
/* }
else
{
tagState = def;
return notag;
}
}*/
}
}

Expand All @@ -1163,9 +1170,10 @@ static estate atts_or_value(int kar)
return notag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
return tag;
case '/':
cbEndAttribute();
Expand Down Expand Up @@ -1193,6 +1201,9 @@ static estate atts_or_value(int kar)
}
}

/* This is far from conforming to, and is more forgiving than, the syntax
for unquoted attribute values described in
https://html.spec.whatwg.org/multipage/syntax.html#unquoted
*/
static estate invalue(int kar)
{
switch(kar)
Expand All @@ -1212,23 +1223,34 @@ static estate invalue(int kar)
return endoftag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
nxput(StaRt,ch);
cbEndAttribute();
tagState = atts;
return tag;
case '/':
nxput(StaRt,ch);
cbEndAttribute();
putOperatorChar(',');
putOperatorChar(')');
tagState = emptytag;
return tag;
default:
if(('0' <= kar && kar <= '9') || ('A' <= kar && kar <= 'Z') || ('a' <= kar && kar <= 'z') || (kar & 0x80))
{
/* if(('0' <= kar && kar <= '9') || ('A' <= kar && kar <= 'Z') || ('a' <= kar && kar <= 'z') || (kar & 0x80))
{*/
return tag;
}
/* Returning notag does not solve the problem,
because we can't undo the already emitted name and attributes.
*/
/* }
else
{
tagState = def;
return notag;
}
}*/
}
}

Expand Down Expand Up @@ -1308,9 +1330,10 @@ static estate endvalue(int kar)
return endoftag;
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
tagState = atts;
return tag;
case '/':
Expand Down Expand Up @@ -1523,9 +1546,10 @@ static estate DOCTYPE7(int kar) /* <!DOCTYPE */
cbEndDOCTYPE();
return endoftag;
case ' ':
case '\t':
case '\r':
case '\f':
case '\n':
case '\r':
case '\t':
StaRt = ch;
tagState = DOCTYPE8;
return tag;
Expand Down Expand Up @@ -1581,9 +1605,10 @@ static estate DOCTYPE10(int kar) /* <!DOCTYPE S [ ] */
cbEndDOCTYPE();
return endoftag;
case ' ':
case '\t':
case '\r':
case '\f':
case '\n':
case '\r':
case '\t':
tagState = DOCTYPE10;
return tag;
default:
Expand Down Expand Up @@ -1737,9 +1762,10 @@ static estate endtag(int kar)
*/ /*
case 0xA0:
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
return tag;*/
/*default:*/
if(('A' <= kar && kar <= 'Z') || ('a' <= kar && kar <= 'z') || (kar & 0x80))
Expand Down Expand Up @@ -1809,9 +1835,10 @@ void XMLtext(FILE * fpi,char * bron,int trim,int html,int xml)
switch(kar)
{
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
{
if(!whitespace)
{
Expand Down Expand Up @@ -1842,9 +1869,10 @@ void XMLtext(FILE * fpi,char * bron,int trim,int html,int xml)
switch(kar)
{
case ' ':
case '\t':
case '\f':
case '\n':
case '\r':
case '\t':
{
if(!whitespace)
{
Expand Down

0 comments on commit b520958

Please sign in to comment.