Skip to content

Commit

Permalink
boosting common commnent parsing code, it was really slow. added spre…
Browse files Browse the repository at this point in the history
…cific

* parser.c: boosting common commnent parsing code, it was really
  slow.
* test/comment[3-5].xml result//comment[3-5].xml*: added sprecific
  regression tests
Daniel
  • Loading branch information
Daniel Veillard committed Jan 23, 2005
1 parent 0714c5b commit 4c778d8
Show file tree
Hide file tree
Showing 20 changed files with 1,272 additions and 32 deletions.
7 changes: 7 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
Sun Jan 23 18:35:00 CET 2005 Daniel Veillard <daniel@veillard.com>

* parser.c: boosting common commnent parsing code, it was really
slow.
* test/comment[3-5].xml result//comment[3-5].xml*: added sprecific
regression tests

Sun Jan 23 01:00:09 CET 2005 Daniel Veillard <daniel@veillard.com>

* parser.c: small optimization back.
Expand Down
226 changes: 194 additions & 32 deletions parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -3475,42 +3475,35 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
}

/**
* xmlParseComment:
* xmlParseCommentComplex:
* @ctxt: an XML parser context
* @buf: the already parsed part of the buffer
* @len: number of bytes filles in the buffer
* @size: allocated size of the buffer
*
* Skip an XML (SGML) comment <!-- .... -->
* The spec says that "For compatibility, the string "--" (double-hyphen)
* must not occur within comments. "
* This is the slow routine in case the accelerator for ascii didn't work
*
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
*/
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len;
int size = XML_PARSER_BUFFER_SIZE;
static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
int q, ql;
int r, rl;
int cur, l;
xmlParserInputState state;
xmlParserInputPtr input = ctxt->input;
int count = 0;

/*
* Check that there is a comment right here.
*/
if ((RAW != '<') || (NXT(1) != '!') ||
(NXT(2) != '-') || (NXT(3) != '-')) return;

state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT;
SHRINK;
SKIP(4);
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->instate = state;
return;
len = 0;
size = XML_PARSER_BUFFER_SIZE;
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
return;
}
}
q = CUR_CHAR(ql);
if (q == 0)
Expand All @@ -3523,7 +3516,6 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
cur = CUR_CHAR(l);
if (cur == 0)
goto not_terminated;
len = 0;
while (IS_CHAR(cur) && /* checked */
((cur != '>') ||
(r != '-') || (q != '-'))) {
Expand All @@ -3537,7 +3529,6 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
if (new_buf == NULL) {
xmlFree (buf);
xmlErrMemory(ctxt, NULL);
ctxt->instate = state;
return;
}
buf = new_buf;
Expand Down Expand Up @@ -3577,13 +3568,164 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
ctxt->sax->comment(ctxt->userData, buf);
xmlFree(buf);
}
ctxt->instate = state;
return;
not_terminated:
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment not terminated\n", NULL);
xmlFree(buf);
}
/**
* xmlParseComment:
* @ctxt: an XML parser context
*
* Skip an XML (SGML) comment <!-- .... -->
* The spec says that "For compatibility, the string "--" (double-hyphen)
* must not occur within comments. "
*
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
*/
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int size = XML_PARSER_BUFFER_SIZE;
int len;
xmlParserInputState state;
const xmlChar *in;
int nbchar = 0, ccol;

/*
* Check that there is a comment right here.
*/
if ((RAW != '<') || (NXT(1) != '!') ||
(NXT(2) != '-') || (NXT(3) != '-')) return;

state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT;
SKIP(4);
SHRINK;
GROW;

/*
* Accelerated common case where input don't need to be
* modified before passing it to the handler.
*/
in = ctxt->input->cur;
do {
if (*in == 0xA) {
ctxt->input->line++; ctxt->input->col = 1;
in++;
while (*in == 0xA) {
ctxt->input->line++; ctxt->input->col = 1;
in++;
}
}
get_more:
ccol = ctxt->input->col;
while (((*in > '-') && (*in <= 0x7F)) ||
((*in >= 0x20) && (*in < '-')) ||
(*in == 0x09)) {
in++;
ccol++;
}
ctxt->input->col = ccol;
if (*in == 0xA) {
ctxt->input->line++; ctxt->input->col = 1;
in++;
while (*in == 0xA) {
ctxt->input->line++; ctxt->input->col = 1;
in++;
}
goto get_more;
}
nbchar = in - ctxt->input->cur;
/*
* save current set of data
*/
if (nbchar > 0) {
if ((ctxt->sax != NULL) &&
(ctxt->sax->comment != NULL)) {
if (buf == NULL) {
if ((*in == '-') && (in[1] == '-'))
size = nbchar + 1;
else
size = XML_PARSER_BUFFER_SIZE + nbchar;
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
if (buf == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->instate = state;
return;
}
len = 0;
} else if (len + nbchar + 1 >= size) {
xmlChar *new_buf;
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
new_buf = (xmlChar *) xmlRealloc(buf,
size * sizeof(xmlChar));
if (new_buf == NULL) {
xmlFree (buf);
xmlErrMemory(ctxt, NULL);
ctxt->instate = state;
return;
}
buf = new_buf;
}
memcpy(&buf[len], ctxt->input->cur, nbchar);
len += nbchar;
buf[len] = 0;
}
}
ctxt->input->cur = in;
if (*in == 0xA)

if (*in == 0xD) {
in++;
if (*in == 0xA) {
ctxt->input->cur = in;
in++;
ctxt->input->line++; ctxt->input->col = 1;
continue; /* while */
}
in--;
}
SHRINK;
GROW;
in = ctxt->input->cur;
if (*in == '-') {
if (in[1] == '-') {
if (in[2] == '>') {
SKIP(3);
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
(!ctxt->disableSAX)) {
if (buf != NULL)
ctxt->sax->comment(ctxt->userData, buf);
else
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
}
if (buf != NULL)
xmlFree(buf);
ctxt->instate = state;
return;
}
if (buf != NULL)
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment not terminated \n<!--%.50s\n",
buf);
else
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
"Comment not terminated \n", NULL);
in++;
ctxt->input->col++;
}
in++;
ctxt->input->col++;
goto get_more;
}
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
xmlParseCommentComplex(ctxt, buf, len, size);
ctxt->instate = state;
return;
}


/**
* xmlParsePITarget:
Expand Down Expand Up @@ -3924,7 +4066,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
xmlChar *orig = NULL;
int skipped;

GROW;
/* GROW; done in the caller */
if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
xmlParserInputPtr input = ctxt->input;
SHRINK;
Expand Down Expand Up @@ -5008,7 +5150,7 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
int ret = -1;
xmlElementContentPtr content = NULL;

GROW;
/* GROW; done in the caller */
if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
xmlParserInputPtr input = ctxt->input;

Expand Down Expand Up @@ -5251,12 +5393,32 @@ xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
GROW;
xmlParseElementDecl(ctxt);
xmlParseAttributeListDecl(ctxt);
xmlParseEntityDecl(ctxt);
xmlParseNotationDecl(ctxt);
xmlParsePI(ctxt);
xmlParseComment(ctxt);
if (CUR == '<') {
if (NXT(1) == '!') {
switch (NXT(2)) {
case 'E':
if (NXT(3) == 'L')
xmlParseElementDecl(ctxt);
else if (NXT(3) == 'N')
xmlParseEntityDecl(ctxt);
break;
case 'A':
xmlParseAttributeListDecl(ctxt);
break;
case 'N':
xmlParseNotationDecl(ctxt);
break;
case '-':
xmlParseComment(ctxt);
break;
default:
/* there is an error but it will be detected later */
break;
}
} else if (NXT(1) == '?') {
xmlParsePI(ctxt);
}
}
/*
* This is only for internal subset. On external entities,
* the replacement is done before parsing stage
Expand Down
Loading

0 comments on commit 4c778d8

Please sign in to comment.