Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Use buffers when constructing string node lists.

Before this change xmlStringGetNodeList would perform a realloc() of the
entire new content for every XML entity in the assigned text in order to
merge together adjacent text nodes. This had the effect of making
xmlSetNodeContent O(n^2), which led to unexpectedly bad performance on
inputs that contained a large number of XML entities.

After this change the memory management is done by the buffer API,
avoiding the need to continually re-measure and realloc() the string.

For my test data (6MB of 80 character lines, each ending with 
)
this takes the time to xmlSetNodeContent from about 500 seconds to
around 50ms. I have not profiled smaller cases, though I tried to
minimize the performance impact of my change by avoiding unnecessary
string copying.

Signed-off-by: Conrad Irwin <conrad.irwin@gmail.com>
  • Loading branch information...
commit b368604d9ba04fec68dbc93733e1c2737766747e 1 parent a0cd075
Conrad Irwin authored
Showing with 117 additions and 81 deletions.
  1. +2 −0  include/libxml/tree.h
  2. +115 −81 tree.c
2  include/libxml/tree.h
View
@@ -694,6 +694,8 @@ XMLPUBFUN void XMLCALL
xmlBufferEmpty (xmlBufferPtr buf);
XMLPUBFUN const xmlChar* XMLCALL
xmlBufferContent (const xmlBufferPtr buf);
+XMLPUBFUN xmlChar* XMLCALL
+ xmlBufferDetach (const xmlBufferPtr buf);
XMLPUBFUN void XMLCALL
xmlBufferSetAllocationScheme(xmlBufferPtr buf,
xmlBufferAllocationScheme scheme);
196 tree.c
View
@@ -1261,9 +1261,13 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
const xmlChar *cur = value, *end = cur + len;
const xmlChar *q;
xmlEntityPtr ent;
+ xmlBufferPtr buffer;
if (value == NULL) return(NULL);
+ buffer = xmlBufferCreateSize(0);
+ if (buffer == NULL) return(NULL);
+
q = cur;
while ((cur < end) && (*cur != 0)) {
if (cur[0] == '&') {
@@ -1274,19 +1278,8 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
* Save the current text.
*/
if (cur != q) {
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
- } else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- node->prev = last;
- last = node;
- }
- }
+ if (xmlBufferAdd(buffer, q, cur - q))
+ goto out;
}
q = cur;
if ((cur + 2 < end) && (cur[1] == '#') && (cur[2] == 'x')) {
@@ -1351,7 +1344,7 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
if ((cur >= end) || (*cur == 0)) {
xmlTreeErr(XML_TREE_UNTERMINATED_ENTITY, (xmlNodePtr) doc,
(const char *) q);
- return(ret);
+ goto out;
}
if (cur != q) {
/*
@@ -1361,23 +1354,36 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
ent = xmlGetDocEntity(doc, val);
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
- if (last == NULL) {
- node = xmlNewDocText(doc, ent->content);
- last = ret = node;
- } else if (last->type != XML_TEXT_NODE) {
- node = xmlNewDocText(doc, ent->content);
- last = xmlAddNextSibling(last, node);
- } else
- xmlNodeAddContent(last, ent->content);
+
+ if (xmlBufferCat(buffer, ent->content))
+ goto out;
} else {
/*
+ * Flush buffer so far
+ */
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ if (node == NULL) {
+ if (val != NULL) xmlFree(val);
+ goto out;
+ }
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
+ } else {
+ last = xmlAddNextSibling(last, node);
+ }
+ }
+
+ /*
* Create a new REFERENCE_REF node
*/
node = xmlNewReference(doc, val);
if (node == NULL) {
if (val != NULL) xmlFree(val);
- return(ret);
+ goto out;
}
else if ((ent != NULL) && (ent->children == NULL)) {
xmlNodePtr temp;
@@ -1409,35 +1415,39 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
l = xmlCopyCharMultiByte(buf, charval);
buf[l] = 0;
- node = xmlNewDocText(doc, buf);
- if (node != NULL) {
- if (last == NULL) {
- last = ret = node;
- } else {
- last = xmlAddNextSibling(last, node);
- }
- }
+
+ if (xmlBufferCat(buffer, buf))
+ goto out;
charval = 0;
}
} else
cur++;
}
- if ((cur != q) || (ret == NULL)) {
+
+ if (cur != q) {
/*
* Handle the last piece of text.
*/
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
+ if (xmlBufferAdd(buffer, q, cur - q))
+ goto out;
+ }
+
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ if (node == NULL) goto out;
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
} else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL) {
- ret = node;
- } else {
- xmlAddNextSibling(last, node);
- }
+ last = xmlAddNextSibling(last, node);
}
+ } else if (ret == NULL) {
+ ret = xmlNewDocText(doc, BAD_CAST "");
}
+
+out:
+ xmlBufferFree(buffer);
return(ret);
}
@@ -1458,9 +1468,13 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
const xmlChar *cur = value;
const xmlChar *q;
xmlEntityPtr ent;
+ xmlBufferPtr buffer;
if (value == NULL) return(NULL);
+ buffer = xmlBufferCreateSize(0);
+ if (buffer == NULL) return(NULL);
+
q = cur;
while (*cur != 0) {
if (cur[0] == '&') {
@@ -1471,19 +1485,8 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
* Save the current text.
*/
if (cur != q) {
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
- } else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- node->prev = last;
- last = node;
- }
- }
+ if (xmlBufferAdd(buffer, q, cur - q))
+ goto out;
}
q = cur;
if ((cur[1] == '#') && (cur[2] == 'x')) {
@@ -1536,7 +1539,7 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
if (*cur == 0) {
xmlTreeErr(XML_TREE_UNTERMINATED_ENTITY,
(xmlNodePtr) doc, (const char *) q);
- return(ret);
+ goto out;
}
if (cur != q) {
/*
@@ -1546,23 +1549,32 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
ent = xmlGetDocEntity(doc, val);
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
- if (last == NULL) {
- node = xmlNewDocText(doc, ent->content);
- last = ret = node;
- } else if (last->type != XML_TEXT_NODE) {
- node = xmlNewDocText(doc, ent->content);
- last = xmlAddNextSibling(last, node);
- } else
- xmlNodeAddContent(last, ent->content);
+
+ if (xmlBufferCat(buffer, ent->content))
+ goto out;
} else {
/*
+ * Flush buffer so far
+ */
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
+ } else {
+ last = xmlAddNextSibling(last, node);
+ }
+ }
+
+ /*
* Create a new REFERENCE_REF node
*/
node = xmlNewReference(doc, val);
if (node == NULL) {
if (val != NULL) xmlFree(val);
- return(ret);
+ goto out;
}
else if ((ent != NULL) && (ent->children == NULL)) {
xmlNodePtr temp;
@@ -1593,14 +1605,10 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
len = xmlCopyCharMultiByte(buf, charval);
buf[len] = 0;
- node = xmlNewDocText(doc, buf);
- if (node != NULL) {
- if (last == NULL) {
- last = ret = node;
- } else {
- last = xmlAddNextSibling(last, node);
- }
- }
+
+ if (xmlBufferCat(buffer, buf))
+ goto out;
+ charval = 0;
}
} else
cur++;
@@ -1609,18 +1617,22 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
/*
* Handle the last piece of text.
*/
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
+ xmlBufferAdd(buffer, q, cur - q);
+ }
+
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
} else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL) {
- last = ret = node;
- } else {
- last = xmlAddNextSibling(last, node);
- }
+ last = xmlAddNextSibling(last, node);
}
}
+
+out:
+ xmlBufferFree(buffer);
return(ret);
}
@@ -6916,6 +6928,28 @@ xmlBufferCreateSize(size_t size) {
}
/**
+ * xmlBufferDetach
+ * @buf: the buffer
+ *
+ * Returns the previous string contained by the buffer.
+ *
+ */
+xmlChar *
+xmlBufferDetach(xmlBufferPtr buf) {
+ xmlChar *ret;
+
+ if (buf == NULL) return(NULL);
+
+ ret = buf->content;
+ buf->content = NULL;
+ buf->size = 0;
+ buf->use = 0;
+
+ return ret;
+}
+
+
+/**
* xmlBufferCreateStatic:
* @mem: the memory area
* @size: the size in byte
Please sign in to comment.
Something went wrong with that request. Please try again.