Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Use buffers when constructing string node lists.

Before this change xmlStringGetNodeList would perform a realloc() of the
entire new content for every XML entity in the assigned text in order to
merge together adjacent text nodes. This had the effect of making
xmlSetNodeContent O(n^2), which led to unexpectedly bad performance on
inputs that contained a large number of XML entities.

After this change the memory management is done by the buffer API,
avoiding the need to continually re-measure and realloc() the string.

For my test data (6MB of 80 character lines, each ending with 
)
this takes the time to xmlSetNodeContent from about 500 seconds to
around 50ms. I have not profiled smaller cases, though I tried to
minimize the performance impact of my change by avoiding unnecessary
string copying.

Signed-off-by: Conrad Irwin <conrad.irwin@gmail.com>
  • Loading branch information...
commit b368604d9ba04fec68dbc93733e1c2737766747e 1 parent a0cd075
@ConradIrwin authored
Showing with 117 additions and 81 deletions.
  1. +2 −0  include/libxml/tree.h
  2. +115 −81 tree.c
View
2  include/libxml/tree.h
@@ -694,6 +694,8 @@ XMLPUBFUN void XMLCALL
xmlBufferEmpty (xmlBufferPtr buf);
XMLPUBFUN const xmlChar* XMLCALL
xmlBufferContent (const xmlBufferPtr buf);
+XMLPUBFUN xmlChar* XMLCALL
+ xmlBufferDetach (const xmlBufferPtr buf);
XMLPUBFUN void XMLCALL
xmlBufferSetAllocationScheme(xmlBufferPtr buf,
xmlBufferAllocationScheme scheme);
View
196 tree.c
@@ -1261,9 +1261,13 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
const xmlChar *cur = value, *end = cur + len;
const xmlChar *q;
xmlEntityPtr ent;
+ xmlBufferPtr buffer;
if (value == NULL) return(NULL);
+ buffer = xmlBufferCreateSize(0);
+ if (buffer == NULL) return(NULL);
+
q = cur;
while ((cur < end) && (*cur != 0)) {
if (cur[0] == '&') {
@@ -1274,19 +1278,8 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
* Save the current text.
*/
if (cur != q) {
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
- } else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- node->prev = last;
- last = node;
- }
- }
+ if (xmlBufferAdd(buffer, q, cur - q))
+ goto out;
}
q = cur;
if ((cur + 2 < end) && (cur[1] == '#') && (cur[2] == 'x')) {
@@ -1351,7 +1344,7 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
if ((cur >= end) || (*cur == 0)) {
xmlTreeErr(XML_TREE_UNTERMINATED_ENTITY, (xmlNodePtr) doc,
(const char *) q);
- return(ret);
+ goto out;
}
if (cur != q) {
/*
@@ -1361,23 +1354,36 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
ent = xmlGetDocEntity(doc, val);
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
- if (last == NULL) {
- node = xmlNewDocText(doc, ent->content);
- last = ret = node;
- } else if (last->type != XML_TEXT_NODE) {
- node = xmlNewDocText(doc, ent->content);
- last = xmlAddNextSibling(last, node);
- } else
- xmlNodeAddContent(last, ent->content);
+
+ if (xmlBufferCat(buffer, ent->content))
+ goto out;
} else {
/*
+ * Flush buffer so far
+ */
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ if (node == NULL) {
+ if (val != NULL) xmlFree(val);
+ goto out;
+ }
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
+ } else {
+ last = xmlAddNextSibling(last, node);
+ }
+ }
+
+ /*
* Create a new REFERENCE_REF node
*/
node = xmlNewReference(doc, val);
if (node == NULL) {
if (val != NULL) xmlFree(val);
- return(ret);
+ goto out;
}
else if ((ent != NULL) && (ent->children == NULL)) {
xmlNodePtr temp;
@@ -1409,35 +1415,39 @@ xmlStringLenGetNodeList(xmlDocPtr doc, const xmlChar *value, int len) {
l = xmlCopyCharMultiByte(buf, charval);
buf[l] = 0;
- node = xmlNewDocText(doc, buf);
- if (node != NULL) {
- if (last == NULL) {
- last = ret = node;
- } else {
- last = xmlAddNextSibling(last, node);
- }
- }
+
+ if (xmlBufferCat(buffer, buf))
+ goto out;
charval = 0;
}
} else
cur++;
}
- if ((cur != q) || (ret == NULL)) {
+
+ if (cur != q) {
/*
* Handle the last piece of text.
*/
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
+ if (xmlBufferAdd(buffer, q, cur - q))
+ goto out;
+ }
+
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ if (node == NULL) goto out;
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
} else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL) {
- ret = node;
- } else {
- xmlAddNextSibling(last, node);
- }
+ last = xmlAddNextSibling(last, node);
}
+ } else if (ret == NULL) {
+ ret = xmlNewDocText(doc, BAD_CAST "");
}
+
+out:
+ xmlBufferFree(buffer);
return(ret);
}
@@ -1458,9 +1468,13 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
const xmlChar *cur = value;
const xmlChar *q;
xmlEntityPtr ent;
+ xmlBufferPtr buffer;
if (value == NULL) return(NULL);
+ buffer = xmlBufferCreateSize(0);
+ if (buffer == NULL) return(NULL);
+
q = cur;
while (*cur != 0) {
if (cur[0] == '&') {
@@ -1471,19 +1485,8 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
* Save the current text.
*/
if (cur != q) {
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
- } else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL)
- last = ret = node;
- else {
- last->next = node;
- node->prev = last;
- last = node;
- }
- }
+ if (xmlBufferAdd(buffer, q, cur - q))
+ goto out;
}
q = cur;
if ((cur[1] == '#') && (cur[2] == 'x')) {
@@ -1536,7 +1539,7 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
if (*cur == 0) {
xmlTreeErr(XML_TREE_UNTERMINATED_ENTITY,
(xmlNodePtr) doc, (const char *) q);
- return(ret);
+ goto out;
}
if (cur != q) {
/*
@@ -1546,23 +1549,32 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
ent = xmlGetDocEntity(doc, val);
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
- if (last == NULL) {
- node = xmlNewDocText(doc, ent->content);
- last = ret = node;
- } else if (last->type != XML_TEXT_NODE) {
- node = xmlNewDocText(doc, ent->content);
- last = xmlAddNextSibling(last, node);
- } else
- xmlNodeAddContent(last, ent->content);
+
+ if (xmlBufferCat(buffer, ent->content))
+ goto out;
} else {
/*
+ * Flush buffer so far
+ */
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
+ } else {
+ last = xmlAddNextSibling(last, node);
+ }
+ }
+
+ /*
* Create a new REFERENCE_REF node
*/
node = xmlNewReference(doc, val);
if (node == NULL) {
if (val != NULL) xmlFree(val);
- return(ret);
+ goto out;
}
else if ((ent != NULL) && (ent->children == NULL)) {
xmlNodePtr temp;
@@ -1593,14 +1605,10 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
len = xmlCopyCharMultiByte(buf, charval);
buf[len] = 0;
- node = xmlNewDocText(doc, buf);
- if (node != NULL) {
- if (last == NULL) {
- last = ret = node;
- } else {
- last = xmlAddNextSibling(last, node);
- }
- }
+
+ if (xmlBufferCat(buffer, buf))
+ goto out;
+ charval = 0;
}
} else
cur++;
@@ -1609,18 +1617,22 @@ xmlStringGetNodeList(xmlDocPtr doc, const xmlChar *value) {
/*
* Handle the last piece of text.
*/
- if ((last != NULL) && (last->type == XML_TEXT_NODE)) {
- xmlNodeAddContentLen(last, q, cur - q);
+ xmlBufferAdd(buffer, q, cur - q);
+ }
+
+ if (buffer->use) {
+ node = xmlNewDocText(doc, NULL);
+ node->content = xmlBufferDetach(buffer);
+
+ if (last == NULL) {
+ last = ret = node;
} else {
- node = xmlNewDocTextLen(doc, q, cur - q);
- if (node == NULL) return(ret);
- if (last == NULL) {
- last = ret = node;
- } else {
- last = xmlAddNextSibling(last, node);
- }
+ last = xmlAddNextSibling(last, node);
}
}
+
+out:
+ xmlBufferFree(buffer);
return(ret);
}
@@ -6916,6 +6928,28 @@ xmlBufferCreateSize(size_t size) {
}
/**
+ * xmlBufferDetach
+ * @buf: the buffer
+ *
+ * Returns the previous string contained by the buffer.
+ *
+ */
+xmlChar *
+xmlBufferDetach(xmlBufferPtr buf) {
+ xmlChar *ret;
+
+ if (buf == NULL) return(NULL);
+
+ ret = buf->content;
+ buf->content = NULL;
+ buf->size = 0;
+ buf->use = 0;
+
+ return ret;
+}
+
+
+/**
* xmlBufferCreateStatic:
* @mem: the memory area
* @size: the size in byte
Please sign in to comment.
Something went wrong with that request. Please try again.