Permalink
Browse files

added a parser XML_PARSE_COMPACT option to allocate small text nodes …

…(less

* HTMLparser.c parser.c SAX2.c debugXML.c tree.c valid.c xmlreader.c
  xmllint.c include/libxml/HTMLparser.h include/libxml/parser.h:
  added a parser XML_PARSE_COMPACT option to allocate small
  text nodes (less than 8 bytes on 32 bits, less than 16 bytes on 64 bits)
  directly within the node, various changes to cope with this.
* result/XPath/tests/* result/XPath/xptr/* result/xmlid/*: this
  slightly changes the output
Daniel
  • Loading branch information...
1 parent 3854c57 commit 8874b94cd2e2086f4cefe026286e0f64cac6ec9a Daniel Veillard committed Aug 25, 2005
View
@@ -1,3 +1,13 @@
+Thu Aug 25 15:14:56 CEST 2005 Daniel Veillard <daniel@veillard.com>
+
+ * HTMLparser.c parser.c SAX2.c debugXML.c tree.c valid.c xmlreader.c
+ xmllint.c include/libxml/HTMLparser.h include/libxml/parser.h:
+ added a parser XML_PARSE_COMPACT option to allocate small
+ text nodes (less than 8 bytes on 32bits, less than 16bytes on 64bits)
+ directly within the node, various changes to cope with this.
+ * result/XPath/tests/* result/XPath/xptr/* result/xmlid/*: this
+ slightly changes the output
+
Thu Aug 25 12:16:26 CEST 2005 Daniel Veillard <daniel@veillard.com>
* configure.in: patch from Andrew W. Nosenko, use se $GCC = 'yes'
View
@@ -5820,6 +5820,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
ctxt->recovery = 1;
} else
ctxt->recovery = 0;
+ if (options & HTML_PARSE_COMPACT) {
+ ctxt->options |= HTML_PARSE_COMPACT;
+ options -= HTML_PARSE_COMPACT;
+ }
ctxt->dictNames = 0;
return (options);
}
View
18 SAX2.c
@@ -1777,14 +1777,22 @@ xmlSAX2TextNode(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
xmlErrMemory(ctxt, "xmlSAX2Characters");
return(NULL);
}
+ memset(ret, 0, sizeof(xmlNode));
/*
* intern the formatting blanks found between tags, or the
* very short strings
*/
if (ctxt->dictNames) {
xmlChar cur = str[len];
- if ((len <= 3) && ((cur == '"') || (cur == '\'') ||
+ if ((len < (int) (2 * sizeof(void *))) &&
+ (ctxt->options & XML_PARSE_COMPACT)) {
+ /* store the string in the node overwriting properties and nsDef */
+ xmlChar *tmp = (xmlChar *) &(ret->properties);
+ memcpy(tmp, str, len);
+ tmp[len] = 0;
+ intern = tmp;
+ } else if ((len <= 3) && ((cur == '"') || (cur == '\'') ||
((cur == '<') && (str[len + 1] != '!')))) {
intern = xmlDictLookup(ctxt->dict, str, len);
} else if (IS_BLANK_CH(*str) && (len < 60) && (cur == '<') &&
@@ -1798,7 +1806,6 @@ xmlSAX2TextNode(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
}
}
skip:
- memset(ret, 0, sizeof(xmlNode));
ret->type = XML_TEXT_NODE;
ret->name = xmlStringText;
@@ -2407,8 +2414,11 @@ xmlSAX2Characters(void *ctx, const xmlChar *ch, int len)
* We try to minimize realloc() uses and avoid copying
* and recomputing length over and over.
*/
- if ((ctxt->nodemem == ctxt->nodelen + 1) &&
- (xmlDictOwns(ctxt->dict, lastChild->content))) {
+ if (lastChild->content == (xmlChar *)&(lastChild->properties)) {
+ lastChild->content = xmlStrdup(lastChild->content);
+ lastChild->properties = NULL;
+ } else if ((ctxt->nodemem == ctxt->nodelen + 1) &&
+ (xmlDictOwns(ctxt->dict, lastChild->content))) {
lastChild->content = xmlStrdup(lastChild->content);
}
if (ctxt->nodelen + len >= ctxt->nodemem) {
View
@@ -902,9 +902,15 @@ xmlCtxtDumpOneNode(xmlDebugCtxtPtr ctxt, xmlNodePtr node)
if (!ctxt->check) {
xmlCtxtDumpSpaces(ctxt);
if (node->name == (const xmlChar *) xmlStringTextNoenc)
- fprintf(ctxt->output, "TEXT no enc\n");
+ fprintf(ctxt->output, "TEXT no enc");
else
- fprintf(ctxt->output, "TEXT\n");
+ fprintf(ctxt->output, "TEXT");
+ if (node->content == (xmlChar *) &(node->properties))
+ fprintf(ctxt->output, " compact\n");
+ else if (xmlDictOwns(ctxt->dict, node->content) == 1)
+ fprintf(ctxt->output, " interned\n");
+ else
+ fprintf(ctxt->output, "\n");
}
break;
case XML_CDATA_SECTION_NODE:
@@ -1005,9 +1011,9 @@ xmlCtxtDumpOneNode(xmlDebugCtxtPtr ctxt, xmlNodePtr node)
fprintf(ctxt->output, "PBM: doc == NULL !!!\n");
}
ctxt->depth++;
- if (node->nsDef != NULL)
+ if ((node->type == XML_ELEMENT_NODE) && (node->nsDef != NULL))
xmlCtxtDumpNamespaceList(ctxt, node->nsDef);
- if (node->properties != NULL)
+ if ((node->type == XML_ELEMENT_NODE) && (node->properties != NULL))
xmlCtxtDumpAttrList(ctxt, node->properties);
if (node->type != XML_ENTITY_REF_NODE) {
if ((node->type != XML_ELEMENT_NODE) && (node->content != NULL)) {
@@ -178,7 +178,8 @@ typedef enum {
HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
- HTML_PARSE_NONET = 1<<11 /* Forbid network access */
+ HTML_PARSE_NONET = 1<<11,/* Forbid network access */
+ HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
} htmlParserOption;
XMLPUBFUN void XMLCALL
View
@@ -1088,7 +1088,8 @@ typedef enum {
XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */
XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */
XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */
- XML_PARSE_NOXINCNODE= 1<<15 /* do not generate XINCLUDE START/END nodes */
+ XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */
+ XML_PARSE_COMPACT = 1<<16 /* compact small text nodes */
} xmlParserOption;
XMLPUBFUN void XMLCALL
View
@@ -684,7 +684,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
}
/*
- * plit the element name into prefix:localname , the string found
+ * Split the element name into prefix:localname , the string found
* are within the DTD and hence not associated with namespace names.
*/
name = xmlSplitQName3(fullattr, &len);
@@ -11319,8 +11319,9 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
*lst = cur;
while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
- if (oldctxt->validate && oldctxt->wellFormed &&
- oldctxt->myDoc && oldctxt->myDoc->intSubset) {
+ if ((oldctxt->validate) && (oldctxt->wellFormed) &&
+ (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
+ (cur->type == XML_ELEMENT_NODE)) {
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
oldctxt->myDoc, cur);
}
@@ -12843,6 +12844,10 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
ctxt->options |= XML_PARSE_NONET;
options -= XML_PARSE_NONET;
}
+ if (options & XML_PARSE_COMPACT) {
+ ctxt->options |= XML_PARSE_COMPACT;
+ options -= XML_PARSE_COMPACT;
+ }
ctxt->linenumbers = 1;
return (options);
}
@@ -72,10 +72,10 @@ Set contains 1 nodes:
Expression: /child::EXAMPLE/child::head/node()
Object is a Node Set :
Set contains 3 nodes:
-1 TEXT
+1 TEXT compact
content=
2 ELEMENT title
-3 TEXT
+3 TEXT compact
content=
========================
@@ -5,7 +5,7 @@ Object is a Node Set :
Set contains 1 nodes:
1 ELEMENT EXAMPLE
ATTRIBUTE id
- TEXT
+ TEXT compact
content=root
ATTRIBUTE prop1
TEXT
@@ -5,30 +5,30 @@ Object is a Node Set :
Set contains 9 nodes:
1 ELEMENT b
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=en
2 ELEMENT x
3 ELEMENT x
4 ELEMENT para
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=en
5 ELEMENT div
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=en
6 ELEMENT para
7 ELEMENT para
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=EN
8 ELEMENT para
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=en-us
9 ELEMENT para
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=EN-US
========================
@@ -37,11 +37,11 @@ Object is a Node Set :
Set contains 2 nodes:
1 ELEMENT para
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=en-us
2 ELEMENT para
ATTRIBUTE lang
- TEXT
+ TEXT compact
content=EN-US
========================
@@ -5,7 +5,7 @@ Object is a Node Set :
Set contains 1 nodes:
1 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=root
========================
@@ -14,11 +14,11 @@ Object is a Node Set :
Set contains 2 nodes:
1 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=root
2 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p1
========================
@@ -27,11 +27,11 @@ Object is a Node Set :
Set contains 2 nodes:
1 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=root
2 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p1
========================
@@ -40,11 +40,11 @@ Object is a Node Set :
Set contains 2 nodes:
1 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=root
2 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p1
========================
@@ -53,15 +53,15 @@ Object is a Node Set :
Set contains 3 nodes:
1 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=root
2 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p1
3 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p2
========================
@@ -70,16 +70,16 @@ Object is a Node Set :
Set contains 4 nodes:
1 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=root
2 ELEMENT p1
3 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p1
4 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p2
========================
@@ -88,14 +88,14 @@ Object is a Node Set :
Set contains 4 nodes:
1 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=root
2 ELEMENT p1
3 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p1
4 ELEMENT s
ATTRIBUTE p
- TEXT
+ TEXT compact
content=p2
@@ -45,7 +45,7 @@ Object is a Node Set :
Set contains 2 nodes:
1 TEXT
content=bla bla bla ...
-2 TEXT
+2 TEXT compact
content=...
========================
@@ -54,7 +54,7 @@ Object is a Node Set :
Set contains 2 nodes:
1 TEXT
content=bla bla bla ...
-2 TEXT
+2 TEXT compact
content=...
========================
@@ -63,7 +63,7 @@ Object is a Node Set :
Set contains 2 nodes:
1 TEXT
content=bla bla bla ...
-2 TEXT
+2 TEXT compact
content=...
========================
@@ -77,5 +77,5 @@ Set contains 1 nodes:
Expression: (//p/text())[position()=last()]
Object is a Node Set :
Set contains 1 nodes:
-1 TEXT
+1 TEXT compact
content=...
@@ -53,10 +53,10 @@ Set contains 1 nodes:
Expression: /child::EXAMPLE/child::head/node()
Object is a Node Set :
Set contains 3 nodes:
-1 TEXT
+1 TEXT compact
content=
2 ELEMENT title
-3 TEXT
+3 TEXT compact
content=
========================
@@ -5,8 +5,8 @@ Object is a Node Set :
Set contains 1 nodes:
1 ELEMENT ITEM
ATTRIBUTE monto
- TEXT
+ TEXT compact
content=50.12
ATTRIBUTE divisa
- TEXT
+ TEXT compact
content=DOL
Oops, something went wrong.

0 comments on commit 8874b94

Please sign in to comment.