Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

This is the 2.0.0-beta, lots and lots and lots of changes

Have a look at http://xmlsoft.org/upgrade.html

Daniel
  • Loading branch information...
commit cf46199c5e2c94781ef647612b3b43618b36b940 1 parent 76234da
Daniel Veillard authored
Showing with 9,978 additions and 5,547 deletions.
  1. +8 −0 ChangeLog
  2. +71 −26 HTMLparser.c
  3. +10 −10 HTMLtree.c
  4. +265 −87 SAX.c
  5. +4 −4 configure.in
  6. +418 −36 debugXML.c
  7. +439 −88 encoding.c
  8. +6 −4 encoding.h
  9. +461 −167 entities.c
  10. +47 −17 entities.h
  11. +6 −4 include/libxml/encoding.h
  12. +47 −17 include/libxml/entities.h
  13. +37 −2 include/libxml/parser.h
  14. +36 −109 include/libxml/parserInternals.h
  15. +94 −43 include/libxml/tree.h
  16. +24 −0 include/libxml/valid.h
  17. +1 −1  include/libxml/xmlmemory.h
  18. +1 −2  libxml.spec.in
  19. +29 −5 nanoftp.c
  20. +1 −1  nanohttp.c
  21. +2,736 −777 parser.c
  22. +37 −2 parser.h
  23. +36 −109 parserInternals.h
  24. +0 −1  result/SVG/bike.xml
  25. +3 −0  result/VC/OneID
  26. +3 −0  result/VC/OneID2
  27. +3 −3 result/VC/OneID3
  28. +1 −1  result/VC/UniqueElementTypeDeclaration
  29. +1 −1  result/comment.xml
  30. +1 −1  result/comment2.xml
  31. +2 −1  result/dtd1
  32. +3 −3 result/dtd10
  33. +2 −2 result/dtd12
  34. +7 −0 result/dtd13
  35. +3 −3 result/dtd6
  36. +2 −2 result/dtd7
  37. +2 −2 result/dtd8
  38. +2 −2 result/dtd9
  39. +1 −1  result/ent5
  40. +1 −1  result/ent7
  41. +3 −3 result/ent8
  42. +2 −1  result/eve.xml
  43. +1 −1  result/noent/comment.xml
  44. +1 −1  result/noent/comment2.xml
  45. +2 −1  result/noent/dtd1
  46. +3 −3 result/noent/dtd10
  47. +3 −3 result/noent/dtd12
  48. +3 −3 result/noent/dtd6
  49. +2 −2 result/noent/dtd7
  50. +2 −2 result/noent/dtd8
  51. +2 −2 result/noent/dtd9
  52. +1 −1  result/noent/ent5
  53. +1 −1  result/noent/ent7
  54. +3 −3 result/noent/ent8
  55. +2 −1  result/noent/eve.xml
  56. +17 −20 result/noent/p3p
  57. +1 −1  result/noent/pi.xml
  58. +1 −1  result/noent/pi2.xml
  59. +4 −4 result/noent/rdf2
  60. +13 −1 result/noent/slashdot.rdf
  61. +12 −12 result/noent/slashdot.xml
  62. +158 −158 result/noent/svg1
  63. +53 −51 result/noent/svg2
  64. +720 −720 result/noent/svg3
  65. +1 −1  result/noent/xml1
  66. +1 −1  result/noent/xml2
  67. +17 −20 result/p3p
  68. +1 −1  result/pi.xml
  69. +1 −1  result/pi2.xml
  70. +4 −4 result/rdf2
  71. +13 −1 result/slashdot.rdf
  72. +12 −12 result/slashdot.xml
  73. +158 −158 result/svg1
  74. +53 −51 result/svg2
  75. +720 −720 result/svg3
  76. +1,048 −1,053 result/valid/REC-xml-19980210.xml
  77. +111 −111 result/valid/dia.xml
  78. +208 −208 result/valid/xlink.xml
  79. +1 −1  result/valid/xlink.xml.err
  80. +1 −1  result/xml2
  81. +2 −2 test/dtd12
  82. +17 −0 testSAX.c
  83. +312 −12 tester.c
  84. +441 −384 tree.c
  85. +94 −43 tree.h
  86. +791 −197 valid.c
  87. +24 −0 valid.h
  88. +15 −1 xml-error.h
  89. +33 −10 xmlIO.c
  90. +1 −1  xmlmemory.h
  91. +37 −22 xpath.c
8 ChangeLog
View
@@ -1,3 +1,11 @@
+Tue Mar 14 19:11:29 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+ * all: tagged LIB_XML_1_X
+ * *.c *.h : updated from W3C CVS tree
+ * configure.in : 2.0.0-beta
+ * libxml.spec.in : libxml2 package name
+ * result/* : new version of the tests output
+
Mon Mar 6 09:34:52 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
* doc/xml.html, doc/update.html: updated docs, 1.8.7
97 HTMLparser.c
View
@@ -121,36 +121,81 @@ PUSH_AND_POP(extern, xmlChar*, name)
* COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
*/
-#define CUR (*ctxt->input->cur)
+#define CUR ((int) (*ctxt->input->cur))
+
#define UPPER (toupper(*ctxt->input->cur))
+
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
+
#define NXT(val) ctxt->input->cur[(val)]
+
#define UPP(val) (toupper(ctxt->input->cur[(val)]))
+
#define CUR_PTR ctxt->input->cur
+
#define SHRINK xmlParserInputShrink(ctxt->input)
+
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK)
-#define SKIP_BLANKS \
- while (IS_BLANK(*(ctxt->input->cur))) NEXT
-
-#ifndef USE_UTF_8
-#define CURRENT (*ctxt->input->cur)
-#define NEXT { \
- if ((*ctxt->input->cur == 0) && \
- (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { \
- xmlPopInput(ctxt); \
- } else { \
- if (*(ctxt->input->cur) == '\n') { \
- ctxt->input->line++; ctxt->input->col = 1; \
- } else ctxt->input->col++; \
- ctxt->input->cur++; \
- ctxt->nbChars++; \
- if (*ctxt->input->cur == 0) \
- xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
- }}
+#define CURRENT ((int) (*ctxt->input->cur))
-#else
-#endif
+#define NEXT htmlNextChar(ctxt);
+
+#define SKIP_BLANKS htmlSkipBlankChars(ctxt);
+
+/**
+ * htmlNextChar:
+ * @ctxt:  the HTML parser context
+ *
+ * Advance the current input by one char, keeping the line/column
+ * position and the ctxt->nbChars counter up to date.
+ *
+ * If the current char is 0 and xmlParserInputGrow() cannot provide more
+ * data, the current input is popped with xmlPopInput() and nothing else
+ * is updated.
+ */
+
+void
+htmlNextChar(htmlParserCtxtPtr ctxt) {
+    /* End of the buffered data and nothing more to read: drop this input. */
+    if ((*ctxt->input->cur == 0) &&
+        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
+        xmlPopInput(ctxt);
+        return;
+    }
+
+    /* Position tracking: a newline starts a new line at column 1. */
+    if (*(ctxt->input->cur) == '\n') {
+        ctxt->input->line++;
+        ctxt->input->col = 1;
+    } else {
+        ctxt->input->col++;
+    }
+
+    ctxt->input->cur++;
+    ctxt->nbChars++;
+
+    /* Landed on a 0 char: try to refill the buffer right away. */
+    if (*ctxt->input->cur == 0)
+        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+}
+
+/**
+ * htmlSkipBlankChars:
+ * @ctxt:  the HTML parser context
+ *
+ * Skip all the blank characters found at the current position of the
+ * input stream.
+ *
+ * Returns the number of blank chars skipped
+ */
+
+int
+htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
+    int res = 0;
+
+    /*
+     * Each step is exactly what htmlNextChar() does (input refill/pop,
+     * line/column tracking, nbChars accounting), so delegate to it
+     * instead of keeping a second copy of that logic here.
+     * NOTE(review): relies on htmlParserCtxtPtr and xmlParserCtxtPtr
+     * being the same type, as the SKIP_BLANKS macro usage implies.
+     */
+    while (IS_BLANK(*(ctxt->input->cur))) {
+        htmlNextChar(ctxt);
+        res++;
+    }
+    return(res);
+}
@@ -475,7 +520,7 @@ htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
if (elem == NULL) return(1);
if (!xmlStrcmp(name, elem->name)) return(0);
if (htmlCheckAutoClose(elem->name, name)) return(1);
- child = elem->childs;
+ child = elem->children;
while (child != NULL) {
if (htmlAutoCloseTag(doc, name, child)) return(1);
child = child->next;
@@ -499,7 +544,7 @@ htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
htmlNodePtr child;
if (elem == NULL) return(1);
- child = elem->childs;
+ child = elem->children;
while (child != NULL) {
if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
child = child->next;
@@ -1275,7 +1320,7 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
else
xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI);
cur->name = NULL;
- cur->root = NULL;
+ cur->children = NULL;
cur->extSubset = NULL;
cur->oldNs = NULL;
cur->encoding = NULL;
@@ -1285,7 +1330,6 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
cur->refs = NULL;
#ifndef XML_WITHOUT_CORBA
cur->_private = NULL;
- cur->vepv = NULL;
#endif
return(cur);
}
@@ -1667,7 +1711,8 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
}
} else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
+ ctxt->sax->error(ctxt->userData,
+ "SystemLiteral \" or ' expected\n");
ctxt->wellFormed = 0;
}
20 HTMLtree.c
View
@@ -80,7 +80,7 @@ htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
}
xmlBufferWriteChar(buf, " ");
xmlBufferWriteCHAR(buf, cur->name);
- value = xmlNodeListGetString(doc, cur->val, 0);
+ value = xmlNodeListGetString(doc, cur->children, 0);
if (value) {
xmlBufferWriteChar(buf, "=");
xmlBufferWriteQuotedString(buf, value);
@@ -212,7 +212,7 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
}
return;
}
- if ((cur->content == NULL) && (cur->childs == NULL)) {
+ if ((cur->content == NULL) && (cur->children == NULL)) {
if ((info != NULL) && (info->endTag != 0))
xmlBufferWriteChar(buf, ">");
else {
@@ -242,15 +242,15 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
xmlFree(buffer);
}
}
- if (cur->childs != NULL) {
- if ((cur->childs->type != HTML_TEXT_NODE) &&
- (cur->childs->type != HTML_ENTITY_REF_NODE) &&
- (cur->childs != cur->last))
+ if (cur->children != NULL) {
+ if ((cur->children->type != HTML_TEXT_NODE) &&
+ (cur->children->type != HTML_ENTITY_REF_NODE) &&
+ (cur->children != cur->last))
xmlBufferWriteChar(buf, "\n");
- htmlNodeListDump(buf, doc, cur->childs);
+ htmlNodeListDump(buf, doc, cur->children);
if ((cur->last->type != HTML_TEXT_NODE) &&
(cur->last->type != HTML_ENTITY_REF_NODE) &&
- (cur->childs != cur->last))
+ (cur->children != cur->last))
xmlBufferWriteChar(buf, "\n");
}
if (!htmlIsAutoClosed(doc, cur)) {
@@ -307,8 +307,8 @@ htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
}
- if (cur->root != NULL) {
- htmlNodeListDump(buf, cur, cur->root);
+ if (cur->children != NULL) {
+ htmlNodeListDump(buf, cur, cur->children);
}
xmlBufferWriteChar(buf, "\n");
cur->type = type;
352 SAX.c
View
@@ -158,66 +158,112 @@ internalSubset(void *ctx, const xmlChar *name,
name, ExternalID, SystemID);
#endif
xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
+}
+
+/**
+ * externalSubset:
+ * @ctx: the user data (XML parser context)
+ *
+ * Callback on external subset declaration.
+ */
+void
+externalSubset(void *ctx, const xmlChar *name,
+ const xmlChar *ExternalID, const xmlChar *SystemID)
+{
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+#ifdef DEBUG_SAX
+ fprintf(stderr, "SAX.externalSubset(%s, %s, %s)\n",
+ name, ExternalID, SystemID);
+#endif
if (((ExternalID != NULL) || (SystemID != NULL)) &&
(ctxt->validate && ctxt->wellFormed && ctxt->myDoc)) {
/*
* Try to fetch and parse the external subset.
*/
- xmlDtdPtr ret = NULL;
- xmlParserCtxtPtr dtdCtxt;
+ xmlParserInputPtr oldinput;
+ int oldinputNr;
+ int oldinputMax;
+ xmlParserInputPtr *oldinputTab;
+ int oldwellFormed;
xmlParserInputPtr input = NULL;
xmlCharEncoding enc;
- dtdCtxt = xmlNewParserCtxt();
- if (dtdCtxt == NULL) return;
-
/*
* Ask the Entity resolver to load the damn thing
*/
- if ((ctxt->directory != NULL) && (dtdCtxt->directory == NULL))
- dtdCtxt->directory = (char *) xmlStrdup(BAD_CAST ctxt->directory);
-
- if ((dtdCtxt->sax != NULL) && (dtdCtxt->sax->resolveEntity != NULL))
- input = dtdCtxt->sax->resolveEntity(dtdCtxt->userData, ExternalID,
+ if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
+ input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
SystemID);
if (input == NULL) {
- xmlFreeParserCtxt(dtdCtxt);
return;
}
+ xmlNewDtd(ctxt->myDoc, name, ExternalID, SystemID);
+
/*
- * plug some encoding conversion routines here. !!!
+ * make sure we won't destroy the main document context
*/
- xmlPushInput(dtdCtxt, input);
- enc = xmlDetectCharEncoding(dtdCtxt->input->cur);
- xmlSwitchEncoding(dtdCtxt, enc);
+ oldinput = ctxt->input;
+ oldinputNr = ctxt->inputNr;
+ oldinputMax = ctxt->inputMax;
+ oldinputTab = ctxt->inputTab;
+ oldwellFormed = ctxt->wellFormed;
+
+ ctxt->inputTab = (xmlParserInputPtr *)
+ xmlMalloc(5 * sizeof(xmlParserInputPtr));
+ if (ctxt->inputTab == NULL) {
+ ctxt->errNo = XML_ERR_NO_MEMORY;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "externalSubset: out of memory\n");
+ ctxt->errNo = XML_ERR_NO_MEMORY;
+ ctxt->input = oldinput;
+ ctxt->inputNr = oldinputNr;
+ ctxt->inputMax = oldinputMax;
+ ctxt->inputTab = oldinputTab;
+ return;
+ }
+ ctxt->inputNr = 0;
+ ctxt->inputMax = 5;
+ ctxt->input = NULL;
+ xmlPushInput(ctxt, input);
+
+ /*
+ * On the fly encoding conversion if needed
+ */
+ enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
+ xmlSwitchEncoding(ctxt, enc);
if (input->filename == NULL)
input->filename = (char *) xmlStrdup(SystemID);
input->line = 1;
input->col = 1;
- input->base = dtdCtxt->input->cur;
- input->cur = dtdCtxt->input->cur;
+ input->base = ctxt->input->cur;
+ input->cur = ctxt->input->cur;
input->free = NULL;
/*
* let's parse that entity knowing it's an external subset.
*/
- xmlParseExternalSubset(dtdCtxt, ExternalID, SystemID);
-
- if (dtdCtxt->myDoc != NULL) {
- if (dtdCtxt->wellFormed) {
- ret = dtdCtxt->myDoc->intSubset;
- dtdCtxt->myDoc->intSubset = NULL;
- } else {
- ret = NULL;
- }
- xmlFreeDoc(dtdCtxt->myDoc);
- dtdCtxt->myDoc = NULL;
- }
- xmlFreeParserCtxt(dtdCtxt);
-
- ctxt->myDoc->extSubset = ret;
+ xmlParseExternalSubset(ctxt, ExternalID, SystemID);
+
+ /*
+ * Free up the external entities
+ */
+
+ while (ctxt->inputNr > 1)
+ xmlPopInput(ctxt);
+ xmlFreeInputStream(ctxt->input);
+ xmlFree(ctxt->inputTab);
+
+ /*
+ * Restore the parsing context of the main entity
+ */
+ ctxt->input = oldinput;
+ ctxt->inputNr = oldinputNr;
+ ctxt->inputMax = oldinputMax;
+ ctxt->inputTab = oldinputTab;
+ /* ctxt->wellFormed = oldwellFormed; */
}
}
@@ -316,13 +362,23 @@ entityDecl(void *ctx, const xmlChar *name, int type,
fprintf(stderr, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
name, type, publicId, systemId, content);
#endif
- xmlAddDocEntity(ctxt->myDoc, name, type, publicId, systemId, content);
+ if (ctxt->inSubset == 1)
+ xmlAddDocEntity(ctxt->myDoc, name, type, publicId,
+ systemId, content);
+ else if (ctxt->inSubset == 2)
+ xmlAddDtdEntity(ctxt->myDoc, name, type, publicId,
+ systemId, content);
+ else {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt,
+ "SAX.entityDecl(%s) called while not in subset\n", name);
+ }
}
/**
* attributeDecl:
* @ctx: the user data (XML parser context)
- * @name: the attribute name
+ * @fullname: the attribute name
* @type: the attribute type
* @publicId: The public ID of the attribute
* @systemId: The system ID of the attribute
@@ -331,24 +387,40 @@ entityDecl(void *ctx, const xmlChar *name, int type,
* An attribute definition has been parsed
*/
void
-attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *name,
+attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *fullname,
int type, int def, const xmlChar *defaultValue,
xmlEnumerationPtr tree)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlAttributePtr attr;
+ xmlChar *name = NULL, *prefix = NULL;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
- elem, name, type, def, defaultValue);
+ elem, fullname, type, def, defaultValue);
#endif
- attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem,
- name, type, def, defaultValue, tree);
+ name = xmlSplitQName(ctxt, fullname, &prefix);
+ if (ctxt->inSubset == 1)
+ attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem,
+ name, prefix, type, def, defaultValue, tree);
+ else if (ctxt->inSubset == 2)
+ attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, elem,
+ name, prefix, type, def, defaultValue, tree);
+ else {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt,
+ "SAX.attributeDecl(%s) called while not in subset\n", name);
+ return;
+ }
if (attr == 0) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateAttributeDecl(&ctxt->vctxt, ctxt->myDoc,
attr);
+ if (prefix != NULL)
+ xmlFree(prefix);
+ if (name != NULL)
+ xmlFree(name);
}
/**
@@ -367,16 +439,26 @@ elementDecl(void *ctx, const xmlChar *name, int type,
xmlElementContentPtr content)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
- xmlElementPtr elem;
+ xmlElementPtr elem = NULL;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.elementDecl(%s, %d, ...)\n",
- name, type);
+ fullname, type);
#endif
- elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset,
+ if (ctxt->inSubset == 1)
+ elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset,
+ name, type, content);
+ else if (ctxt->inSubset == 2)
+ elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->extSubset,
name, type, content);
- if (elem == 0) ctxt->valid = 0;
+ else {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt,
+ "SAX.elementDecl(%s) called while not in subset\n", name);
+ return;
+ }
+ if (elem == NULL) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateElementDecl(&ctxt->vctxt, ctxt->myDoc, elem);
@@ -396,15 +478,25 @@ notationDecl(void *ctx, const xmlChar *name,
const xmlChar *publicId, const xmlChar *systemId)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
- xmlNotationPtr nota;
+ xmlNotationPtr nota = NULL;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId);
#endif
- nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
+ if (ctxt->inSubset == 1)
+ nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
publicId, systemId);
- if (nota == 0) ctxt->valid = 0;
+ else if (ctxt->inSubset == 2)
+ nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
+ publicId, systemId);
+ else {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt,
+ "SAX.notationDecl(%s) called while not in subset\n", name);
+ return;
+ }
+ if (nota == NULL) ctxt->valid = 0;
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateNotationDecl(&ctxt->vctxt, ctxt->myDoc,
@@ -518,6 +610,7 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlAttrPtr ret;
xmlChar *name;
xmlChar *ns;
+ xmlChar *nval;
xmlNsPtr namespace;
/****************
@@ -528,7 +621,15 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
/*
* Split the full name into a namespace prefix and the tag name
*/
- name = xmlSplitQName(fullname, &ns);
+ name = xmlSplitQName(ctxt, fullname, &ns);
+
+ /*
+ * Do the last stage of the attribute normalization
+ */
+ nval = xmlValidNormalizeAttributeValue(ctxt->myDoc,
+ ctxt->node, fullname, value);
+ if (nval != NULL)
+ value = nval;
/*
* Check whether it's a namespace definition
@@ -540,15 +641,28 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlNewNs(ctxt->node, value, NULL);
if (name != NULL)
xmlFree(name);
+ if (nval != NULL)
+ xmlFree(nval);
return;
}
if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
(ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
+ /*
+ * Validate also for namespace decls, they are attributes from
+ * an XML-1.0 perspective
+ TODO ... doesn't map well with current API
+ if (ctxt->validate && ctxt->wellFormed &&
+ ctxt->myDoc && ctxt->myDoc->intSubset)
+ ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
+ ctxt->node, ret, value);
+ */
/* a standard namespace definition */
xmlNewNs(ctxt->node, value, name);
xmlFree(ns);
if (name != NULL)
xmlFree(name);
+ if (nval != NULL)
+ xmlFree(nval);
return;
}
@@ -562,17 +676,52 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
ret = xmlNewNsProp(ctxt->node, namespace, name, NULL);
if (ret != NULL) {
- if ((ctxt->replaceEntities == 0) && (!ctxt->html))
- ret->val = xmlStringGetNodeList(ctxt->myDoc, value);
- else
- ret->val = xmlNewDocText(ctxt->myDoc, value);
+ if ((ctxt->replaceEntities == 0) && (!ctxt->html)) {
+ xmlNodePtr tmp;
+
+ ret->children = xmlStringGetNodeList(ctxt->myDoc, value);
+ tmp = ret->children;
+ while (tmp != NULL) {
+ tmp->parent = (xmlNodePtr) ret;
+ if (tmp->next == NULL)
+ ret->last = tmp;
+ tmp = tmp->next;
+ }
+ } else {
+ ret->children = xmlNewDocText(ctxt->myDoc, value);
+ ret->last = ret->children;
+ if (ret->children != NULL)
+ ret->children->parent = (xmlNodePtr) ret;
+ }
}
if (ctxt->validate && ctxt->wellFormed &&
- ctxt->myDoc && ctxt->myDoc->intSubset)
- ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
+ ctxt->myDoc && ctxt->myDoc->intSubset) {
+
+ /*
+ * If we don't substitute entities, the validation should be
+ * done on a value with replaced entities anyway.
+ */
+ if (!ctxt->replaceEntities) {
+ xmlChar *val;
+
+ ctxt->depth++;
+ val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
+ 0,0,0);
+ ctxt->depth--;
+ if (val == NULL)
+ ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
+ ctxt->myDoc, ctxt->node, ret, value);
+ else {
+ ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
+ ctxt->myDoc, ctxt->node, ret, val);
+ xmlFree(val);
+ }
+ } else {
+ ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
ctxt->node, ret, value);
- else {
+ }
+ } else {
/*
* when validating, the ID registration is done at the attribute
* validation level. Otherwise we have to do specific handling here.
@@ -583,6 +732,8 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
xmlAddRef(&ctxt->vctxt, ctxt->myDoc, value, ret);
}
+ if (nval != NULL)
+ xmlFree(nval);
if (name != NULL)
xmlFree(name);
if (ns != NULL)
@@ -634,7 +785,7 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
/*
* Split the full name into a namespace prefix and the tag name
*/
- name = xmlSplitQName(fullname, &prefix);
+ name = xmlSplitQName(ctxt, fullname, &prefix);
/*
@@ -644,13 +795,13 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
*/
ret = xmlNewDocNode(ctxt->myDoc, NULL, name, NULL);
if (ret == NULL) return;
- if (ctxt->myDoc->root == NULL) {
+ if (ctxt->myDoc->children == NULL) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting %s as root\n", name);
#endif
- ctxt->myDoc->root = ret;
+ xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
} else if (parent == NULL) {
- parent = ctxt->myDoc->root;
+ parent = ctxt->myDoc->children;
}
/*
@@ -680,6 +831,15 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
}
/*
+ * If it's the Document root, finish the Dtd validation and
+ * check the document root element for validity
+ */
+ if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) {
+ ctxt->valid &= xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc);
+ ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
+ ctxt->vctxt.finishDtd = 1;
+ }
+ /*
* process all the attributes whose name start with "xml"
*/
if (atts != NULL) {
@@ -790,7 +950,10 @@ reference(void *ctx, const xmlChar *name)
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.reference(%s)\n", name);
#endif
- ret = xmlNewReference(ctxt->myDoc, name);
+ if (name[0] == '#')
+ ret = xmlNewCharRef(ctxt->myDoc, name);
+ else
+ ret = xmlNewReference(ctxt->myDoc, name);
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "add reference %s to %s \n", name, ctxt->node->name);
#endif
@@ -884,30 +1047,34 @@ processingInstruction(void *ctx, const xmlChar *target,
ret = xmlNewPI(target, data);
if (ret == NULL) return;
- ret->doc = ctxt->myDoc;
- if (ctxt->myDoc->root == NULL) {
+ parent = ctxt->node;
+
+ if (ctxt->inSubset == 1) {
+ xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
+ return;
+ } else if (ctxt->inSubset == 2) {
+ xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
+ return;
+ }
+ if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting PI %s as root\n", target);
#endif
- ctxt->myDoc->root = ret;
- } else if (parent == NULL) {
- parent = ctxt->myDoc->root;
+ xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
+ return;
}
- if (parent != NULL) {
- if (parent->type == XML_ELEMENT_NODE) {
+ if (parent->type == XML_ELEMENT_NODE) {
#ifdef DEBUG_SAX_TREE
- fprintf(stderr, "adding PI child %s to %s\n", target, parent->name);
+ fprintf(stderr, "adding PI %s child to %s\n", target, parent->name);
#endif
- xmlAddChild(parent, ret);
- } else {
+ xmlAddChild(parent, ret);
+ } else {
#ifdef DEBUG_SAX_TREE
- fprintf(stderr, "adding PI sibling %s to ", target);
- xmlDebugDumpOneNode(stderr, parent, 0);
+ fprintf(stderr, "adding PI %s sibling to ", target);
+ xmlDebugDumpOneNode(stderr, parent, 0);
#endif
- xmlAddSibling(parent, ret);
- }
+ xmlAddSibling(parent, ret);
}
-
}
/**
@@ -1064,27 +1231,31 @@ comment(void *ctx, const xmlChar *value)
ret = xmlNewDocComment(ctxt->myDoc, value);
if (ret == NULL) return;
- if (ctxt->myDoc->root == NULL) {
+ if (ctxt->inSubset == 1) {
+ xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
+ return;
+ } else if (ctxt->inSubset == 2) {
+ xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
+ return;
+ }
+ if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
#ifdef DEBUG_SAX_TREE
fprintf(stderr, "Setting comment as root\n");
#endif
- ctxt->myDoc->root = ret;
- } else if (parent == NULL) {
- parent = ctxt->myDoc->root;
+ xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
+ return;
}
- if (parent != NULL) {
- if (parent->type == XML_ELEMENT_NODE) {
+ if (parent->type == XML_ELEMENT_NODE) {
#ifdef DEBUG_SAX_TREE
- fprintf(stderr, "adding comment child to %s\n", parent->name);
+ fprintf(stderr, "adding comment child to %s\n", parent->name);
#endif
- xmlAddChild(parent, ret);
- } else {
+ xmlAddChild(parent, ret);
+ } else {
#ifdef DEBUG_SAX_TREE
- fprintf(stderr, "adding comment sibling to ");
- xmlDebugDumpOneNode(stderr, parent, 0);
+ fprintf(stderr, "adding comment sibling to ");
+ xmlDebugDumpOneNode(stderr, parent, 0);
#endif
- xmlAddSibling(parent, ret);
- }
+ xmlAddSibling(parent, ret);
}
}
@@ -1148,6 +1319,7 @@ xmlSAXHandler xmlDefaultSAXHandler = {
xmlParserError,
getParameterEntity,
cdataBlock,
+ externalSubset,
};
/**
@@ -1159,6 +1331,7 @@ void
xmlDefaultSAXHandlerInit(void)
{
xmlDefaultSAXHandler.internalSubset = internalSubset;
+ xmlDefaultSAXHandler.externalSubset = externalSubset;
xmlDefaultSAXHandler.isStandalone = isStandalone;
xmlDefaultSAXHandler.hasInternalSubset = hasInternalSubset;
xmlDefaultSAXHandler.hasExternalSubset = hasExternalSubset;
@@ -1181,7 +1354,10 @@ xmlDefaultSAXHandlerInit(void)
xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
xmlDefaultSAXHandler.processingInstruction = processingInstruction;
xmlDefaultSAXHandler.comment = comment;
- xmlDefaultSAXHandler.warning = xmlParserWarning;
+ if (xmlGetWarningsDefaultValue == 0)
+ xmlDefaultSAXHandler.warning = NULL;
+ else
+ xmlDefaultSAXHandler.warning = xmlParserWarning;
xmlDefaultSAXHandler.error = xmlParserError;
xmlDefaultSAXHandler.fatalError = xmlParserError;
}
@@ -1216,6 +1392,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
xmlParserError,
getParameterEntity,
NULL,
+ NULL,
};
/**
@@ -1227,6 +1404,7 @@ void
htmlDefaultSAXHandlerInit(void)
{
htmlDefaultSAXHandler.internalSubset = NULL;
+ htmlDefaultSAXHandler.externalSubset = NULL;
htmlDefaultSAXHandler.isStandalone = NULL;
htmlDefaultSAXHandler.hasInternalSubset = NULL;
htmlDefaultSAXHandler.hasExternalSubset = NULL;
8 configure.in
View
@@ -3,9 +3,9 @@ AC_PREREQ(2.2)
AC_INIT(entities.h)
AM_CONFIG_HEADER(config.h)
-LIBXML_MAJOR_VERSION=1
-LIBXML_MINOR_VERSION=8
-LIBXML_MICRO_VERSION=7
+LIBXML_MAJOR_VERSION=2
+LIBXML_MINOR_VERSION=0
+LIBXML_MICRO_VERSION=0
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
@@ -15,7 +15,7 @@ AC_SUBST(LIBXML_MICRO_VERSION)
AC_SUBST(LIBXML_VERSION)
AC_SUBST(LIBXML_VERSION_INFO)
-VERSION=$LIBXML_VERSION
+VERSION=$LIBXML_VERSION-beta
AM_INIT_AUTOMAKE(libxml, $VERSION)
454 debugXML.c
View
@@ -22,6 +22,7 @@
#include "xmlmemory.h"
#include "tree.h"
#include "parser.h"
+#include "valid.h"
#include "debugXML.h"
#include "HTMLtree.h"
#include "HTMLparser.h"
@@ -38,6 +39,315 @@ void xmlDebugDumpString(FILE *output, const xmlChar *str) {
fprintf(output, "...");
}
+/**
+ * xmlDebugDumpDtd:
+ * @output:  the FILE * receiving the dump
+ * @dtd:  the DTD node to describe
+ * @depth:  the indentation level (two spaces per level, at most 25 levels)
+ *
+ * Print a one line description of a DTD node (name, public and system
+ * identifiers) on @output, then report any inconsistency found in its
+ * parent, doc and sibling links as "PBM:" lines.
+ */
+void xmlDebugDumpDtd(FILE *output, xmlDtdPtr dtd, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix; the cap keeps it inside shift[]. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* Never hand a data buffer to fprintf as the format string. */
+    fprintf(output, "%s", shift);
+
+    if (dtd->type != XML_DTD_NODE) {
+        fprintf(output, "PBM: not a DTD\n");
+        return;
+    }
+    if (dtd->name != NULL)
+        fprintf(output, "DTD(%s)", dtd->name);
+    else
+        fprintf(output, "DTD");
+    if (dtd->ExternalID != NULL)
+        fprintf(output, ", PUBLIC %s", dtd->ExternalID);
+    if (dtd->SystemID != NULL)
+        fprintf(output, ", SYSTEM %s", dtd->SystemID);
+    fprintf(output, "\n");
+    /*
+     * Do a bit of checking
+     */
+    if (dtd->parent == NULL)
+        fprintf(output, "PBM: Dtd has no parent\n");
+    if (dtd->doc == NULL)
+        fprintf(output, "PBM: Dtd has no doc\n");
+    if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc))
+        fprintf(output, "PBM: Dtd doc differs from parent's one\n");
+    if (dtd->prev == NULL) {
+        /* No previous sibling: must be the first child of the parent. */
+        if ((dtd->parent != NULL) && (dtd->parent->children != (xmlNodePtr)dtd))
+            fprintf(output, "PBM: Dtd has no prev and not first of list\n");
+    } else {
+        if (dtd->prev->next != (xmlNodePtr) dtd)
+            fprintf(output, "PBM: Dtd prev->next : back link wrong\n");
+    }
+    if (dtd->next == NULL) {
+        /* No next sibling: must be the last child of the parent. */
+        if ((dtd->parent != NULL) && (dtd->parent->last != (xmlNodePtr) dtd))
+            fprintf(output, "PBM: Dtd has no next and not last of list\n");
+    } else {
+        if (dtd->next->prev != (xmlNodePtr) dtd)
+            fprintf(output, "PBM: Dtd next->prev : forward link wrong\n");
+    }
+}
+
+/**
+ * xmlDebugDumpAttrDecl:
+ * @output:  the FILE * receiving the dump
+ * @attr:  the attribute declaration to describe
+ * @depth:  the indentation level (two spaces per level, at most 25 levels)
+ *
+ * Print a one line description of an attribute declaration on @output:
+ * its name, owning element, type, up to five enumerated values, default
+ * kind and default value. Then report any inconsistency found in its
+ * parent, doc and sibling links as "PBM:" lines.
+ */
+void xmlDebugDumpAttrDecl(FILE *output, xmlAttributePtr attr, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix; the cap keeps it inside shift[]. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* Never hand a data buffer to fprintf as the format string. */
+    fprintf(output, "%s", shift);
+
+    if (attr->type != XML_ATTRIBUTE_DECL) {
+        fprintf(output, "PBM: not a Attr\n");
+        return;
+    }
+    if (attr->name != NULL)
+        fprintf(output, "ATTRDECL(%s)", attr->name);
+    else
+        fprintf(output, "PBM ATTRDECL noname!!!");
+    if (attr->elem != NULL)
+        fprintf(output, " for %s", attr->elem);
+    else
+        fprintf(output, " PBM noelem!!!");
+    switch (attr->atype) {
+        case XML_ATTRIBUTE_CDATA:
+            fprintf(output, " CDATA");
+            break;
+        case XML_ATTRIBUTE_ID:
+            fprintf(output, " ID");
+            break;
+        case XML_ATTRIBUTE_IDREF:
+            fprintf(output, " IDREF");
+            break;
+        case XML_ATTRIBUTE_IDREFS:
+            fprintf(output, " IDREFS");
+            break;
+        case XML_ATTRIBUTE_ENTITY:
+            fprintf(output, " ENTITY");
+            break;
+        case XML_ATTRIBUTE_ENTITIES:
+            fprintf(output, " ENTITIES");
+            break;
+        case XML_ATTRIBUTE_NMTOKEN:
+            fprintf(output, " NMTOKEN");
+            break;
+        case XML_ATTRIBUTE_NMTOKENS:
+            fprintf(output, " NMTOKENS");
+            break;
+        case XML_ATTRIBUTE_ENUMERATION:
+            fprintf(output, " ENUMERATION");
+            break;
+        case XML_ATTRIBUTE_NOTATION:
+            fprintf(output, " NOTATION ");
+            break;
+    }
+    if (attr->tree != NULL) {
+        int j;
+        xmlEnumerationPtr cur = attr->tree;
+
+        /* Show at most five enumerated values, "..." marks a longer list. */
+        for (j = 0;j < 5; j++) {
+            if (j != 0)
+                fprintf(output, "|%s", cur->name);
+            else
+                fprintf(output, " (%s", cur->name);
+            cur = cur->next;
+            if (cur == NULL) break;
+        }
+        if (cur == NULL)
+            fprintf(output, ")");
+        else
+            fprintf(output, "...)");
+    }
+    switch (attr->def) {
+        case XML_ATTRIBUTE_NONE:
+            break;
+        case XML_ATTRIBUTE_REQUIRED:
+            fprintf(output, " REQUIRED");
+            break;
+        case XML_ATTRIBUTE_IMPLIED:
+            fprintf(output, " IMPLIED");
+            break;
+        case XML_ATTRIBUTE_FIXED:
+            fprintf(output, " FIXED");
+            break;
+    }
+    if (attr->defaultValue != NULL) {
+        fprintf(output, "\"");
+        xmlDebugDumpString(output, attr->defaultValue);
+        fprintf(output, "\"");
+    }
+    /* Terminate the line on the requested stream, not on stdout. */
+    fprintf(output, "\n");
+
+    /*
+     * Do a bit of checking
+     */
+    if (attr->parent == NULL)
+        fprintf(output, "PBM: Attr has no parent\n");
+    if (attr->doc == NULL)
+        fprintf(output, "PBM: Attr has no doc\n");
+    if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
+        fprintf(output, "PBM: Attr doc differs from parent's one\n");
+    if (attr->prev == NULL) {
+        /* No previous sibling: must be the first child of the parent. */
+        if ((attr->parent != NULL) && (attr->parent->children != (xmlNodePtr)attr))
+            fprintf(output, "PBM: Attr has no prev and not first of list\n");
+    } else {
+        if (attr->prev->next != (xmlNodePtr) attr)
+            fprintf(output, "PBM: Attr prev->next : back link wrong\n");
+    }
+    if (attr->next == NULL) {
+        /* No next sibling: must be the last child of the parent. */
+        if ((attr->parent != NULL) && (attr->parent->last != (xmlNodePtr) attr))
+            fprintf(output, "PBM: Attr has no next and not last of list\n");
+    } else {
+        if (attr->next->prev != (xmlNodePtr) attr)
+            fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
+    }
+}
+
+/**
+ * xmlDebugDumpElemDecl:
+ * @output:  the FILE * the dump is written to
+ * @elem:  the element declaration to describe
+ * @depth:  nesting level; the line is indented two spaces per level
+ *          (at most 25 levels of indentation are emitted)
+ *
+ * Prints a one-line summary of an element declaration (name, content
+ * type, content model) on @output, followed by "PBM:" diagnostics for
+ * every inconsistent parent/doc/sibling link that is detected.
+ */
+void xmlDebugDumpElemDecl(FILE *output, xmlElementPtr elem, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix: two blanks per level, capped at 25. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* Print the prefix as data, never as the format string itself. */
+    fprintf(output, "%s", shift);
+
+    if (elem->type != XML_ELEMENT_DECL) {
+        fprintf(output, "PBM: not a Elem\n");
+        return;
+    }
+    if (elem->name != NULL)
+        fprintf(output, "ELEMDECL(%s)", elem->name);
+    else
+        fprintf(output, "PBM ELEMDECL noname!!!");
+    switch (elem->etype) {
+        case XML_ELEMENT_TYPE_EMPTY:
+            fprintf(output, ", EMPTY");
+            break;
+        case XML_ELEMENT_TYPE_ANY:
+            fprintf(output, ", ANY");
+            break;
+        case XML_ELEMENT_TYPE_MIXED:
+            fprintf(output, ", MIXED ");
+            break;
+        case XML_ELEMENT_TYPE_ELEMENT:
+            /*
+             * NOTE(review): same label as XML_ELEMENT_TYPE_MIXED, which
+             * looks like a copy/paste slip; left untouched because other
+             * recorded output may depend on the exact text - confirm.
+             */
+            fprintf(output, ", MIXED ");
+            break;
+    }
+    if (elem->content != NULL) {
+        char buf[1001];
+
+        /*
+         * NOTE(review): xmlSprintfElementContent receives no size, so
+         * nothing here bounds what it writes into buf - confirm that a
+         * content model cannot exceed the buffer.
+         */
+        buf[0] = 0;
+        xmlSprintfElementContent(buf, elem->content, 1);
+        buf[1000] = 0;
+        fprintf(output, "%s", buf);
+    }
+    /* Terminate the line on the requested stream, not on stdout. */
+    fprintf(output, "\n");
+
+    /*
+     * Do a bit of checking
+     */
+    if (elem->parent == NULL)
+        fprintf(output, "PBM: Elem has no parent\n");
+    if (elem->doc == NULL)
+        fprintf(output, "PBM: Elem has no doc\n");
+    if ((elem->parent != NULL) && (elem->doc != elem->parent->doc))
+        fprintf(output, "PBM: Elem doc differs from parent's one\n");
+    if (elem->prev == NULL) {
+        if ((elem->parent != NULL) && (elem->parent->children != (xmlNodePtr)elem))
+            fprintf(output, "PBM: Elem has no prev and not first of list\n");
+    } else {
+        if (elem->prev->next != (xmlNodePtr) elem)
+            fprintf(output, "PBM: Elem prev->next : back link wrong\n");
+    }
+    if (elem->next == NULL) {
+        if ((elem->parent != NULL) && (elem->parent->last != (xmlNodePtr) elem))
+            fprintf(output, "PBM: Elem has no next and not last of list\n");
+    } else {
+        if (elem->next->prev != (xmlNodePtr) elem)
+            fprintf(output, "PBM: Elem next->prev : forward link wrong\n");
+    }
+}
+
+/**
+ * xmlDebugDumpEntityDecl:
+ * @output:  the FILE * the dump is written to
+ * @ent:  the entity declaration to describe
+ * @depth:  nesting level; every line is indented two spaces per level
+ *          (at most 25 levels of indentation are emitted)
+ *
+ * Prints the entity name and kind on @output, then one indented line
+ * for each of ExternalID, SystemID and content that is set, followed
+ * by "PBM:" diagnostics for every inconsistent parent/doc/sibling link
+ * that is detected.
+ */
+void xmlDebugDumpEntityDecl(FILE *output, xmlEntityPtr ent, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix: two blanks per level, capped at 25. */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* Print the prefix as data, never as the format string itself. */
+    fprintf(output, "%s", shift);
+
+    if (ent->type != XML_ENTITY_DECL) {
+        fprintf(output, "PBM: not a Entity decl\n");
+        return;
+    }
+    if (ent->name != NULL)
+        fprintf(output, "ENTITYDECL(%s)", ent->name);
+    else
+        fprintf(output, "PBM ENTITYDECL noname!!!");
+    switch (ent->etype) {
+        case XML_INTERNAL_GENERAL_ENTITY:
+            fprintf(output, ", internal\n");
+            break;
+        case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+            fprintf(output, ", external parsed\n");
+            break;
+        case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+            fprintf(output, ", unparsed\n");
+            break;
+        case XML_INTERNAL_PARAMETER_ENTITY:
+            fprintf(output, ", parameter\n");
+            break;
+        case XML_EXTERNAL_PARAMETER_ENTITY:
+            fprintf(output, ", external parameter\n");
+            break;
+        case XML_INTERNAL_PREDEFINED_ENTITY:
+            fprintf(output, ", predefined\n");
+            break;
+        default:
+            /* Unknown kind: still end the header line so that the
+             * following lines do not run into it. */
+            fprintf(output, "\n");
+            break;
+    }
+    if (ent->ExternalID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "ExternalID=%s\n", ent->ExternalID);
+    }
+    if (ent->SystemID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "SystemID=%s\n", ent->SystemID);
+    }
+    if (ent->content) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "content=");
+        xmlDebugDumpString(output, ent->content);
+        fprintf(output, "\n");
+    }
+
+    /*
+     * Do a bit of checking
+     */
+    if (ent->parent == NULL)
+        fprintf(output, "PBM: Ent has no parent\n");
+    if (ent->doc == NULL)
+        fprintf(output, "PBM: Ent has no doc\n");
+    if ((ent->parent != NULL) && (ent->doc != ent->parent->doc))
+        fprintf(output, "PBM: Ent doc differs from parent's one\n");
+    if (ent->prev == NULL) {
+        if ((ent->parent != NULL) && (ent->parent->children != (xmlNodePtr)ent))
+            fprintf(output, "PBM: Ent has no prev and not first of list\n");
+    } else {
+        if (ent->prev->next != (xmlNodePtr) ent)
+            fprintf(output, "PBM: Ent prev->next : back link wrong\n");
+    }
+    if (ent->next == NULL) {
+        if ((ent->parent != NULL) && (ent->parent->last != (xmlNodePtr) ent))
+            fprintf(output, "PBM: Ent has no next and not last of list\n");
+    } else {
+        if (ent->next->prev != (xmlNodePtr) ent)
+            fprintf(output, "PBM: Ent next->prev : forward link wrong\n");
+    }
+}
+
void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) {
int i;
char shift[100];
@@ -74,7 +384,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift);
- switch (ent->type) {
+ switch (ent->etype) {
case XML_INTERNAL_GENERAL_ENTITY:
fprintf(output, "INTERNAL_GENERAL_ENTITY ");
break;
@@ -91,7 +401,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
fprintf(output, "EXTERNAL_PARAMETER_ENTITY ");
break;
default:
- fprintf(output, "ENTITY_%d ! ", ent->type);
+ fprintf(output, "ENTITY_%d ! ", ent->etype);
}
fprintf(output, "%s\n", ent->name);
if (ent->ExternalID) {
@@ -119,9 +429,31 @@ void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth) {
shift[2 * i] = shift[2 * i + 1] = 0;
fprintf(output, shift);
+
fprintf(output, "ATTRIBUTE %s\n", attr->name);
- if (attr->val != NULL)
- xmlDebugDumpNodeList(output, attr->val, depth + 1);
+ if (attr->children != NULL)
+ xmlDebugDumpNodeList(output, attr->children, depth + 1);
+
+ /*
+ * Do a bit of checking
+ */
+ if (attr->parent == NULL)
+ fprintf(output, "PBM: Attr has no parent\n");
+ if (attr->doc == NULL)
+ fprintf(output, "PBM: Attr has no doc\n");
+ if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
+ fprintf(output, "PBM: Attr doc differs from parent's one\n");
+ if (attr->prev == NULL) {
+ if ((attr->parent != NULL) && (attr->parent->properties != attr))
+ fprintf(output, "PBM: Attr has no prev and not first of list\n");
+ } else {
+ if (attr->prev->next != attr)
+ fprintf(output, "PBM: Attr prev->next : back link wrong\n");
+ }
+ if (attr->next != NULL) {
+ if (attr->next->prev != attr)
+ fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
+ }
}
void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth) {
@@ -139,9 +471,9 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
shift[2 * i] = shift[2 * i + 1] = ' ';
shift[2 * i] = shift[2 * i + 1] = 0;
- fprintf(output, shift);
switch (node->type) {
case XML_ELEMENT_NODE:
+ fprintf(output, shift);
fprintf(output, "ELEMENT ");
if (node->ns != NULL)
fprintf(output, "%s:%s\n", node->ns->prefix, node->name);
@@ -149,40 +481,63 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
fprintf(output, "%s\n", node->name);
break;
case XML_ATTRIBUTE_NODE:
+ fprintf(output, shift);
fprintf(output, "Error, ATTRIBUTE found here\n");
break;
case XML_TEXT_NODE:
+ fprintf(output, shift);
fprintf(output, "TEXT\n");
break;
case XML_CDATA_SECTION_NODE:
+ fprintf(output, shift);
fprintf(output, "CDATA_SECTION\n");
break;
case XML_ENTITY_REF_NODE:
- fprintf(output, "ENTITY_REF\n");
+ fprintf(output, shift);
+ fprintf(output, "ENTITY_REF(%s)\n", node->name);
break;
case XML_ENTITY_NODE:
+ fprintf(output, shift);
fprintf(output, "ENTITY\n");
break;
case XML_PI_NODE:
+ fprintf(output, shift);
fprintf(output, "PI %s\n", node->name);
break;
case XML_COMMENT_NODE:
+ fprintf(output, shift);
fprintf(output, "COMMENT\n");
break;
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
+ fprintf(output, shift);
fprintf(output, "Error, DOCUMENT found here\n");
break;
case XML_DOCUMENT_TYPE_NODE:
+ fprintf(output, shift);
fprintf(output, "DOCUMENT_TYPE\n");
break;
case XML_DOCUMENT_FRAG_NODE:
+ fprintf(output, shift);
fprintf(output, "DOCUMENT_FRAG\n");
break;
case XML_NOTATION_NODE:
fprintf(output, "NOTATION\n");
break;
+ case XML_DTD_NODE:
+ xmlDebugDumpDtd(output, (xmlDtdPtr) node, depth);
+ return;
+ case XML_ELEMENT_DECL:
+ xmlDebugDumpElemDecl(output, (xmlElementPtr) node, depth);
+ return;
+ case XML_ATTRIBUTE_DECL:
+ xmlDebugDumpAttrDecl(output, (xmlAttributePtr) node, depth);
+ return;
+ case XML_ENTITY_DECL:
+ xmlDebugDumpEntityDecl(output, (xmlEntityPtr) node, depth);
+ return;
default:
+ fprintf(output, shift);
fprintf(output, "NODE_%d\n", node->type);
}
if (node->doc == NULL) {
@@ -210,12 +565,35 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
if (ent != NULL)
xmlDebugDumpEntity(output, ent, depth + 1);
}
+ /*
+ * Do a bit of checking
+ */
+ if (node->parent == NULL)
+ fprintf(output, "PBM: Node has no parent\n");
+ if (node->doc == NULL)
+ fprintf(output, "PBM: Node has no doc\n");
+ if ((node->parent != NULL) && (node->doc != node->parent->doc))
+ fprintf(output, "PBM: Node doc differs from parent's one\n");
+ if (node->prev == NULL) {
+ if ((node->parent != NULL) && (node->parent->children != node))
+ fprintf(output, "PBM: Node has no prev and not first of list\n");
+ } else {
+ if (node->prev->next != node)
+ fprintf(output, "PBM: Node prev->next : back link wrong\n");
+ }
+ if (node->next == NULL) {
+ if ((node->parent != NULL) && (node->parent->last != node))
+ fprintf(output, "PBM: Node has no next and not last of list\n");
+ } else {
+ if (node->next->prev != node)
+ fprintf(output, "PBM: Node next->prev : forward link wrong\n");
+ }
}
void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth) {
xmlDebugDumpOneNode(output, node, depth);
- if (node->childs != NULL)
- xmlDebugDumpNodeList(output, node->childs, depth + 1);
+ if (node->children != NULL)
+ xmlDebugDumpNodeList(output, node->children, depth + 1);
}
void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth) {
@@ -306,8 +684,8 @@ void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) {
xmlDebugDumpDocumentHead(output, doc);
if (((doc->type == XML_DOCUMENT_NODE) ||
(doc->type == XML_HTML_DOCUMENT_NODE)) &&
- (doc->root != NULL))
- xmlDebugDumpNodeList(output, doc->root, 1);
+ (doc->children != NULL))
+ xmlDebugDumpNodeList(output, doc->children, 1);
}
void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
@@ -368,27 +746,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
doc->intSubset->entities;
fprintf(output, "Entities in internal subset\n");
for (i = 0;i < table->nb_entities;i++) {
- cur = &table->table[i];
+ cur = table->table[i];
fprintf(output, "%d : %s : ", i, cur->name);
- switch (cur->type) {
+ switch (cur->etype) {
case XML_INTERNAL_GENERAL_ENTITY:
- fprintf(output, "INTERNAL GENERAL");
+ fprintf(output, "INTERNAL GENERAL, ");
break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
- fprintf(output, "EXTERNAL PARSED");
+ fprintf(output, "EXTERNAL PARSED, ");
break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
- fprintf(output, "EXTERNAL UNPARSED");
+ fprintf(output, "EXTERNAL UNPARSED, ");
break;
case XML_INTERNAL_PARAMETER_ENTITY:
- fprintf(output, "INTERNAL PARAMETER");
+ fprintf(output, "INTERNAL PARAMETER, ");
break;
case XML_EXTERNAL_PARAMETER_ENTITY:
- fprintf(output, "EXTERNAL PARAMETER");
+ fprintf(output, "EXTERNAL PARAMETER, ");
break;
default:
fprintf(output, "UNKNOWN TYPE %d",
- cur->type);
+ cur->etype);
}
if (cur->ExternalID != NULL)
fprintf(output, "ID \"%s\"", cur->ExternalID);
@@ -407,27 +785,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
doc->extSubset->entities;
fprintf(output, "Entities in external subset\n");
for (i = 0;i < table->nb_entities;i++) {
- cur = &table->table[i];
+ cur = table->table[i];
fprintf(output, "%d : %s : ", i, cur->name);
- switch (cur->type) {
+ switch (cur->etype) {
case XML_INTERNAL_GENERAL_ENTITY:
- fprintf(output, "INTERNAL GENERAL");
+ fprintf(output, "INTERNAL GENERAL, ");
break;
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
- fprintf(output, "EXTERNAL PARSED");
+ fprintf(output, "EXTERNAL PARSED, ");
break;
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
- fprintf(output, "EXTERNAL UNPARSED");
+ fprintf(output, "EXTERNAL UNPARSED, ");
break;
case XML_INTERNAL_PARAMETER_ENTITY:
- fprintf(output, "INTERNAL PARAMETER");
+ fprintf(output, "INTERNAL PARAMETER, ");
break;
case XML_EXTERNAL_PARAMETER_ENTITY:
- fprintf(output, "EXTERNAL PARAMETER");
+ fprintf(output, "EXTERNAL PARAMETER, ");
break;
default:
fprintf(output, "UNKNOWN TYPE %d",
- cur->type);
+ cur->etype);
}
if (cur->ExternalID != NULL)
fprintf(output, "ID \"%s\"", cur->ExternalID);
@@ -449,14 +827,14 @@ static int xmlLsCountNode(xmlNodePtr node) {
switch (node->type) {
case XML_ELEMENT_NODE:
- list = node->childs;
+ list = node->children;
break;
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
- list = ((xmlDocPtr) node)->root;
+ list = ((xmlDocPtr) node)->children;
break;
case XML_ATTRIBUTE_NODE:
- list = ((xmlAttrPtr) node)->val;
+ list = ((xmlAttrPtr) node)->children;
break;
case XML_TEXT_NODE:
case XML_CDATA_SECTION_NODE:
@@ -475,6 +853,10 @@ static int xmlLsCountNode(xmlNodePtr node) {
case XML_ENTITY_NODE:
case XML_DOCUMENT_FRAG_NODE:
case XML_NOTATION_NODE:
+ case XML_DTD_NODE:
+ case XML_ELEMENT_DECL:
+ case XML_ATTRIBUTE_DECL:
+ case XML_ENTITY_DECL:
ret = 1;
break;
}
@@ -621,9 +1003,9 @@ xmlShellList(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node,
if ((node->type == XML_DOCUMENT_NODE) ||
(node->type == XML_HTML_DOCUMENT_NODE)) {
- cur = ((xmlDocPtr) node)->root;
- } else if (node->childs != NULL) {
- cur = node->childs;
+ cur = ((xmlDocPtr) node)->children;
+ } else if (node->children != NULL) {
+ cur = node->children;
} else {
xmlLsOneNode(stdout, node);
return(0);
@@ -910,10 +1292,10 @@ xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree,
if ((node->type == XML_DOCUMENT_NODE) ||
(node->type == XML_HTML_DOCUMENT_NODE)) {
- node = ((xmlDocPtr) node)->root;
- } else if (node->childs != NULL) {
+ node = ((xmlDocPtr) node)->children;
+ } else if (node->children != NULL) {
/* deep first */
- node = node->childs;
+ node = node->children;
indent++;
} else if ((node != tree) && (node->next != NULL)) {
/* then siblings */
@@ -1008,7 +1390,7 @@ xmlShellPwd(xmlShellCtxtPtr ctxt, char *buffer, xmlNodePtr node,
} else if (cur->type == XML_ATTRIBUTE_NODE) {
sep = '@';
name = (const char *) (((xmlAttrPtr) cur)->name);
- next = ((xmlAttrPtr) cur)->node;
+ next = ((xmlAttrPtr) cur)->parent;
} else {
next = cur->parent;
}
527 encoding.c
View
@@ -35,14 +35,11 @@
#include <stdlib.h>
#endif
#include "encoding.h"
-#ifdef HAVE_UNICODE_H
-#include <unicode.h>
-#endif
#include "xmlmemory.h"
-#ifdef HAVE_UNICODE_H
+xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
+xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
-#else /* ! HAVE_UNICODE_H */
/*
* From rfc2044: encoding of the Unicode values on UTF-8:
*
@@ -55,6 +52,50 @@
*/
/**
+ * xmlCheckUTF8: Check utf-8 string for legality.
+ * @utf: Pointer to putative utf-8 encoded string.
+ *
+ * Checks @utf for being valid utf-8. @utf is assumed to be
+ * null-terminated; a NULL @utf is reported as invalid. Every character
+ * has to match one of these bit patterns:
+ *    0xxxxxxx                                valid 1-byte
+ *    110xxxxx 10xxxxxx                       valid 2-byte
+ *    1110xxxx 10xxxxxx 10xxxxxx              valid 3-byte
+ *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx     valid 4-byte
+ * This function is not super-strict, as it will allow longer utf-8
+ * sequences than necessary. Note that Java is capable of producing
+ * these sequences if provoked. Also note, this routine checks for the
+ * 4-byte maximum size, but does not check for 0x10ffff maximum value.
+ *
+ * Return value: true if @utf is valid.
+ **/
+int
+xmlCheckUTF8(const unsigned char *utf)
+{
+    int ix;
+    unsigned char c;
+
+    if (utf == NULL)
+        return(0);
+
+    for (ix = 0; (c = utf[ix]) != 0;) {
+        if ((c & 0x80) == 0x00) {
+            /* 1-byte code */
+            ix++;
+        } else if ((c & 0xe0) == 0xc0) {
+            /* 2-byte code */
+            if ((utf[ix + 1] & 0xc0) != 0x80)
+                return(0);
+            ix += 2;
+        } else if ((c & 0xf0) == 0xe0) {
+            /*
+             * 3-byte code. The tests short-circuit, so a terminating 0
+             * stops the scan before any byte past it is read.
+             */
+            if (((utf[ix + 1] & 0xc0) != 0x80) ||
+                ((utf[ix + 2] & 0xc0) != 0x80))
+                return(0);
+            ix += 3;
+        } else if ((c & 0xf8) == 0xf0) {
+            /* 4-byte code */
+            if (((utf[ix + 1] & 0xc0) != 0x80) ||
+                ((utf[ix + 2] & 0xc0) != 0x80) ||
+                ((utf[ix + 3] & 0xc0) != 0x80))
+                return(0);
+            ix += 4;
+        } else {
+            /*
+             * Either a continuation byte (10xxxxxx) where a leading byte
+             * is required, or a leading byte announcing more than 4 bytes.
+             */
+            return(0);
+        }
+    }
+    return(1);
+}
+
+/**
* isolat1ToUTF8:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
@@ -66,27 +107,27 @@
* Returns the number of byte written, or -1 by lack of space.
*/
int
-isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
-{
+isolat1ToUTF8(unsigned char* out, int outlen,
+ const unsigned char* in, int *inlen) {
unsigned char* outstart= out;
unsigned char* outend= out+outlen;
- unsigned char* inend= in+inlen;
+ const unsigned char* inend= in+*inlen;
unsigned char c;
while (in < inend) {
c= *in++;
if (c < 0x80) {
- if (out >= outend) return -1;
+ if (out >= outend) return(-1);
*out++ = c;
}
else {
- if (out >= outend) return -1;
+ if (out >= outend) return(-1);
*out++ = 0xC0 | (c >> 6);
- if (out >= outend) return -1;
+ if (out >= outend) return(-1);
*out++ = 0x80 | (0x3F & c);
}
}
- return out-outstart;
+ return(out-outstart);
}
/**
@@ -101,138 +142,398 @@ isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
* TODO: UTF8Toisolat1 need a fallback mechanism ...
*
* Returns the number of byte written, or -1 by lack of space, or -2
- * if the transcoding failed.
+ * if the transcoding fails (for *in is not valid utf8 string or
+ * the result of transformation can't fit into the encoding we want)
+ * The value of @inlen after return is the number of octets consumed
+ * when the return value is positive, otherwise it is unpredictable.
*/
int
-UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int inlen)
-{
+UTF8Toisolat1(unsigned char* out, int outlen,
+ const unsigned char* in, int *inlen) {
unsigned char* outstart= out;
unsigned char* outend= out+outlen;
- unsigned char* inend= in+inlen;
+ const unsigned char* inend= in+*inlen;
unsigned char c;
while (in < inend) {
c= *in++;
if (c < 0x80) {
- if (out >= outend) return -1;
+ if (out >= outend) return(-1);
*out++= c;
}
- else if (((c & 0xFE) == 0xC2) && in<inend) {
- if (out >= outend) return -1;
+ else if (in == inend) {
+ *inlen -= 1;
+ break;
+ }
+ else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
+ /* a two-byte UTF-8 sequence that can be encoded as ISO Latin 1 */
*out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
- }
- else return -2;
+ }
+ else
+ return(-2);
+ /* TODO : some should be represented as "&#x____;" */
}
- return out-outstart;
+ return(out-outstart);
}
/**
- * UTF16ToUTF8:
+ * UTF16LEToUTF8:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
- * @in: a pointer to an array of UTF-16 chars (array of unsigned shorts)
- * @inlen: the length of @in
+ * @inb: a pointer to an array of UTF-16LE chars passed as a byte array
+ * @inlenb: the length of @inb in bytes
*
- * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
- * block of chars out.
- * Returns the number of byte written, or -1 by lack of space.
+ * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
+ * block of chars out. This function assumes the endianness
+ * is the same between the native type of this machine and the
+ * input data.
+ *
+ * Returns the number of byte written, or -1 by lack of space, or -2
+ * if the transcoding fails (for *in is not valid utf16 string)
+ * The value of *inlenb after return is the number of octets consumed
+ * when the return value is positive, otherwise it is unpredictable.
*/
int
-UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen)
+UTF16LEToUTF8(unsigned char* out, int outlen,
+ const unsigned char* inb, int *inlenb)
{
unsigned char* outstart= out;
unsigned char* outend= out+outlen;
- unsigned short* inend= in+inlen;
- unsigned int c, d;
+ unsigned short* in = (unsigned short*) inb;
+ unsigned short* inend;
+ unsigned int c, d, inlen;
+ unsigned char *tmp;
int bits;
+ if ((*inlenb % 2) == 1)
+ (*inlenb)--;
+ inlen = *inlenb / 2;
+ inend= in + inlen;
while (in < inend) {
+#ifdef BIG_ENDIAN
+ tmp = (unsigned char *) in;
+ c = *tmp++;
+ c = c | (((unsigned int)*tmp) << 8);
+ in++;
+#else /* BIG_ENDIAN */
c= *in++;
+#endif /* BIG_ENDIAN */
if ((c & 0xFC00) == 0xD800) { /* surrogates */
- if ((in<inend) && (((d=*in++) & 0xFC00) == 0xDC00)) {
+ if (in >= inend) { /* (in > inend) shouldn't happens */
+ (*inlenb) -= 2;
+ break;
+ }
+#ifdef BIG_ENDIAN
+ tmp = (unsigned char *) in;
+ d = *tmp++;
+ d = d | (((unsigned int)*tmp) << 8);
+ in++;
+#else /* BIG_ENDIAN */
+ d = *in++;
+#endif /* BIG_ENDIAN */
+ if ((d & 0xFC00) == 0xDC00) {
c &= 0x03FF;
c <<= 10;
c |= d & 0x03FF;
c += 0x10000;
}
- else return -1;
+ else
+ return(-2);
+ }
+
+ /* assertion: c is a single UCS-4 value */
+ if (out >= outend)
+ return(-1);
+ if (c < 0x80) { *out++= c; bits= -6; }
+ else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
+ else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
+ else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
+
+ for ( ; bits >= 0; bits-= 6) {
+ if (out >= outend)
+ return(-1);
+ *out++= ((c >> bits) & 0x3F) | 0x80;
}
+ }
+ return(out-outstart);
+}
+
+/**
+ * UTF8ToUTF16LE:
+ * @outb: a pointer to an array of bytes to store the result
+ * @outlen: the length of @outb in bytes (halved internally to count
+ *          16-bit units)
+ * @in: a pointer to an array of UTF-8 chars
+ * @inlen: pointer to the length of @in in bytes; when the input stops
+ *         in the middle of a multi-byte sequence, the number of bytes
+ *         left after the leading byte is subtracted from it
+ *
+ * Take a block of UTF-8 chars in and try to convert it to a UTF-16LE
+ * block of chars out. Values below 0x10000 are stored as one 16-bit
+ * unit, values below 0x110000 as a surrogate pair.
+ * TODO: UTF8ToUTF16LE needs a fallback mechanism ...
+ *
+ * Returns out - outstart on success, -1 by lack of space, on a bad
+ * continuation byte or on a value of 0x110000 and above, or -2 on an
+ * invalid leading byte.
+ * NOTE(review): out - outstart counts unsigned shorts, not bytes -
+ * confirm which unit the callers expect.
+ */
+int
+UTF8ToUTF16LE(unsigned char* outb, int outlen,
+ const unsigned char* in, int *inlen)
+{
+ unsigned short* out = (unsigned short*) outb;
+ unsigned short* outstart= out;
+ unsigned short* outend;
+ const unsigned char* inend= in+*inlen;
+ unsigned int c, d, trailing;
+#ifdef BIG_ENDIAN
+ unsigned char *tmp;
+ unsigned short tmp1, tmp2;
+#endif /* BIG_ENDIAN */
+
+ outlen /= 2; /* convert in short length */
+ outend = out + outlen;
+ while (in < inend) {
+ /* classify the leading byte: its payload bits go to c, trailing is
+  * the number of continuation bytes that must follow */
+ d= *in++;
+ if (d < 0x80) { c= d; trailing= 0; }
+ else if (d < 0xC0)
+ return(-2); /* trailing byte in leading position */
+ else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
+ else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
+ else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
+ else
+ return(-2); /* no chance for this in UTF-16 */
+
+ /* sequence truncated by the end of the input: adjust *inlen, stop */
+ if (inend - in < trailing) {
+ *inlen -= (inend - in);
+ break;
+ }
+
+ /* fold each 10xxxxxx continuation byte into c, 6 bits at a time */
+ for ( ; trailing; trailing--) {
+ if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
+ return(-1);
+ c <<= 6;
+ c |= d & 0x3F;
+ }
/* assertion: c is a single UTF-4 value */
+ if (c < 0x10000) {
+ if (out >= outend)
+ return(-1);
+#ifdef BIG_ENDIAN
+ /* store through a byte pointer, low byte first */
+ tmp = (unsigned char *) out;
+ *tmp = c ;
+ *(tmp + 1) = c >> 8 ;
+ out++;
+#else /* BIG_ENDIAN */
+ *out++ = c;
+#endif /* BIG_ENDIAN */
+ }
+ else if (c < 0x110000) {
+ /* a surrogate pair needs room for two 16-bit units */
+ if (out+1 >= outend)
+ return(-1);
+ c -= 0x10000;
+#ifdef BIG_ENDIAN
+ /* high surrogate, stored low byte first */
+ tmp1 = 0xD800 | (c >> 10);
+ tmp = (unsigned char *) out;
+ *tmp = tmp1;
+ *(tmp + 1) = tmp1 >> 8;
+ out++;
+
+ /* low surrogate, stored low byte first */
+ tmp2 = 0xDC00 | (c & 0x03FF);
+ tmp = (unsigned char *) out;
+ *tmp = tmp2;
+ *(tmp + 1) = tmp2 >> 8;
+ out++;
+#else /* BIG_ENDIAN */
+ *out++ = 0xD800 | (c >> 10);
+ *out++ = 0xDC00 | (c & 0x03FF);
+#endif /* BIG_ENDIAN */
+ }
+ else
+ return(-1);
+ }
+ return(out-outstart);
+}
- if (out >= outend) return -1;
+/**
+ * UTF16BEToUTF8:
+ * @out: a pointer to an array of bytes to store the result
+ * @outlen: the length of @out
+ * @inb: a pointer to an array of UTF-16 chars passed as a byte array
+ * @inlenb: the length of @inb in bytes
+ *
+ * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
+ * block of chars out. This function assumes the endianness
+ * is the same between the native type of this machine and the
+ * input data.
+ *
+ * Returns the number of byte written, or -1 by lack of space, or -2
+ * if the transcoding fails (for *in is not valid utf16 string)
+ * The value of *inlenb after return is the number of octets consumed
+ * when the return value is positive, otherwise it is unpredictable.
+ */
+int
+UTF16BEToUTF8(unsigned char* out, int outlen,
+ const unsigned char* inb, int *inlenb)
+{
+ unsigned char* outstart= out;
+ unsigned char* outend= out+outlen;
+ unsigned short* in = (unsigned short*) inb;
+ unsigned short* inend;
+ unsigned int c, d, inlen;
+#ifdef BIG_ENDIAN
+#else /* BIG_ENDIAN */
+ unsigned char *tmp;
+#endif /* BIG_ENDIAN */
+ int bits;
+
+ if ((*inlenb % 2) == 1)
+ (*inlenb)--;
+ inlen = *inlenb / 2;
+ inend= in + inlen;
+ while (in < inend) {
+#ifdef BIG_ENDIAN
+ c= *in++;
+#else
+ tmp = (unsigned char *) in;
+ c = *tmp++;
+ c = c << 8;
+ c = c | (unsigned int) *tmp;
+ in++;
+#endif
+ if ((c & 0xFC00) == 0xD800) { /* surrogates */
+ if (in >= inend) { /* (in > inend) shouldn't happens */
+ (*inlenb) -= 2;
+ break;
+ }
+
+#ifdef BIG_ENDIAN
+ d= *in++;
+#else
+ tmp = (unsigned char *) in;
+ d = *tmp++;
+ d = d << 8;
+ d = d | (unsigned int) *tmp;
+ in++;
+#endif
+ if ((d & 0xFC00) == 0xDC00) {
+ c &= 0x03FF;
+ c <<= 10;
+ c |= d & 0x03FF;
+ c += 0x10000;
+ }
+ else
+ return(-2);
+ }
+
+ /* assertion: c is a single UCS-4 value */
+ if (out >= outend)
+ return(-1);
if (c < 0x80) { *out++= c; bits= -6; }
- else if (c < 0x800) { *out++= (c >> 6) | 0xC0; bits= 0; }
- else if (c < 0x10000) { *out++= (c >> 12) | 0xE0; bits= 6; }
- else { *out++= (c >> 18) | 0xF0; bits= 12; }
+ else if (c < 0x800) { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; }
+ else if (c < 0x10000) { *out++= ((c >> 12) & 0x0F) | 0xE0; bits= 6; }
+ else { *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }
- for ( ; bits > 0; bits-= 6) {
- if (out >= outend) return -1;
- *out++= (c >> bits) & 0x3F;
+ for ( ; bits >= 0; bits-= 6) {
+ if (out >= outend)
+ return(-1);
+ *out++= ((c >> bits) & 0x3F) | 0x80;
}
}
- return out-outstart;
+ return(out-outstart);
}
/**
- * UTF8ToUTF16:
- * @out: a pointer to an array of shorts to store the result
- * @outlen: the length of @out (number of shorts)
+ * UTF8ToUTF16BE:
+ * @outb: a pointer to an array of bytes to store the result
+ * @outlen: the length of @outb
* @in: a pointer to an array of UTF-8 chars
* @inlen: the length of @in
*
- * Take a block of UTF-8 chars in and try to convert it to an UTF-16
+ * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
* block of chars out.
- * TODO: UTF8ToUTF16 need a fallback mechanism ...
+ * TODO: UTF8ToUTF16BE need a fallback mechanism ...
*
* Returns the number of byte written, or -1 by lack of space, or -2
- * if the transcoding failed.
+ * if the transcoding failed.
*/
int
-UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
+UTF8ToUTF16BE(unsigned char* outb, int outlen,
+ const unsigned char* in, int *inlen)
{
+ unsigned short* out = (unsigned short*) outb;
unsigned short* outstart= out;
- unsigned short* outend= out+outlen;
- unsigned char* inend= in+inlen;
+ unsigned short* outend;
+ const unsigned char* inend= in+*inlen;
unsigned int c, d, trailing;
+#ifdef BIG_ENDIAN
+#else
+ unsigned char *tmp;
+ unsigned short tmp1, tmp2;
+#endif /* BIG_ENDIAN */
+ outlen /= 2; /* convert in short length */
+ outend = out + outlen;
while (in < inend) {
d= *in++;
if (d < 0x80) { c= d; trailing= 0; }
- else if (d < 0xC0) return -2; /* trailing byte in leading position */
+ else if (d < 0xC0)
+ return(-2); /* trailing byte in leading position */
else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
- else return -2; /* no chance for this in UTF-16 */
+ else
+ return(-2); /* no chance for this in UTF-16 */
+
+ if (inend - in < trailing) {
+ *inlen -= (inend - in);
+ break;
+ }
for ( ; trailing; trailing--) {
- if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) return -1;
+ if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) return(-1);
c <<= 6;
c |= d & 0x3F;
}
/* assertion: c is a single UTF-4 value */
if (c < 0x10000) {
- if (out >= outend) return -1;
+ if (out >= outend) return(-1);
+#ifdef BIG_ENDIAN
*out++ = c;
+#else
+ tmp = (unsigned char *) out;
+ *tmp = c >> 8;
+ *(tmp + 1) = c;
+ out++;
+#endif /* BIG_ENDIAN */
}
else if (c < 0x110000) {
- if (out+1 >= outend) return -1;
+ if (out+1 >= outend) return(-1);
c -= 0x10000;
+#ifdef BIG_ENDIAN
*out++ = 0xD800 | (c >> 10);
*out++ = 0xDC00 | (c & 0x03FF);
+#else
+ tmp1 = 0xD800 | (c >> 10);
+ tmp = (unsigned char *) out;
+ *tmp = tmp1 >> 8;
+ *(tmp + 1) = tmp1;
+ out++;
+
+ tmp2 = 0xDC00 | (c & 0x03FF);
+ tmp = (unsigned char *) out;
+ *tmp = tmp2 >> 8;
+ *(tmp + 1) = tmp2;
+ out++;
+#endif
}
- else return -1;
+ else return(-1);
}
- return out-outstart;
+ return(out-outstart);
}
-#endif /* ! HAVE_UNICODE_H */
-
/**
* xmlDetectCharEncoding:
* @in: a pointer to the first bytes of the XML entity, must be at least
* 4 bytes long.
+ * @len: pointer to the length of the buffer
*
* Guess the encoding of the entity using the first bytes of the entity content
* accordingly of the non-normative appendix F of the XML-1.0 recommendation.
@@ -240,30 +541,34 @@ UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
* Returns one of the XML_CHAR_ENCODING_... values.
*/
xmlCharEncoding
-xmlDetectCharEncoding(const unsigned char* in)
+xmlDetectCharEncoding(const unsigned char* in, int len)
{
- if ((in[0] == 0x00) && (in[1] == 0x00) &&
- (in[2] == 0x00) && (in[3] == 0x3C))
- return(XML_CHAR_ENCODING_UCS4BE);
- if ((in[0] == 0x3C) && (in[1] == 0x00) &&
- (in[2] == 0x00) && (in[3] == 0x00))
- return(XML_CHAR_ENCODING_UCS4LE);
- if ((in[0] == 0x00) && (in[1] == 0x00) &&
- (in[2] == 0x3C) && (in[3] == 0x00))
- return(XML_CHAR_ENCODING_UCS4_2143);
- if ((in[0] == 0x00) && (in[1] == 0x3C) &&
- (in[2] == 0x00) && (in[3] == 0x00))
- return(XML_CHAR_ENCODING_UCS4_3412);
- if ((in[0] == 0xFE) && (in[1] == 0xFF))
- return(XML_CHAR_ENCODING_UTF16BE);
- if ((in[0] == 0xFF) && (in[1] == 0xFE))
- return(XML_CHAR_ENCODING_UTF16LE);
- if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
- (in[2] == 0xA7) && (in[3] == 0x94))
- return(XML_CHAR_ENCODING_EBCDIC);
- if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
- (in[2] == 0x78) && (in[3] == 0x6D))
- return(XML_CHAR_ENCODING_UTF8);
+ if (len >= 4) {
+ if ((in[0] == 0x00) && (in[1] == 0x00) &&
+ (in[2] == 0x00) && (in[3] == 0x3C))
+ return(XML_CHAR_ENCODING_UCS4BE);
+ if ((in[0] == 0x3C) && (in[1] == 0x00) &&
+ (in[2] == 0x00) && (in[3] == 0x00))
+ return(XML_CHAR_ENCODING_UCS4LE);
+ if ((in[0] == 0x00) && (in[1] == 0x00) &&
+ (in[2] == 0x3C) && (in[3] == 0x00))
+ return(XML_CHAR_ENCODING_UCS4_2143);
+ if ((in[0] == 0x00) && (in[1] == 0x3C) &&
+ (in[2] == 0x00) && (in[3] == 0x00))
+ return(XML_CHAR_ENCODING_UCS4_3412);
+ if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
+ (in[2] == 0xA7) && (in[3] == 0x94))
+ return(XML_CHAR_ENCODING_EBCDIC);
+ if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
+ (in[2] == 0x78) && (in[3] == 0x6D))
+ return(XML_CHAR_ENCODING_UTF8);
+ }
+ if (len >= 2) {
+ if ((in[0] == 0xFE) && (in[1] == 0xFF))
+ return(XML_CHAR_ENCODING_UTF16BE);
+ if ((in[0] == 0xFF) && (in[1] == 0xFE))
+ return(XML_CHAR_ENCODING_UTF16LE);
+ }
return(XML_CHAR_ENCODING_NONE);
}
@@ -364,7 +669,8 @@ static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
* Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
*/
xmlCharEncodingHandlerPtr
-xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input,
+xmlNewCharEncodingHandler(const char *name,
+ xmlCharEncodingInputFunc input,
xmlCharEncodingOutputFunc output) {
xmlCharEncodingHandlerPtr handler;
char upper[500];
@@ -429,11 +735,11 @@ xmlInitCharEncodingHandlers(void) {
return;
}
xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
-#ifdef HAVE_UNICODE_H
-#else
- /* xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); */
+ xmlUTF16LEHandler =
+ xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
+ xmlUTF16BEHandler =
+ xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
-#endif
}
/**
@@ -493,7 +799,52 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc) {
if (handlers == NULL) xmlInitCharEncodingHandlers();
- /* TODO xmlGetCharEncodingHandler !!!!!!! */
+ switch (enc) {
+ case XML_CHAR_ENCODING_ERROR:
+ return(NULL);
+ case XML_CHAR_ENCODING_NONE:
+ return(NULL);
+ case XML_CHAR_ENCODING_UTF8:
+ return(NULL);
+ case XML_CHAR_ENCODING_UTF16LE:
+ return(xmlUTF16LEHandler);
+ case XML_CHAR_ENCODING_UTF16BE:
+ return(xmlUTF16BEHandler);
+ case XML_CHAR_ENCODING_EBCDIC:
+ return(NULL);
+ case XML_CHAR_ENCODING_UCS4LE:
+ return(NULL);
+ case XML_CHAR_ENCODING_UCS4BE:
+ return(NULL);
+ case XML_CHAR_ENCODING_UCS4_2143:
+ return(NULL);
+ case XML_CHAR_ENCODING_UCS4_3412:
+ return(NULL);
+ case XML_CHAR_ENCODING_UCS2:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_1:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_2:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_3:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_4:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_5:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_6:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_7:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_8:
+ return(NULL);
+ case XML_CHAR_ENCODING_8859_9:
+ return(NULL);
+ case XML_CHAR_ENCODING_2022_JP:
+ case XML_CHAR_ENCODING_SHIFT_JIS:
+ case XML_CHAR_ENCODING_EUC_JP:
+ return(NULL);
+ }
return(NULL);
}
10 encoding.h
View
@@ -67,11 +67,11 @@ typedef enum {
* Returns the number of bytes written, or -1 for lack of space.
*/
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
- unsigned char* in, int inlen);
+ const unsigned char* in, int *inlen);
/**
- * xmlCharEncodingInputFunc:
+ * xmlCharEncodingOutputFunc:
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
* @in: a pointer to an array of UTF-8 chars
@@ -84,7 +84,7 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
* if the transcoding failed.
*/
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
- unsigned char* in, int inlen);
+ const unsigned char* in, int *inlen);
/*
* Block defining the handlers for non UTF-8 encodings.
@@ -101,10 +101,12 @@ struct _xmlCharEncodingHandler {
void xmlInitCharEncodingHandlers (void);
void xmlCleanupCharEncodingHandlers (void);
void xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);
-xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in);
+xmlCharEncoding xmlDetectCharEncoding (const unsigned char* in,
+ int len);
xmlCharEncoding xmlParseCharEncoding (const char* name);
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
+int xmlCheckUTF8 (const unsigned char *utf);
#ifdef __cplusplus
628 entities.c
View
@@ -21,6 +21,8 @@
#include "entities.h"
#include "parser.h"
+#define DEBUG_ENT_REF /* debugging of cross-entity dependencies */
+
/*
* The XML predefined entities.
*/
@@ -45,6 +47,8 @@ xmlEntitiesTablePtr xmlPredefinedEntities = NULL;
void xmlFreeEntity(xmlEntityPtr entity) {
if (entity == NULL) return;
+ if (entity->children)
+ xmlFreeNodeList(entity->children);
if (entity->name != NULL)
xmlFree((char *) entity->name);
if (entity->ExternalID != NULL)
@@ -55,22 +59,31 @@ void xmlFreeEntity(xmlEntityPtr entity) {
xmlFree((char *) entity->content);
if (entity->orig != NULL)
xmlFree((char *) entity->orig);
+#ifdef WITH_EXTRA_ENT_DETECT
+ if (entity->entTab != NULL) {
+ int i;
+
+ for (i = 0; i < entity->entNr; i++)
+ xmlFree(entity->entTab[i]);
+ xmlFree(entity->entTab);
+ }
+#endif
memset(entity, -1, sizeof(xmlEntity));
+ xmlFree(entity);
}
/*
* xmlAddEntity : register a new entity for an entities table.
*/
-static void
+static xmlEntityPtr
xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
int i;
- xmlEntityPtr cur;
- int len;
+ xmlEntityPtr ret;
for (i = 0;i < table->nb_entities;i++) {
- cur = &table->table[i];
- if (!xmlStrcmp(cur->name, name)) {
+ ret = table->table[i];
+ if (!xmlStrcmp(ret->name, name)) {
/*
* The entity is already defined in this Dtd, the spec says to NOT
* override it ... Is it worth a Warning ??? !!!
@@ -78,15 +91,15 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
*/
if (((type == XML_INTERNAL_PARAMETER_ENTITY) ||
(type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
- ((cur->type == XML_INTERNAL_PARAMETER_ENTITY) ||
- (cur->type == XML_EXTERNAL_PARAMETER_ENTITY)))
- return;
+ ((ret->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
+ (ret->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
+ return(NULL);
else
if (((type != XML_INTERNAL_PARAMETER_ENTITY) &&
(type != XML_EXTERNAL_PARAMETER_ENTITY)) &&
- ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) &&
- (cur->type != XML_EXTERNAL_PARAMETER_ENTITY)))
- return;
+ ((ret->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
+ (ret->etype != XML_EXTERNAL_PARAMETER_ENTITY)))
+ return(NULL);
}
}
if (table->nb_entities >= table->max_entities) {
@@ -94,35 +107,43 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
* need more elements.
*/
table->max_entities *= 2;
- table->table =