Permalink
Browse files

Rewrite/cleanup/tests of URI normalization:

- uri.c: rewrite of xmlNormalizeURIPath from Paul D. Smith
- test/URI/smith.uri result/URI/smith.uri Makefile.am:
  added the new tests for URI normalization
- testURI.c: fixed stoopid bugs
- result/VC/OneID3 result/VC/UniqueElementTypeDeclaration:
  the URI in the error messages are now properly normalized
Daniel
  • Loading branch information...
1 parent ea28ce6 commit b6e7fdbac64423e63474fbb4b95770209f280797 Daniel Veillard committed Feb 2, 2001
Showing with 259 additions and 26 deletions.
  1. +9 −0 ChangeLog
  2. +13 −0 Makefile.am
  3. +22 −18 aclocal.m4
  4. +15 −0 result/URI/smith.uri
  5. +1 −1 result/VC/OneID3
  6. +1 −1 result/VC/UniqueElementTypeDeclaration
  7. +15 −0 test/URI/smith.uri
  8. +4 −6 testURI.c
  9. +179 −0 uri.c
View
@@ -1,3 +1,12 @@
+Fri Feb 2 18:04:35 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
+
+ * uri.c: rewrite of xmlNormalizeURIPath from Paul D. Smith
+ * test/URI/smith.uri result/URI/smith.uri Makefile.am:
+ added the new tests for URI normalization
+ * testURI.c: fixed stoopid bugs
+ * result/VC/OneID3 result/VC/UniqueElementTypeDeclaration:
+ the URI in the error messages are now properly normalized
+
Fri Feb 2 09:18:53 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* uri.c: applied Marc Sanfacon's patch for xmlNormalizeURIPath
View
@@ -237,6 +237,19 @@ URItests : testURI
diff $(srcdir)/result/URI/$$name result.$$name ; \
rm result.$$name ; \
fi ; fi ; done)
+ @(for i in $(srcdir)/test/URI/*.uri ; do \
+ name=`basename $$i`; \
+ if [ ! -d $$i ] ; then \
+ if [ ! -f $(srcdir)/result/URI/$$name ] ; then \
+ echo New test file $$name ; \
+ $(top_builddir)/testURI < $$i > $(srcdir)/result/URI/$$name ; \
+ else \
+ echo Testing $$name ; \
+ $(top_builddir)/testURI < $$i > result.$$name ; \
+ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
+ diff $(srcdir)/result/URI/$$name result.$$name ; \
+ rm result.$$name ; \
+ fi ; fi ; done)
XPathtests : testXPath
@echo "##"
View
@@ -620,31 +620,35 @@ esac
])
# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for
-# the libltdl convenience library, adds --enable-ltdl-convenience to
-# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
-# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
-# to be `${top_builddir}/libltdl'. Make sure you start DIR with
-# '${top_builddir}/' (note the single quotes!) if your package is not
-# flat, and, if you're not using automake, define top_builddir as
-# appropriate in the Makefiles.
+# the libltdl convenience library and INCLTDL to the include flags for
+# the libltdl header and adds --enable-ltdl-convenience to the
+# configure arguments. Note that LIBLTDL and INCLTDL are not
+# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not
+# provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed
+# with '${top_builddir}/' and INCLTDL will be prefixed with
+# '${top_srcdir}/' (note the single quotes!). If your package is not
+# flat and you're not using automake, define top_builddir and
+# top_srcdir appropriately in the Makefiles.
AC_DEFUN(AC_LIBLTDL_CONVENIENCE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
case "$enable_ltdl_convenience" in
no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
"") enable_ltdl_convenience=yes
ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
esac
- LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdlc.la
- INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl'])
+ LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la
+ INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
])
# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for
-# the libltdl installable library, and adds --enable-ltdl-install to
-# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
-# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
-# to be `${top_builddir}/libltdl'. Make sure you start DIR with
-# '${top_builddir}/' (note the single quotes!) if your package is not
-# flat, and, if you're not using automake, define top_builddir as
-# appropriate in the Makefiles.
+# the libltdl installable library and INCLTDL to the include flags for
+# the libltdl header and adds --enable-ltdl-install to the configure
+# arguments. Note that LIBLTDL and INCLTDL are not AC_SUBSTed, nor is
+# AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed
+# libltdl is not found, it is assumed to be `libltdl'. LIBLTDL will
+# be prefixed with '${top_builddir}/' and INCLTDL will be prefixed
+# with '${top_srcdir}/' (note the single quotes!). If your package is
+# not flat and you're not using automake, define top_builddir and
+# top_srcdir appropriately in the Makefiles.
# In the future, this macro may have to be called after AC_PROG_LIBTOOL.
AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
AC_CHECK_LIB(ltdl, main,
@@ -657,8 +661,8 @@ AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
])
if test x"$enable_ltdl_install" = x"yes"; then
ac_configure_args="$ac_configure_args --enable-ltdl-install"
- LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdl.la
- INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl'])
+ LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la
+ INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
else
ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
LIBLTDL="-lltdl"
View
@@ -0,0 +1,15 @@
+/bar
+bar
+bar
+bar
+baz
+
+foo/
+foo
+foo
+../foo./
+../foo/
+/foo
+../foo
+../../foo
+../../../foo
View
@@ -1,3 +1,3 @@
-./test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val
+test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val
<!ATTLIST doc val ID #IMPLIED>
^
@@ -1,3 +1,3 @@
-./test/VC/dtds/a.dtd:1: validity error: Redefinition of element a
+test/VC/dtds/a.dtd:1: validity error: Redefinition of element a
<!ELEMENT a (#PCDATA | b | c)*>
^
View
@@ -0,0 +1,15 @@
+/foo/../bar
+foo/../bar
+./foo/../bar
+foo/./../bar
+foo/bar/.././../baz
+foo/..
+foo/bar/..
+./foo
+././foo
+.././foo./
+.././foo/.
+/foo
+../foo
+../../foo
+../../../foo
View
@@ -27,11 +27,8 @@ int main(int argc, char **argv) {
const char *base = NULL;
xmlChar *composite;
- if (argv[arg] == NULL) {
- printf("Usage: %s [-base URI] URI ...\n", argv[0]);
- exit(0);
- }
- if ((!strcmp(argv[arg], "-base")) || (!strcmp(argv[arg], "--base"))) {
+ if ((argv[arg] != NULL) &&
+ ((!strcmp(argv[arg], "-base")) || (!strcmp(argv[arg], "--base")))) {
arg++;
base = argv[arg];
if (base != NULL)
@@ -64,6 +61,7 @@ int main(int argc, char **argv) {
if (ret != 0)
printf("%s : error %d\n", str, ret);
else {
+ xmlNormalizeURIPath(uri->path);
xmlPrintURI(stdout, uri);
printf("\n");
}
@@ -99,5 +97,5 @@ int main(int argc, char **argv) {
}
xmlFreeURI(uri);
xmlMemoryDump();
- exit(0);
+ return(0);
}
View
@@ -606,6 +606,7 @@ xmlFreeURI(xmlURIPtr uri) {
* *
************************************************************************/
+#if 0
/**
* xmlNormalizeURIPath:
* @path: pointer to the path string
@@ -739,6 +740,184 @@ xmlNormalizeURIPath(char *path) {
}
return(0);
}
+#else
+/**
+ * xmlNormalizeURIPath:
+ * @path: pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 or an error code
+ */
+int
+xmlNormalizeURIPath(char *path) {
+ char *cur, *out;
+
+ if (path == NULL)
+ return(-1);
+
+ /* Skip all initial "/" chars. We want to get to the beginning of the
+ * first non-empty segment.
+ */
+ cur = path;
+ while (cur[0] == '/')
+ ++cur;
+ if (cur[0] == '\0')
+ return(0);
+
+ /* Keep everything we've seen so far. */
+ out = cur;
+
+ /*
+ * Analyze each segment in sequence for cases (c) and (d).
+ */
+ while (cur[0] != '\0') {
+ /*
+ * c) All occurrences of "./", where "." is a complete path segment,
+ * are removed from the buffer string.
+ */
+ if ((cur[0] == '.') && (cur[1] == '/')) {
+ cur += 2;
+ continue;
+ }
+
+ /*
+ * d) If the buffer string ends with "." as a complete path segment,
+ * that "." is removed.
+ */
+ if ((cur[0] == '.') && (cur[1] == '\0'))
+ break;
+
+ /* Otherwise keep the segment. */
+ while (cur[0] != '/') {
+ if (cur[0] == '\0')
+ goto done_cd;
+ (out++)[0] = (cur++)[0];
+ }
+ (out++)[0] = (cur++)[0];
+ }
+ done_cd:
+ out[0] = '\0';
+
+ /* Reset to the beginning of the first segment for the next sequence. */
+ cur = path;
+ while (cur[0] == '/')
+ ++cur;
+ if (cur[0] == '\0')
+ return(0);
+
+ /*
+ * Analyze each segment in sequence for cases (e) and (f).
+ *
+ * e) All occurrences of "<segment>/../", where <segment> is a
+ * complete path segment not equal to "..", are removed from the
+ * buffer string. Removal of these path segments is performed
+ * iteratively, removing the leftmost matching pattern on each
+ * iteration, until no matching pattern remains.
+ *
+ * f) If the buffer string ends with "<segment>/..", where <segment>
+ * is a complete path segment not equal to "..", that
+ * "<segment>/.." is removed.
+ *
+ * To satisfy the "iterative" clause in (e), we need to collapse the
+ * string every time we find something that needs to be removed. Thus,
+ * we don't need to keep two pointers into the string: we only need a
+ * "current position" pointer.
+ */
+ while (1) {
+ char *segp;
+
+ /* At the beginning of each iteration of this loop, "cur" points to
+ * the first character of the segment we want to examine.
+ */
+
+ /* Find the end of the current segment. */
+ segp = cur;
+ while ((segp[0] != '/') && (segp[0] != '\0'))
+ ++segp;
+
+ /* If this is the last segment, we're done (we need at least two
+ * segments to meet the criteria for the (e) and (f) cases).
+ */
+ if (segp[0] == '\0')
+ break;
+
+ /* If the first segment is "..", or if the next segment _isn't_ "..",
+ * keep this segment and try the next one.
+ */
+ ++segp;
+ if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
+ || ((segp[0] != '.') || (segp[1] != '.')
+ || ((segp[2] != '/') && (segp[2] != '\0')))) {
+ cur = segp;
+ continue;
+ }
+
+ /* If we get here, remove this segment and the next one and back up
+ * to the previous segment (if there is one), to implement the
+ * "iteratively" clause. It's pretty much impossible to back up
+ * while maintaining two pointers into the buffer, so just compact
+ * the whole buffer now.
+ */
+
+ /* If this is the end of the buffer, we're done. */
+ if (segp[2] == '\0') {
+ cur[0] = '\0';
+ break;
+ }
+ strcpy(cur, segp + 3);
+
+ /* If there are no previous segments, then keep going from here. */
+ segp = cur;
+ while ((segp > path) && ((--segp)[0] == '/'))
+ ;
+ if (segp == path)
+ continue;
+
+ /* "segp" is pointing to the end of a previous segment; find it's
+ * start. We need to back up to the previous segment and start
+ * over with that to handle things like "foo/bar/../..". If we
+ * don't do this, then on the first pass we'll remove the "bar/..",
+ * but be pointing at the second ".." so we won't realize we can also
+ * remove the "foo/..".
+ */
+ cur = segp;
+ while ((cur > path) && (cur[-1] != '/'))
+ --cur;
+ }
+ out[0] = '\0';
+
+ /*
+ * g) If the resulting buffer string still begins with one or more
+ * complete path segments of "..", then the reference is
+ * considered to be in error. Implementations may handle this
+ * error by retaining these components in the resolved path (i.e.,
+ * treating them as part of the final URI), by removing them from
+ * the resolved path (i.e., discarding relative levels above the
+ * root), or by avoiding traversal of the reference.
+ *
+ * We discard them from the final path.
+ */
+ if (path[0] == '/') {
+ cur = path;
+ while ((cur[1] == '.') && (cur[2] == '.')
+ && ((cur[3] == '/') || (cur[3] == '\0')))
+ cur += 3;
+
+ if (cur != path) {
+ out = path;
+ while (cur[0] != '\0')
+ (out++)[0] = (cur++)[0];
+ out[0] = 0;
+ }
+ }
+
+ return(0);
+}
+#endif
/**
* xmlURIUnescapeString:

0 comments on commit b6e7fdb

Please sign in to comment.