Permalink
Browse files

Unicode and Path bug fixes

  • Loading branch information...
1 parent 2668def commit d867099b7e7703822e04c762b0c5cba02f5c2ba3 @EnigmaCurry committed Dec 13, 2009
Showing with 95 additions and 18 deletions.
  1. +1 −2 blogofile/filter.py
  2. +3 −1 blogofile/post.py
  3. +91 −15 blogofile/util.py
View
@@ -1,6 +1,5 @@
import logging
import imp
-import codecs
import util
@@ -22,7 +21,7 @@ def run_chain(chain, content):
logging.debug("Applying filter: "+fn)
content = f.run(content)
logging.debug("Content:"+content)
- return codecs.decode(content,"utf-8")
+ return util.force_unicode(content)
def parse_chain(chain):
"""Parse a filter chain into a sequence of filters"""
View
@@ -245,7 +245,9 @@ def __hash__(self):
return hash(self.name)
def __repr__(self):
return self.name
-
+ def __cmp__(self, other):
+ return cmp(self.name, other.name)
+
def parse_posts(directory):
"""Retrieve all the posts from the directory specified.
View
@@ -49,24 +49,36 @@ def mkdir(newdir):
if tail:
os.mkdir(newdir)
-def blog_path_helper(path_parts):
- """Make an absolute URL path for something on the blog"""
- if type(path_parts) in (str, unicode):
- path_parts = (path_parts,)
- a_path = urlparse.urlsplit(config.site_url).path
- a_path = "/".join((a_path,config.blog_path))
- a_path = a_path + "/" + "/".join(path_parts)
- if not a_path.startswith("/"):
- a_path = "/"+a_path
- return a_path
-
-def path_join(*parts):
- """An OS independant os.path.join
+def blog_path_helper(*parts):
+ """Make an absolute URL path for something on the blog
+
+ parts is a sequence of path parts to append to the blog URL"""
+ new_parts = []
+ for p in parts:
+ if hasattr(p,"__iter__"):
+ #This part is a sequence itself, recurse into it
+ p = path_join(*p, sep="/")
+ if p in ("","\\","/"):
+ continue
+ new_parts.append(p)
+ if len(new_parts) > 0:
+ return "/".join(new_parts)
+ else:
+ return "/"
+
+def path_join(*parts, **kwargs):
+ """A better os.path.join
Converts (back)slashes from other platforms automatically
Normally, os.path.join is great, as long as you pass each dir/file
- independantly, but not if you (accidentally/intentionally) put a slash in"""
+ independently, but not if you (accidentally/intentionally) put a slash in.
+ If sep is specified, use that as the separator
+ rather than the system default"""
+ if kwargs.has_key('sep'):
+ sep = kwargs['sep']
+ else:
+ sep = os.sep
if os.sep == "\\":
wrong_slash_type = "/"
else:
@@ -79,6 +91,70 @@ def path_join(*parts):
if p in ("","\\","/"):
continue
new_parts.append(p.replace(wrong_slash_type,os.sep))
- return os.sep.join(new_parts)
+ return sep.join(new_parts)
+def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
+ """
+ Force a string to be unicode.
+
+ If strings_only is True, don't convert (some) non-string-like objects.
+
+ Originally copied from the Django source code, further modifications have
+ been made.
+
+ Original copyright and license:
+
+ Copyright (c) Django Software Foundation and individual contributors.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of Django nor the names of its contributors may be used
+ to endorse or promote products derived from this software without
+ specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ """
+ if strings_only and is_protected_type(s):
+ return s
+ if not isinstance(s, basestring,):
+ if hasattr(s, '__unicode__'):
+ s = unicode(s)
+ else:
+ try:
+ s = unicode(str(s), encoding, errors)
+ except UnicodeEncodeError:
+ if not isinstance(s, Exception):
+ raise
+ # If we get to here, the caller has passed in an Exception
+ # subclass populated with non-ASCII data without special
+ # handling to display as a string. We need to handle this
+ # without raising a further exception. We do an
+ # approximation to what the Exception's standard str()
+ # output should be.
+ s = ' '.join([force_unicode(arg, encoding, strings_only,
+ errors) for arg in s])
+ elif not isinstance(s, unicode):
+ # Note: We use .decode() here, instead of unicode(s, encoding,
+ # errors), so that if s is a SafeString, it ends up being a
+ # SafeUnicode at the end.
+ s = s.decode(encoding, errors)
+ return s

0 comments on commit d867099

Please sign in to comment.