Skip to content
Browse files

Add tests for util.create_slug() function and refactor it.

  • Loading branch information...
1 parent e8b49c6 commit b3cbe67e9b333589510be6376cde6e7410428b97 @douglatornell douglatornell committed Jul 25, 2012
Showing with 90 additions and 51 deletions.
  1. +63 −0 blogofile/tests/test_util.py
  2. +27 −51 blogofile/util.py
View
63 blogofile/tests/test_util.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+"""Unit tests for blogofile util module.
+"""
+try:
+ import unittest2 as unittest # For Python 2.6
+except ImportError:
+ import unittest # flake8 ignore # NOQA
+from mock import (
+ MagicMock,
+ patch,
+ )
+import six
+from .. import util
+
+
+@patch.object(util.bf, 'config')
+class TestCreateSlug(unittest.TestCase):
+ """Unit tests for create_slug function.
+ """
+ def _call_fut(self, *args):
+ """Call the function under test.
+ """
+ return util.create_slug(*args)
+
+ def test_create_slug_ascii(self, mock_config):
+ """create_slug returns expected result for ASCII title
+ """
+ mock_config.site = MagicMock(slugify=None, slug_unicode=None)
+ mock_config.blog = MagicMock(slugify=None)
+ slug = self._call_fut('Foo Bar!')
+ self.assertEqual(slug, 'foo-bar')
+
+ def test_create_slug_unidecode(self, mock_config):
+ """create_slug returns expected ASCII result for Unicode title
+ """
+ mock_config.site = MagicMock(slugify=None, slug_unicode=None)
+ mock_config.blog = MagicMock(slugify=None)
+ slug = self._call_fut(six.u('\u5317\u4EB0'))
+ self.assertEqual(slug, 'bei-jing')
+
+ def test_create_slug_unicode(self, mock_config):
+ """create_slug returns expected Unicode result for Unicode title
+ """
+ mock_config.site = MagicMock(slugify=None, slug_unicode=True)
+ mock_config.blog.slugify = None
+ slug = self._call_fut(six.u('\u5317\u4EB0'))
+ self.assertEqual(slug, six.u('\u5317\u4EB0'))
+
+ def test_create_slug_user_site_slugify(self, mock_config):
+ """create_slug uses user-defined config.site.slugify function
+ """
+ mock_config.site = MagicMock(slugify=lambda s: 'bar-foo')
+ mock_config.blog = MagicMock(slugify=None)
+ slug = self._call_fut('Foo Bar!')
+ self.assertEqual(slug, 'bar-foo')
+
+ def test_create_slug_user_blog_slugify(self, mock_config):
+ """create_slug uses user-defined config.blog.slugify function
+ """
+ mock_config.site = MagicMock(slugify=None)
+ mock_config.blog = MagicMock(slugify=lambda s: 'deprecated')
+ slug = self._call_fut('Foo Bar!')
+ self.assertEqual(slug, 'deprecated')
View
78 blogofile/util.py
@@ -12,13 +12,16 @@
except ImportError:
from urlparse import urlparse # For Python 2; flake8 ignore # NOQA
from markupsafe import Markup
-
+import six
+from unidecode import unidecode
from .cache import bf
bf.util = sys.modules['blogofile.util']
logger = logging.getLogger("blogofile.util")
+# Word separators and punctuation for slug creation
+PUNCT_RE = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
html_escape_table = {
"&": "&amp;",
@@ -29,7 +32,7 @@
}
-def html_escape(text): # pragma: no cover
+def html_escape(text):
"""Produce entities within text.
"""
L = []
@@ -252,56 +255,29 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): #p
s = s.decode(encoding, errors)
return s
-_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')
-def create_slug_new(title, delim='-'):
- # Get rid of any html entities
- slug = Markup(title).unescape()
- result = []
- for word in _punct_re.split(slug):
- result.extend(_unidecode_func(word).split())
- return _str_func(delim.join(result)).lower()
+def create_slug(title, delim='-'):
+ """Create a slug from `title`, with words lowercased, and
+ separated by `delim`.
-def create_slug_old(title):
- # Get rid of any html entities
- slug = Markup(title).unescape()
- # Try to convert non-ascii characters to their ascii equivalent:
- # HACK: Until we do a proper six-based 2 & 3 implementation...
- # The slug shouldn't be encoded here; that should be done
- # where it is output (unicode internally, encode/decode at edges)
- slug = _str_func(
- unicodedata.normalize("NFKD", slug).encode("ascii", "ignore"), "utf-8")
- # Replace any remaining non-valid URL characters with dashes
- # (reference RFC 1738 section 2.2)
- slug = re.sub("[^a-zA-Z0-9$\-_\.+!*'(),]", "-", slug).lower()
- return slug
+ User may provide their own function to do this via `config.site.slugify`.
-_create_slug = None
-_unidecode_func = None
-def create_slug(title):
- global _create_slug
- global _unidecode_func
- if _create_slug == None:
- # first launch, deterimining what method to use
- if bf.config.site.slugify:
- # user has defined their own function, use it instead
- _create_slug = bf.config.site.slugify
- elif bf.config.blog.slugify:
- # for backwards compatibility
- _create_slug = bf.config.blog.slugify
- elif bf.config.site.slug_unicode:
- # unicode slugs
- _create_slug = create_slug_new
- _unidecode_func = lambda s: s
+ `config.site.slug_unicode` controls whether Unicode characters are included
+ in the slug as is, or mapped to reasonable ASCII equivalents.
+ """
+ # Dispatch to user-supplied slug creation function, if one exists
+ if bf.config.site.slugify:
+ return bf.config.site.slugify(title)
+ elif bf.config.blog.slugify:
+ # For backward compatibility
+ return bf.config.blog.slugify(title)
+ # Get rid of any HTML entities
+ slug = Markup(title).unescape()
+ result = []
+ for word in PUNCT_RE.split(slug):
+ if not bf.config.site.slug_unicode:
+ result.extend(unidecode(word).split())
else:
- try:
- # ASCII slugs, better unicode handling
- from unidecode import unidecode
- _unidecode_func = unidecode
- _create_slug = create_slug_new
- except ImportError:
- # fallback to old function
- _create_slug = create_slug_old
- return _create_slug(title)
-_str_func = unicode if sys.version_info < (3,) else str
-
+ result.append(word)
+ slug = six.text_type(delim.join(result)).lower()
+ return slug

0 comments on commit b3cbe67

Please sign in to comment.
Something went wrong with that request. Please try again.