Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
camp-zju committed May 10, 2018
1 parent 1b358a8 commit b9376f0
Showing 1 changed file with 44 additions and 2 deletions.
46 changes: 44 additions & 2 deletions geonode/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,8 +933,16 @@ def check_shp_columnnames(layer):
charset)

if not a.match(field_name):
new_field_name = custom_slugify(field_name)

# If field_name contains any Chinese character, use slugify_zh instead of custom_slugify
has_ch = False
for ch in field_name:
if u'\u4e00' <= ch <= u'\u9fff':
has_ch=True
break
if has_ch == True:
new_field_name = slugify_zh(field_name, separator='_')
else:
new_field_name = custom_slugify(field_name)
if not b.match(new_field_name):
new_field_name = '_' + new_field_name
j = 0
Expand Down Expand Up @@ -1302,3 +1310,37 @@ def chmod_tree(dst, permissions=0o777):
for dirname in dirnames:
path = os.path.join(dirpath, dirname)
os.chmod(path, permissions)


# Patterns used by slugify_zh, compiled once at import time.
# Everything except CJK Unified Ideographs (U+4E00-U+9FFF), lowercase ASCII
# letters and ASCII digits is treated as a word boundary.  The upper bound
# matches the range check_shp_columnnames uses to detect Chinese field names.
_SLUGIFY_ZH_DISALLOWED = re.compile(u'[^\u4e00-\u9fffa-z0-9]+')
# A comma sitting between two digits (thousands separator, e.g. "1,000").
_SLUGIFY_ZH_DIGIT_COMMA = re.compile(r'(?<=\d),(?=\d)')
# Separator used internally before the caller's separator is substituted.
_SLUGIFY_ZH_DASH = '-'


def slugify_zh(text, separator='_'):
    """
    Make a slug from the given text while keeping Chinese characters.

    This is a simplified slugify: it takes no extra options and does not
    transliterate Chinese into Pinyin.  The text is lowercased, commas
    between digits are dropped ("1,000" becomes "1000"), and every run of
    characters other than CJK ideographs, ``a-z`` and ``0-9`` becomes a
    single separator.  Leading and trailing separators are removed.

    :param text: initial text; byte strings are decoded as UTF-8 and
        undecodable bytes are ignored
    :param separator (str): separator placed between words
    :return: the slug, as a unicode string
    """
    # Decode byte input explicitly; this works on both Python 2 (where
    # ``bytes is str``) and Python 3, unlike types.UnicodeType / unicode().
    if isinstance(text, (bytes, bytearray)):
        text = text.decode('utf-8', 'ignore')

    text = text.lower()
    # Drop thousands separators so "1,000" stays one token.
    text = _SLUGIFY_ZH_DIGIT_COMMA.sub('', text)
    # Quotes, whitespace, punctuation and existing dashes are all disallowed
    # characters, so this single pass both replaces them and collapses
    # consecutive ones; no separate quote or duplicate-dash pass is needed.
    text = _SLUGIFY_ZH_DISALLOWED.sub(_SLUGIFY_ZH_DASH, text)
    text = text.strip(_SLUGIFY_ZH_DASH)

    if separator != _SLUGIFY_ZH_DASH:
        text = text.replace(_SLUGIFY_ZH_DASH, separator)
    return text

0 comments on commit b9376f0

Please sign in to comment.