Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

make compose=true the default in normalize_string

  • Loading branch information...
commit a500b83d0a30dc735124f1a95d86c09b68272c3b 1 parent 76c99f3
@stevengj stevengj authored
Showing with 12 additions and 9 deletions.
  1. +7 −5 base/utf8proc.jl
  2. +5 −4 doc/stdlib/base.rst
View
12 base/utf8proc.jl
@@ -41,14 +41,16 @@ let
end
end
-function normalize_string(s::String; stable::Bool=false, compat::Bool=false, compose::Bool=false, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
+function normalize_string(s::String; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
flags = 0
stable && (flags = flags | UTF8PROC_STABLE)
- compat && (flags = flags | UTF8PROC_COMPAT | (decompose ? 0 : UTF8PROC_COMPOSE))
- compose && (flags = flags | UTF8PROC_COMPOSE)
+ compat && (flags = flags | UTF8PROC_COMPAT)
if decompose
- compose && throw(ArgumentError("compose=true and decompose=true cannot both be specified"))
flags = flags | UTF8PROC_DECOMPOSE
+ elseif compose
+ flags = flags | UTF8PROC_COMPOSE
+ elseif compat || stripmark
+ throw(ArgumentError("compat=true or stripmark=true require compose=true or decompose=true"))
end
stripignore && (flags = flags | UTF8PROC_IGNORE)
rejectna && (flags = flags | UTF8PROC_REJECTNA)
@@ -59,7 +61,7 @@ function normalize_string(s::String; stable::Bool=false, compat::Bool=false, com
stripcc && (flags = flags | UTF8PROC_STRIPCC)
casefold && (flags = flags | UTF8PROC_CASEFOLD)
lump && (flags = flags | UTF8PROC_LUMP)
- stripmark && (flags = flags | UTF8PROC_STRIPMARK | (decompose ? 0 : UTF8PROC_COMPOSE))
+ stripmark && (flags = flags | UTF8PROC_STRIPMARK)
utf8proc_map(s, flags)
end
View
9 doc/stdlib/base.rst
@@ -961,14 +961,15 @@ Strings
Alternatively, finer control and additional transformations may be
obtained by calling `normalize_string(s; keywords...)`, where
any number of the following boolean keyword options (which all default
- to ``false``) are specified:
+ to ``false`` except for ``compose``) are specified:
- * ``compose=true`` or ``decompose=true``: canonical composition or decomposition, respectively
- * ``compat=true``: compatibility equivalents are canonicalized (implies `compose=true` unless `decompose=true` was specified)
+ * ``compose=false``: do not perform canonical composition
+ * ``decompose=true``: do canonical decomposition instead of canonical composition (``compose=true`` is ignored if present)
+ * ``compat=true``: compatibility equivalents are canonicalized
* ``casefold=true``: perform Unicode case folding, e.g. for case-insensitive string comparison
* ``lump=true``: non-standard canonicalization of various similar-looking characters into a single ASCII character, as defined by the utf8proc library (e.g. fraction and division slashes, space characters, dash characters, etcetera)
* ``newline2lf=true``, ``newline2ls=true``, or ``newline2ps=true``: convert various newline sequences (LF, CRLF, CR, NEL) into a linefeed (LF), line-separation (LS), or paragraph-separation (PS) character, respectively
- * ``stripmark=true``: strip diacritical marks (e.g. accents) (implies `compose=true` unless `decompose=true` was specified)
+ * ``stripmark=true``: strip diacritical marks (e.g. accents)
* ``stripignore=true``: strip Unicode's "default ignorable" characters (e.g. the soft hyphen or the left-to-right marker)
* ``stripcc=true``: strip control characters; horizontal tabs and form feeds are converted to spaces; newlines are also converted to spaces unless a newline-conversion flag was specified
* ``rejectna=true``: throw an error if unassigned code points are found
Please sign in to comment.
Something went wrong with that request. Please try again.