Skip to content

Commit

Permalink
Merge pull request #25021 from JuliaLang/nl/unicode
Browse files Browse the repository at this point in the history
Move Unicode-related functions to new Unicode stdlib package
  • Loading branch information
nalimilan committed Dec 13, 2017
2 parents 87c1d4f + 756936a commit 295b098
Show file tree
Hide file tree
Showing 72 changed files with 551 additions and 445 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ The steps required to add a new docstring are listed below:
Examples written within docstrings can be used as testcases known as "doctests" by annotating code blocks with `jldoctest`.

```jldoctest
julia> uppercase("Docstring test")
julia> Unicode.uppercase("Docstring test")
"DOCSTRING TEST"
```

Expand Down
11 changes: 11 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,16 @@ Deprecated or removed
* The `sum_kbn` and `cumsum_kbn` functions have been moved to the
[KahanSummation](https://github.com/JuliaMath/KahanSummation.jl) package ([#24869]).

* Unicode-related string functions have been moved to the new `Unicode` standard
library module ([#25021]). This applies to `normalize_string`, `graphemes`,
`is_assigned_char`, `textwidth`, `isascii`, `islower`, `isupper`, `isalpha`,
`isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`,
`isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`.

* `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char`
in favor of `isassigned` and `normalize_string` in favor of `normalize`, all three
in the new `Unicode` standard library module ([#25021]).

Command-line option changes
---------------------------

Expand Down Expand Up @@ -1711,3 +1721,4 @@ Command-line option changes
[#24413]: https://github.com/JuliaLang/julia/issues/24413
[#24653]: https://github.com/JuliaLang/julia/issues/24653
[#24869]: https://github.com/JuliaLang/julia/issues/24869
[#25021]: https://github.com/JuliaLang/julia/issues/25021
2 changes: 1 addition & 1 deletion base/arrayshow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
screenwidth -= length(pre) + length(post)
presp = repeat(" ", length(pre)) # indent each row to match pre string
postsp = ""
@assert textwidth(hdots) == textwidth(ddots)
@assert Unicode.textwidth(hdots) == Unicode.textwidth(ddots)
sepsize = length(sep)
rowsA, colsA = indices(X,1), indices(X,2)
m, n = length(rowsA), length(colsA)
Expand Down
6 changes: 3 additions & 3 deletions base/char.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ function show(io::IO, c::Char)
return
end
end
if isprint(c)
if Unicode.isprint(c)
write(io, 0x27, c, 0x27)
else
u = UInt32(c)
Expand All @@ -81,6 +81,6 @@ end
function show(io::IO, ::MIME"text/plain", c::Char)
show(io, c)
u = UInt32(c)
print(io, ": ", isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4))
print(io, " (category ", UTF8proc.category_abbrev(c), ": ", UTF8proc.category_string(c), ")")
print(io, ": ", Unicode.isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4))
print(io, " (category ", Unicode.category_abbrev(c), ": ", Unicode.category_string(c), ")")
end
2 changes: 1 addition & 1 deletion base/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ function load_machine_file(path::AbstractString)
s = split(line, '*'; keep = false)
map!(strip, s, s)
if length(s) > 1
cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1])
cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
push!(machines,(s[2], cnt))
else
push!(machines,line)
Expand Down
31 changes: 23 additions & 8 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1072,13 +1072,6 @@ function Matrix()
return Matrix(uninitialized, 0, 0)
end

for name in ("alnum", "alpha", "cntrl", "digit", "number", "graph",
"lower", "print", "punct", "space", "upper", "xdigit")
f = Symbol("is",name)
@eval import .UTF8proc: $f
@eval @deprecate ($f)(s::AbstractString) all($f, s)
end

# TODO: remove warning for using `_` in parse_input_line in base/client.jl

# Special functions have been moved to a package
Expand Down Expand Up @@ -1512,7 +1505,7 @@ export hex2num
@deprecate convert(::Type{Symbol}, s::AbstractString) Symbol(s)
@deprecate convert(::Type{String}, s::Symbol) String(s)
@deprecate convert(::Type{String}, v::Vector{UInt8}) String(v)
@deprecate convert(::Type{S}, g::UTF8proc.GraphemeIterator) where {S<:AbstractString} convert(S, g.s)
@deprecate convert(::Type{S}, g::Unicode.GraphemeIterator) where {S<:AbstractString} convert(S, g.s)

# Issue #19923
@deprecate ror circshift
Expand Down Expand Up @@ -2972,6 +2965,28 @@ end
@deprecate_moved sum_kbn "KahanSummation"
@deprecate_moved cumsum_kbn "KahanSummation"

# PR #25021
@deprecate_moved normalize_string "Unicode" true true
@deprecate_moved graphemes "Unicode" true true
@deprecate_moved is_assigned_char "Unicode" true true
@deprecate_moved textwidth "Unicode" true true
@deprecate_moved islower "Unicode" true true
@deprecate_moved isupper "Unicode" true true
@deprecate_moved isalpha "Unicode" true true
@deprecate_moved isdigit "Unicode" true true
@deprecate_moved isnumber "Unicode" true true
@deprecate_moved isalnum "Unicode" true true
@deprecate_moved iscntrl "Unicode" true true
@deprecate_moved ispunct "Unicode" true true
@deprecate_moved isspace "Unicode" true true
@deprecate_moved isprint "Unicode" true true
@deprecate_moved isgraph "Unicode" true true
@deprecate_moved lowercase "Unicode" true true
@deprecate_moved uppercase "Unicode" true true
@deprecate_moved titlecase "Unicode" true true
@deprecate_moved lcfirst "Unicode" true true
@deprecate_moved ucfirst "Unicode" true true

# END 0.7 deprecations

# BEGIN 1.0 deprecations
Expand Down
4 changes: 2 additions & 2 deletions base/dict.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

function _truncate_at_width_or_chars(str, width, chars="", truncmark="")
truncwidth = textwidth(truncmark)
truncwidth = Unicode.textwidth(truncmark)
(width <= 0 || width < truncwidth) && return ""

wid = truncidx = lastidx = 0
idx = start(str)
while !done(str, idx)
lastidx = idx
c, idx = next(str, idx)
wid += textwidth(c)
wid += Unicode.textwidth(c)
wid >= width - truncwidth && truncidx == 0 && (truncidx = lastidx)
(wid >= width || c in chars) && break
end
Expand Down
1 change: 1 addition & 0 deletions base/distributed/Distributed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
binding_module, notify_error, atexit, julia_exename, julia_cmd,
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
shell_escape_posixly, uv_error
using Base.Unicode: isascii, isdigit, isnumeric

# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use

Expand Down
4 changes: 3 additions & 1 deletion base/docs/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Text / HTML objects

import Base: print, show, ==, hash
using Base.Unicode

export HTML, @html_str

Expand Down Expand Up @@ -231,7 +232,8 @@ function matchinds(needle, haystack; acronym = false)
for (i, char) in enumerate(haystack)
isempty(chars) && break
while chars[1] == ' ' shift!(chars) end # skip spaces
if lowercase(char) == lowercase(chars[1]) && (!acronym || !isalpha(lastc))
if Unicode.lowercase(char) == Unicode.lowercase(chars[1]) &&
(!acronym || !Unicode.isalpha(lastc))
push!(is, i)
shift!(chars)
end
Expand Down
22 changes: 0 additions & 22 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -725,40 +725,22 @@ export
eachmatch,
endswith,
escape_string,
graphemes,
hex,
hex2bytes,
hex2bytes!,
ind2chr,
info,
is_assigned_char,
isalnum,
isalpha,
isascii,
iscntrl,
isdigit,
isgraph,
islower,
ismatch,
isnumber,
isprint,
ispunct,
isspace,
isupper,
isvalid,
isxdigit,
join,
lcfirst,
logging,
lowercase,
lpad,
lstrip,
match,
matchall,
ncodeunits,
ndigits,
nextind,
normalize_string,
oct,
prevind,
print,
Expand All @@ -785,13 +767,9 @@ export
string,
strip,
summary,
textwidth,
thisind,
titlecase,
transcode,
ucfirst,
unescape_string,
uppercase,
warn,

# random numbers
Expand Down
2 changes: 1 addition & 1 deletion base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ function edit(path::AbstractString, line::Integer=0)
cmd = line != 0 ? `$command $path -l $line` : `$command $path`
elseif startswith(name, "subl") || startswith(name, "atom")
cmd = line != 0 ? `$command $path:$line` : `$command $path`
elseif name == "code" || (Sys.iswindows() && uppercase(name) == "CODE.EXE")
elseif name == "code" || (Sys.iswindows() && Unicode.uppercase(name) == "CODE.EXE")
cmd = line != 0 ? `$command -g $path:$line` : `$command -g $path`
elseif startswith(name, "notepad++")
cmd = line != 0 ? `$command $path -n$line` : `$command $path`
Expand Down
4 changes: 3 additions & 1 deletion base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,8 @@ characters from that character until the start of the next line are ignored.
julia> buf = IOBuffer(" text")
IOBuffer(data=UInt8[...], readable=true, writable=false, seekable=true, append=false, size=8, maxsize=Inf, ptr=1, mark=-1)
julia> using Unicode
julia> skipchars(buf, isspace)
IOBuffer(data=UInt8[...], readable=true, writable=false, seekable=true, append=false, size=8, maxsize=Inf, ptr=5, mark=-1)
Expand Down Expand Up @@ -967,7 +969,7 @@ julia> countlines(io, '.')
```
"""
function countlines(io::IO, eol::Char='\n')
isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
Unicode.isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
aeol = UInt8(eol)
a = Vector{UInt8}(uninitialized, 8192)
nl = 0
Expand Down
4 changes: 2 additions & 2 deletions base/libuv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ function uv_sizeof_req(req)
end

for h in uv_handle_types
@eval const $(Symbol("_sizeof_",lowercase(string(h)))) = uv_sizeof_handle($h)
@eval const $(Symbol("_sizeof_",Unicode.lowercase(string(h)))) = uv_sizeof_handle($h)
end
for r in uv_req_types
@eval const $(Symbol("_sizeof_",lowercase(string(r)))) = uv_sizeof_req($r)
@eval const $(Symbol("_sizeof_",Unicode.lowercase(string(r)))) = uv_sizeof_req($r)
end

uv_handle_data(handle) = ccall(:jl_uv_handle_data,Ptr{Void},(Ptr{Void},),handle)
Expand Down
4 changes: 2 additions & 2 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ elseif Sys.isapple()

# If there is no match, it's possible that the file does exist but HFS+
# performed unicode normalization. See https://developer.apple.com/library/mac/qa/qa1235/_index.html.
isascii(path_basename) && return false
Vector{UInt8}(normalize_string(path_basename, :NFD)) == casepreserved_basename
Unicode.isascii(path_basename) && return false
Vector{UInt8}(Unicode.normalize(path_basename, :NFD)) == casepreserved_basename
end
else
# Generic fallback that performs a slow directory listing.
Expand Down
1 change: 1 addition & 0 deletions base/markdown/Markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module Markdown

import Base: show, ==
import Core: @doc_str
using Base.Unicode: lowercase, ucfirst, isspace

include(joinpath("parse", "config.jl"))
include(joinpath("parse", "util.jl"))
Expand Down
2 changes: 1 addition & 1 deletion base/mpfr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ convert(::Type{BigFloat}, x::Union{Float16,Float32}) = BigFloat(Float64(x))
convert(::Type{BigFloat}, x::Rational) = BigFloat(numerator(x)) / BigFloat(denominator(x))

function tryparse(::Type{BigFloat}, s::AbstractString, base::Int=0)
!isempty(s) && isspace(s[end]) && return tryparse(BigFloat, rstrip(s), base)
!isempty(s) && Base.Unicode.isspace(s[end]) && return tryparse(BigFloat, rstrip(s), base)
z = BigFloat()
err = ccall((:mpfr_set_str, :libmpfr), Int32, (Ref{BigFloat}, Cstring, Int32, Int32), z, s, base, ROUNDING_MODE[])
err == 0 ? Nullable(z) : Nullable{BigFloat}()
Expand Down
2 changes: 2 additions & 0 deletions base/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,8 @@ entered in the Julia REPL (and most editors, appropriately configured) by typing
# Examples
```jldoctest
julia> using Unicode
julia> map(uppercase∘hex, 250:255)
6-element Array{String,1}:
"FA"
Expand Down
18 changes: 9 additions & 9 deletions base/parse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ end
function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos::Int, endpos::Int)
c, i, j = parseint_next(s, startpos, endpos)

while isspace(c)
while Unicode.isspace(c)
c, i, j = parseint_next(s,i,endpos)
end
(j == 0) && (return 0, 0, 0)
Expand All @@ -66,7 +66,7 @@ function parseint_preamble(signed::Bool, base::Int, s::AbstractString, startpos:
end
end

while isspace(c)
while Unicode.isspace(c)
c, i, j = parseint_next(s,i,endpos)
end
(j == 0) && (return 0, 0, 0)
Expand Down Expand Up @@ -125,10 +125,10 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::
return Nullable{T}(n)
end
c, i = next(s,i)
isspace(c) && break
Unicode.isspace(c) && break
end
(T <: Signed) && (n *= sgn)
while !isspace(c)
while !Unicode.isspace(c)
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
Expand All @@ -149,7 +149,7 @@ function tryparse_internal(::Type{T}, s::AbstractString, startpos::Int, endpos::
end
while i <= endpos
c, i = next(s,i)
if !isspace(c)
if !Unicode.isspace(c)
raise && throw(ArgumentError("extra characters after whitespace in $(repr(SubString(s,startpos,endpos)))"))
return _n
end
Expand All @@ -168,10 +168,10 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}},
orig_end = endpos

# Ignore leading and trailing whitespace
while isspace(sbuff[startpos]) && startpos <= endpos
while Unicode.isspace(sbuff[startpos]) && startpos <= endpos
startpos = nextind(sbuff, startpos)
end
while isspace(sbuff[endpos]) && endpos >= startpos
while Unicode.isspace(sbuff[endpos]) && endpos >= startpos
endpos = prevind(sbuff, endpos)
end

Expand All @@ -186,7 +186,7 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}},

if raise
substr = SubString(sbuff, orig_start, orig_end) # show input string in the error to avoid confusion
if all(isspace, substr)
if all(Unicode.isspace, substr)
throw(ArgumentError("input string only contains whitespace"))
else
throw(ArgumentError("invalid Bool representation: $(repr(substr))"))
Expand Down Expand Up @@ -243,7 +243,7 @@ tryparse_internal(::Type{Float16}, s::AbstractString, startpos::Int, endpos::Int

function tryparse_internal(::Type{Complex{T}}, s::Union{String,SubString{String}}, i::Int, e::Int, raise::Bool) where {T<:Real}
# skip initial whitespace
while i ≤ e && isspace(s[i])
while i ≤ e && Unicode.isspace(s[i])
i = nextind(s, i)
end
if i > e
Expand Down
4 changes: 2 additions & 2 deletions base/pkg/entry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ function available()
for (pkg, vers) in all_avail
any(x->Types.satisfies("julia", VERSION, x[2].requires), vers) && push!(avail, pkg)
end
sort!(avail, by=lowercase)
sort!(avail, by=Base.Unicode.lowercase)
end

function available(pkg::AbstractString)
Expand Down Expand Up @@ -572,7 +572,7 @@ end

function warnbanner(msg...; label="[ WARNING ]", prefix="")
cols = Base.displaysize(STDERR)[2]
str = rpad(lpad(label, div(cols+textwidth(label), 2), "="), cols, "=")
str = rpad(lpad(label, div(cols+Base.Unicode.textwidth(label), 2), "="), cols, "=")
warn(prefix="", str)
println(STDERR)
warn(prefix=prefix, msg...)
Expand Down
2 changes: 1 addition & 1 deletion base/pkg/reqs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ function write(io::IO, lines::Vector{Line})
end
end
function write(io::IO, reqs::Requires)
for pkg in sort!(collect(keys(reqs)), by=lowercase)
for pkg in sort!(collect(keys(reqs)), by=Unicode.lowercase)
println(io, Requirement(pkg, reqs[pkg]).content)
end
end
Expand Down

0 comments on commit 295b098

Please sign in to comment.