Skip to content

Commit

Permalink
Move Unicode-related functions to new Unicode stdlib package
Browse files: browse the repository at this point in the history
  • Loading branch information
nalimilan committed Dec 10, 2017
1 parent ff045af commit 66b5e9c
Show file tree
Hide file tree
Showing 14 changed files with 208 additions and 195 deletions.
2 changes: 1 addition & 1 deletion base/distributed/Distributed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
VERSION_STRING, sync_begin, sync_add, sync_end, async_run_thunk,
binding_module, notify_error, atexit, julia_exename, julia_cmd,
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
shell_escape_posixly, uv_error
shell_escape_posixly, uv_error, isascii

# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use

Expand Down
18 changes: 0 additions & 18 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -731,24 +731,10 @@ export
hex2bytes!,
ind2chr,
info,
is_assigned_char,
isalnum,
isalpha,
isascii,
iscntrl,
isdigit,
isgraph,
islower,
ismatch,
isnumber,
isprint,
ispunct,
isspace,
isupper,
isvalid,
isxdigit,
join,
lcfirst,
logging,
lowercase,
lpad,
Expand All @@ -758,7 +744,6 @@ export
ncodeunits,
ndigits,
nextind,
normalize_string,
oct,
prevind,
print,
Expand All @@ -785,11 +770,8 @@ export
string,
strip,
summary,
textwidth,
thisind,
titlecase,
transcode,
ucfirst,
unescape_string,
uppercase,
warn,
Expand Down
1 change: 1 addition & 0 deletions base/markdown/Markdown.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module Markdown

import Base: show, ==
import Core: @doc_str
using Base: ucfirst

# Include `config.jl` and `util.jl` from the `parse` subdirectory; `joinpath`
# assembles each relative path from its components.
include(joinpath("parse", "config.jl"))
include(joinpath("parse", "util.jl"))
Expand Down
2 changes: 2 additions & 0 deletions base/repl/LineEdit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import ..Terminals: raw!, width, height, cmove, getX,

import Base: ensureroom, peek, show, AnyDict, position

using Base: textwidth

# Abstract supertypes declared here; their concrete subtypes are not visible in
# this excerpt.
abstract type TextInterface end
abstract type ModeState end

Expand Down
133 changes: 0 additions & 133 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -469,143 +469,10 @@ next(e::EachStringIndex, state) = (state, nextind(e.s, state))
# Iteration over the index wrapper finishes exactly when iteration over the
# wrapped string `e.s` finishes at the same state.
function done(e::EachStringIndex, state)
    return done(e.s, state)
end
# Elements produced by an `EachStringIndex` iterator are reported as `Int`.
function eltype(::Type{EachStringIndex})
    return Int
end

"""
isascii(c::Union{Char,AbstractString}) -> Bool
Test whether a character belongs to the ASCII character set, or whether this is true for
all elements of a string.
# Examples
```jldoctest
julia> isascii('a')
true
julia> isascii('α')
false
julia> isascii("abc")
true
julia> isascii("αβγ")
false
```
"""
isascii(c::Char) = c < Char(0x80)
isascii(s::AbstractString) = all(isascii, s)

## string promotion rules ##

# Promoting any two string types yields the concrete `String` type.
function promote_rule(::Type{<:AbstractString}, ::Type{<:AbstractString})
    return String
end

"""
isxdigit(c::Char) -> Bool
Test whether a character is a valid hexadecimal digit. Note that this does not
include `x` (as in the standard `0x` prefix).
# Examples
```jldoctest
julia> isxdigit('a')
true
julia> isxdigit('x')
false
```
"""
isxdigit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'

## uppercase, lowercase, and titlecase transformations ##

"""
uppercase(s::AbstractString)
Return `s` with all characters converted to uppercase.
# Examples
```jldoctest
julia> uppercase("Julia")
"JULIA"
```
"""
uppercase(s::AbstractString) = map(uppercase, s)

"""
lowercase(s::AbstractString)
Return `s` with all characters converted to lowercase.
# Examples
```jldoctest
julia> lowercase("STRINGS AND THINGS")
"strings and things"
```
"""
lowercase(s::AbstractString) = map(lowercase, s)

"""
titlecase(s::AbstractString)
Capitalize the first character of each word in `s`.
See also [`ucfirst`](@ref) to capitalize only the first
character in `s`.
# Examples
```jldoctest
julia> titlecase("the julia programming language")
"The Julia Programming Language"
```
"""
function titlecase(s::AbstractString)
startword = true
b = IOBuffer()
for c in s
if isspace(c)
print(b, c)
startword = true
else
print(b, startword ? titlecase(c) : c)
startword = false
end
end
return String(take!(b))
end

"""
ucfirst(s::AbstractString)
Return `string` with the first character converted to uppercase
(technically "title case" for Unicode).
See also [`titlecase`](@ref) to capitalize the first character of
every word in `s`.
# Examples
```jldoctest
julia> ucfirst("python")
"Python"
```
"""
function ucfirst(s::AbstractString)
isempty(s) && return s
c = s[1]
tc = titlecase(c)
return c==tc ? s : string(tc,s[nextind(s,1):end])
end

"""
lcfirst(s::AbstractString)
Return `string` with the first character converted to lowercase.
# Examples
```jldoctest
julia> lcfirst("Julia")
"julia"
```
"""
function lcfirst(s::AbstractString)
isempty(s) || islower(s[1]) ? s : string(lowercase(s[1]),s[nextind(s,1):end])
end

## string map, filter, has ##

function map(f, s::AbstractString)
Expand Down
146 changes: 139 additions & 7 deletions base/strings/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@
# Various Unicode functionality from the utf8proc library
module UTF8proc

import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase, titlecase
import Base: show, ==, hash, string, Symbol, isless, length, eltype, start, next, done, convert, isvalid

export isgraphemebreak, category_code, category_abbrev, category_string

# also exported by Base:
export normalize_string, graphemes, is_assigned_char, textwidth, isvalid,
islower, isupper, isalpha, isdigit, isnumber, isalnum,
iscntrl, ispunct, isspace, isprint, isgraph
export isgraphemebreak, category_code, category_abbrev, category_string,
normalize_string, graphemes, is_assigned_char, textwidth, isascii, isvalid,
islower, isupper, isalpha, isdigit, isxdigit, isnumber, isalnum,
iscntrl, ispunct, isspace, isprint, isgraph,
lowercase, uppercase, titlecase, lcfirst, ucfirst

# whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff

Expand Down Expand Up @@ -533,6 +532,139 @@ true
"""
isgraph(c::Char) = (UTF8PROC_CATEGORY_LU <= category_code(c) <= UTF8PROC_CATEGORY_SO)

"""
isascii(c::Union{Char,AbstractString}) -> Bool
Test whether a character belongs to the ASCII character set, or whether this is true for
all elements of a string.
# Examples
```jldoctest
julia> isascii('a')
true
julia> isascii('α')
false
julia> isascii("abc")
true
julia> isascii("αβγ")
false
```
"""
isascii(c::Char) = c < Char(0x80)
isascii(s::AbstractString) = all(isascii, s)

"""
isxdigit(c::Char) -> Bool
Test whether a character is a valid hexadecimal digit. Note that this does not
include `x` (as in the standard `0x` prefix).
# Examples
```jldoctest
julia> isxdigit('a')
true
julia> isxdigit('x')
false
```
"""
isxdigit(c::Char) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'

## uppercase, lowercase, and titlecase transformations ##

"""
uppercase(s::AbstractString)
Return `s` with all characters converted to uppercase.
# Examples
```jldoctest
julia> uppercase("Julia")
"JULIA"
```
"""
uppercase(s::AbstractString) = map(uppercase, s)

"""
lowercase(s::AbstractString)
Return `s` with all characters converted to lowercase.
# Examples
```jldoctest
julia> lowercase("STRINGS AND THINGS")
"strings and things"
```
"""
lowercase(s::AbstractString) = map(lowercase, s)

"""
titlecase(s::AbstractString)
Capitalize the first character of each word in `s`.
See also [`ucfirst`](@ref) to capitalize only the first
character in `s`.
# Examples
```jldoctest
julia> titlecase("the julia programming language")
"The Julia Programming Language"
```
"""
function titlecase(s::AbstractString)
startword = true
b = IOBuffer()
for c in s
if isspace(c)
print(b, c)
startword = true
else
print(b, startword ? titlecase(c) : c)
startword = false
end
end
return String(take!(b))
end

"""
ucfirst(s::AbstractString)
Return `string` with the first character converted to uppercase
(technically "title case" for Unicode).
See also [`titlecase`](@ref) to capitalize the first character of
every word in `s`.
# Examples
```jldoctest
julia> ucfirst("python")
"Python"
```
"""
function ucfirst(s::AbstractString)
isempty(s) && return s
c = s[1]
tc = titlecase(c)
return c==tc ? s : string(tc,s[nextind(s,1):end])
end

"""
lcfirst(s::AbstractString)
Return `string` with the first character converted to lowercase.
# Examples
```jldoctest
julia> lcfirst("Julia")
"julia"
```
"""
function lcfirst(s::AbstractString)
isempty(s) || islower(s[1]) ? s : string(lowercase(s[1]),s[nextind(s,1):end])
end

############################################################################
# iterators for grapheme segmentation

Expand Down
1 change: 1 addition & 0 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,7 @@ Base.require(:Profile)
# Load each of the listed packages by name through `Base.require`.
Base.require(:SharedArrays)
Base.require(:SuiteSparse)
Base.require(:Test)
Base.require(:Unicode)

@eval Base begin
@deprecate_binding Test root_module(:Test) true ", run `using Test` instead"
Expand Down

0 comments on commit 66b5e9c

Please sign in to comment.