Skip to content

Commit

Permalink
refactor String constructors
Browse files Browse the repository at this point in the history
- converting between strings, Symbols, and Vectors should be a constructor,
  not `convert`
- define everything as constructors, with a single general `convert`
  method for all string types
  • Loading branch information
JeffBezanson committed Aug 22, 2017
1 parent eaca8f2 commit 1ad17ba
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 35 deletions.
8 changes: 8 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,14 @@ export hex2num
@deprecate ctranspose adjoint
@deprecate ctranspose! adjoint!

# Conversions between strings, Symbols, and byte/char vectors are spelled as
# constructors; each deprecated `convert` method below forwards to the constructor form.
@deprecate convert(::Type{Vector{UInt8}}, s::AbstractString) Vector{UInt8}(s)
# `Array{UInt8}` (dimensionality unspecified) is redirected to the `Vector{UInt8}` constructor.
@deprecate convert(::Type{Array{UInt8}}, s::AbstractString) Vector{UInt8}(s)
@deprecate convert(::Type{Vector{Char}}, s::AbstractString) Vector{Char}(s)
@deprecate convert(::Type{Symbol}, s::AbstractString) Symbol(s)
@deprecate convert(::Type{String}, s::Symbol) String(s)
@deprecate convert(::Type{String}, v::Vector{UInt8}) String(v)
# Converting a grapheme iterator to a string type forwards to converting its wrapped string `g.s`.
@deprecate convert(::Type{S}, g::UTF8proc.GraphemeIterator) where {S<:AbstractString} convert(S, g.s)

# issue #5148, PR #23259
# warning for `const` on locals should be changed to an error in julia-syntax.scm

Expand Down
3 changes: 2 additions & 1 deletion base/stacktraces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ struct StackFrame # this type should be kept platform-agnostic so that profiles
pointer::UInt64 # Large enough to be read losslessly on 32- and 64-bit machines.
end

StackFrame(func, file, line) = StackFrame(func, file, line, Nullable{Core.MethodInstance}(), false, false, 0)
# Three-argument convenience constructor: `func` and `file` are passed through
# `Symbol(...)` explicitly, and the remaining four arguments are filled with defaults
# (an empty `Nullable{Core.MethodInstance}`, `false`, `false`, and `0`).
StackFrame(func, file, line) = StackFrame(Symbol(func), Symbol(file), line,
Nullable{Core.MethodInstance}(), false, false, 0)

"""
StackTrace
Expand Down
20 changes: 7 additions & 13 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,15 @@ next(s::AbstractString, i::Integer) = next(s,Int(i))
string() = ""
string(s::AbstractString) = s

"""
String(s::AbstractString)
(::Type{Vector{UInt8}})(s::AbstractString) = Vector{UInt8}(String(s))
(::Type{Array{UInt8}})(s::AbstractString) = Vector{UInt8}(s)
(::Type{Vector{Char}})(s::AbstractString) = collect(s)

Convert a string to a contiguous byte array representation encoded as UTF-8 bytes.
This representation is often appropriate for passing strings to C.
"""
String(s::AbstractString) = print_to_string(s)
Symbol(s::AbstractString) = Symbol(String(s))

convert(::Type{Vector{UInt8}}, s::AbstractString) = convert(Vector{UInt8}, String(s))
convert(::Type{Array{UInt8}}, s::AbstractString) = convert(Vector{UInt8}, s)
convert(::Type{String}, s::AbstractString) = String(s)
convert(::Type{Vector{Char}}, s::AbstractString) = collect(s)
convert(::Type{Symbol}, s::AbstractString) = Symbol(s)
convert(::Type{String}, s::Symbol) = unsafe_string(Cstring(s))
# string types are convertible
# Identity case: a value that already has the target string type is returned as-is.
convert(::Type{T}, s::T) where {T<:AbstractString} = s
# General case: converting between string types delegates to the target type's
# constructor `T(s)`, so a string type supplies a constructor rather than its own
# `convert` method.
convert(::Type{T}, s::AbstractString) where {T<:AbstractString} = T(s)

## generic supplied functions ##

Expand All @@ -40,7 +35,6 @@ getindex(s::AbstractString, v::AbstractVector{Bool}) =
throw(ArgumentError("logical indexing not supported for strings"))

get(s::AbstractString, i::Integer, default) = isvalid(s,i) ? s[i] : default
Symbol(s::AbstractString) = Symbol(String(s))

"""
sizeof(s::AbstractString)
Expand Down
16 changes: 12 additions & 4 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ end

_string_n(n::Integer) = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), n)

convert(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)
convert(::Type{String}, s::String) = s
convert(::Type{String}, v::Vector{UInt8}) = String(v)
# Generic fallback for any `AbstractString`: the result is produced by `print_to_string(s)`.
"""
String(s::AbstractString)
Convert a string to a contiguous byte array representation encoded as UTF-8 bytes.
This representation is often appropriate for passing strings to C.
"""
String(s::AbstractString) = print_to_string(s)

# Build a `String` from a `Symbol` by treating the symbol as a `Cstring` and handing
# it to `unsafe_string`.
String(s::Symbol) = unsafe_string(Cstring(s))

# `Vector{UInt8}` constructor for `String`, implemented by the runtime function
# `jl_string_to_array`.
# NOTE(review): whether the returned array shares memory with `s` is not visible
# here — confirm before mutating the result.
(::Type{Vector{UInt8}})(s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)

## low-level functions ##

Expand Down Expand Up @@ -394,7 +402,7 @@ function string(a::Union{String,Char}...)
end

function reverse(s::String)
dat = convert(Vector{UInt8},s)
dat = Vector{UInt8}(s)
n = length(dat)
n <= 1 && return s
buf = StringVector(n)
Expand Down
9 changes: 4 additions & 5 deletions base/strings/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j
SubString(s::SubString, i::Int, j::Int) = SubString(s.string, s.offset+i, s.offset+j)
SubString(s::AbstractString, i::Integer, j::Integer) = SubString(s, Int(i), Int(j))
SubString(s::AbstractString, i::Integer) = SubString(s, i, endof(s))
# Single-argument `SubString{T}` constructor: wraps all of `s`, from index 1
# through `endof(s)`.
SubString{T}(s::T) where {T<:AbstractString} = SubString(s, 1, endof(s))

# Materialize a `SubString{String}` as a `String`: start at `p.offset+1` in the
# parent string and pass `nextind(p, p.endof)-1` as the length argument to
# `unsafe_string`.
# NOTE(review): `sizeof(::SubString{String})` special-cases `endof == 0` but this
# method does not — confirm empty substrings yield a zero length here.
String(p::SubString{String}) =
unsafe_string(pointer(p.string, p.offset+1), nextind(p, p.endof)-1)

sizeof(s::SubString{String}) = s.endof == 0 ? 0 : nextind(s, s.endof) - 1

Expand Down Expand Up @@ -73,11 +77,6 @@ chr2ind(s::SubString{<:DirectIndexString}, i::Integer) = begin checkbounds(s,i);
nextind(s::SubString, i::Integer) = nextind(s.string, i+s.offset)-s.offset
prevind(s::SubString, i::Integer) = prevind(s.string, i+s.offset)-s.offset

convert(::Type{SubString{T}}, s::T) where {T<:AbstractString} = SubString(s, 1, endof(s))

String(p::SubString{String}) =
unsafe_string(pointer(p.string, p.offset+1), nextind(p, p.endof)-1)

function getindex(s::AbstractString, r::UnitRange{Int})
checkbounds(s, r) || throw(BoundsError(s, r))
SubString(s, first(r), last(r))
Expand Down
2 changes: 0 additions & 2 deletions base/strings/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -500,8 +500,6 @@ end
hash(g::GraphemeIterator, h::UInt) = hash(g.s, h)
isless(g1::GraphemeIterator, g2::GraphemeIterator) = isless(g1.s, g2.s)

convert(::Type{S}, g::GraphemeIterator) where {S<:AbstractString} = convert(S, g.s)

show(io::IO, g::GraphemeIterator{S}) where {S} = print(io, "length-$(length(g)) GraphemeIterator{$S} for \"$(g.s)\"")

############################################################################
Expand Down
1 change: 0 additions & 1 deletion base/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1366,7 +1366,6 @@ with string types besides the standard `String` type.
struct GenericString <: AbstractString
string::AbstractString
end
Base.convert(::Type{GenericString}, s::AbstractString) = GenericString(s)
Base.endof(s::GenericString) = endof(s.string)
Base.next(s::GenericString, i::Int) = next(s.string, i)

Expand Down
9 changes: 4 additions & 5 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,15 @@ end
struct tstStringType <: AbstractString
data::Array{UInt8,1}
end
tstr = tstStringType("12")
tstr = tstStringType(Vector{UInt8}("12"))
@test_throws ErrorException endof(tstr)
@test_throws ErrorException next(tstr, Bool(1))

gstr = GenericString("12")
@test typeof(string(gstr))==GenericString
@test string(gstr) isa GenericString

@test convert(Array{UInt8}, gstr) ==[49;50]
@test convert(Array{Char,1}, gstr) ==['1';'2']
@test convert(Symbol, gstr)==Symbol("12")
@test Array{UInt8}(gstr) == [49, 50]
@test Array{Char,1}(gstr) == ['1', '2']

@test gstr[1] == '1'
@test gstr[1:1] == "1"
Expand Down
6 changes: 3 additions & 3 deletions test/unicode/utf8.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
let ch = 0x10000
for hi = 0xd800:0xdbff
for lo = 0xdc00:0xdfff
@test convert(String, Vector{UInt8}(String(Char[hi, lo]))) == string(Char(ch))
@test String(Vector{UInt8}(String(Char[hi, lo]))) == string(Char(ch))
ch += 1
end
end
Expand Down Expand Up @@ -41,7 +41,7 @@ end
end

@testset "string convert" begin
@test convert(String, b"this is a test\xed\x80\x80") == "this is a test\ud000"
@test String(b"this is a test\xed\x80\x80") == "this is a test\ud000"
## Specifically check UTF-8 string whose lead byte is same as a surrogate
@test convert(String, b"\xed\x9f\xbf") == "\ud7ff"
@test String(b"\xed\x9f\xbf") == "\ud7ff"
end
1 change: 0 additions & 1 deletion test/unicode/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,6 @@ end
g = graphemes(str)
h = hash(str)
@test hash(g) == h
@test convert(GenericString, g) == str
@test repr(g) == "length-14 GraphemeIterator{String} for \"$str\""
end
end
Expand Down

0 comments on commit 1ad17ba

Please sign in to comment.