Skip to content

Commit

Permalink
deprecate Vector<->String conversion in favor of something safer
Browse files Browse the repository at this point in the history
fixes #24388
  • Loading branch information
JeffBezanson committed Dec 23, 2017
1 parent 47dee03 commit b9504db
Show file tree
Hide file tree
Showing 18 changed files with 79 additions and 36 deletions.
14 changes: 7 additions & 7 deletions base/c.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ cconvert(::Type{Cstring}, s::AbstractString) =
cconvert(Cstring, String(s)::String)

function cconvert(::Type{Cwstring}, s::AbstractString)
v = transcode(Cwchar_t, Vector{UInt8}(String(s)))
v = transcode(Cwchar_t, String(s))
!isempty(v) && v[end] == 0 || push!(v, 0)
return v
end
Expand All @@ -140,7 +140,7 @@ containsnul(p::Ptr, len) =
containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
containsnul(s::AbstractString) = '\0' in s

function unsafe_convert(::Type{Cstring}, s::Union{String,Vector{UInt8}})
function unsafe_convert(::Type{Cstring}, s::Union{String,AbstractVector{UInt8}})
p = unsafe_convert(Ptr{Cchar}, s)
containsnul(p, sizeof(s)) &&
throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
Expand Down Expand Up @@ -174,7 +174,7 @@ same argument.
This is only available on Windows.
"""
function cwstring(s::AbstractString)
bytes = Vector{UInt8}(String(s))
bytes = StringBytes(String(s))
0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
return push!(transcode(UInt16, bytes), 0)
end
Expand Down Expand Up @@ -202,19 +202,19 @@ Only conversion to/from UTF-8 is currently supported.
"""
function transcode end

transcode(::Type{T}, src::Vector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src
transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src
transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src]
transcode(::Type{T}, src::Vector{UInt8}) where {T<:Union{Int32,UInt32}} = transcode(T, String(src))
transcode(::Type{T}, src::Union{Vector{UInt8},StringBytes}) where {T<:Union{Int32,UInt32}} = transcode(T, String(src))
function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
buf = IOBuffer()
for c in src; print(buf, Char(c)); end
take!(buf)
end
transcode(::Type{String}, src::String) = src
transcode(T, src::String) = transcode(T, Vector{UInt8}(src))
transcode(T, src::String) = transcode(T, StringBytes(src))
transcode(::Type{String}, src) = String(transcode(UInt8, src))

function transcode(::Type{UInt16}, src::Vector{UInt8})
function transcode(::Type{UInt16}, src::Union{Vector{UInt8},StringBytes})
dst = UInt16[]
i, n = 1, length(src)
n > 0 || return dst
Expand Down
15 changes: 13 additions & 2 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1555,8 +1555,19 @@ export hex2num
@deprecate ctranspose adjoint
@deprecate ctranspose! adjoint!

@deprecate convert(::Type{Vector{UInt8}}, s::AbstractString) Vector{UInt8}(s)
@deprecate convert(::Type{Array{UInt8}}, s::AbstractString) Vector{UInt8}(s)
function convert(::Union{Type{Vector{UInt8}}, Type{Array{UInt8}}}, s::AbstractString)
depwarn("Strings can no longer be `convert`ed to byte arrays. Use `unsafe_wrap` or `StringBytes` instead.", :Type)
unsafe_wrap(Vector{UInt8}, String(s))
end
function (::Type{Vector{UInt8}})(s::String)
depwarn("Vector{UInt8}(s::String) will copy data in the future. To avoid copying, use `unsafe_wrap` or `StringBytes` instead.", :Type)
unsafe_wrap(Vector{UInt8}, s)
end
function (::Type{Array{UInt8}})(s::String)
depwarn("Array{UInt8}(s::String) will copy data in the future. To avoid copying, use `unsafe_wrap` or `StringBytes` instead.", :Type)
unsafe_wrap(Vector{UInt8}, s)
end

@deprecate convert(::Type{Vector{Char}}, s::AbstractString) Vector{Char}(s)
@deprecate convert(::Type{Symbol}, s::AbstractString) Symbol(s)
@deprecate convert(::Type{String}, s::Symbol) String(s)
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ export
StridedMatrix,
StridedVecOrMat,
StridedVector,
StringBytes,
SubArray,
SubString,
Symmetric,
Expand Down
2 changes: 1 addition & 1 deletion base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ function readuntil(io::IO, target::AbstractString)
# decide how we can index target
if target isa String
# convert String to a utf8-byte-iterator
target = Vector{UInt8}(target)
target = StringBytes(target)
#elseif applicable(codeunit, target)
# TODO: a more general version of above optimization
# would be to permit accessing any string via codeunit
Expand Down
2 changes: 1 addition & 1 deletion base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ function GenericIOBuffer(data::T, readable::Bool, writable::Bool, seekable::Bool
end

# allocate Vector{UInt8}s for IOBuffer storage that can efficiently become Strings
StringVector(n::Integer) = Vector{UInt8}(_string_n(n))
StringVector(n::Integer) = unsafe_wrap(Vector{UInt8}, _string_n(n))

# IOBuffers behave like Files. They are typically readable and writable. They are seekable. (They can be appendable).

Expand Down
4 changes: 2 additions & 2 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@ elseif Sys.isapple()
break
end
# Hack to compensate for inability to create a string from a subarray with no allocations.
Vector{UInt8}(path_basename) == casepreserved_basename && return true
StringBytes(path_basename) == casepreserved_basename && return true

# If there is no match, it's possible that the file does exist but HFS+
# performed unicode normalization. See https://developer.apple.com/library/mac/qa/qa1235/_index.html.
isascii(path_basename) && return false
Vector{UInt8}(Unicode.normalize(path_basename, :NFD)) == casepreserved_basename
StringBytes(Unicode.normalize(path_basename, :NFD)) == casepreserved_basename
end
else
# Generic fallback that performs a slow directory listing.
Expand Down
2 changes: 1 addition & 1 deletion base/repl/LineEdit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ function edit_splice!(s, r::Region=region(s), ins::AbstractString = ""; rigid_ma
elseif buf.mark >= B
buf.mark += sizeof(ins) - B + A
end
ret = splice!(buf.data, A+1:B, Vector{UInt8}(ins)) # position(), etc, are 0-indexed
ret = splice!(buf.data, A+1:B, Base.StringBytes(String(ins))) # position(), etc, are 0-indexed
buf.size = buf.size + sizeof(ins) - B + A
adjust_pos && seek(buf, position(buf) + sizeof(ins))
String(ret)
Expand Down
2 changes: 1 addition & 1 deletion base/replutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ function showerror(io::IO, ex::ErrorException)
print(io, ex.msg)
if ex.msg == "type String has no field data"
println(io)
print(io, "Use `Vector{UInt8}(str)` instead.")
print(io, "Use `StringBytes(str)` instead.")
end
end
showerror(io::IO, ex::KeyError) = print(io, "KeyError: key $(repr(ex.key)) not found")
Expand Down
4 changes: 2 additions & 2 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ checkbounds(s::AbstractString, I::Union{Integer,AbstractArray}) =
string() = ""
string(s::AbstractString) = s

(::Type{Vector{UInt8}})(s::AbstractString) = Vector{UInt8}(String(s))
(::Type{Array{UInt8}})(s::AbstractString) = Vector{UInt8}(s)
(::Type{Vector{UInt8}})(s::AbstractString) = unsafe_wrap(Vector{UInt8}, String(s))
(::Type{Array{UInt8}})(s::AbstractString) = unsafe_wrap(Vector{UInt8}, String(s))
(::Type{Vector{Char}})(s::AbstractString) = collect(s)

Symbol(s::AbstractString) = Symbol(String(s))
Expand Down
9 changes: 6 additions & 3 deletions base/strings/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ julia> String(take!(io))
"Haho"
```
"""
IOBuffer(str::String) = IOBuffer(Vector{UInt8}(str))
IOBuffer(s::SubString{String}) = IOBuffer(view(Vector{UInt8}(s.string), s.offset + 1 : s.offset + sizeof(s)))
IOBuffer(str::String) = IOBuffer(StringBytes(str))
IOBuffer(s::SubString{String}) = IOBuffer(view(StringBytes(s.string), s.offset + 1 : s.offset + sizeof(s)))

# join is implemented using IO

Expand Down Expand Up @@ -373,7 +373,10 @@ function unescape_string(io, s::AbstractString)
end
end

macro b_str(s); :(Vector{UInt8}($(unescape_string(s)))); end
macro b_str(s)
v = unsafe_wrap(Vector{UInt8}, unescape_string(s))
QuoteNode(v)
end

"""
@raw_str -> String
Expand Down
4 changes: 2 additions & 2 deletions base/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ function search(a::ByteArray, b::Char, i::Integer = 1)
if isascii(b)
search(a,UInt8(b),i)
else
search(a,Vector{UInt8}(string(b)),i).start
search(a,unsafe_wrap(Vector{UInt8},string(b)),i).start
end
end

Expand Down Expand Up @@ -62,7 +62,7 @@ function rsearch(a::ByteArray, b::Char, i::Integer = length(a))
if isascii(b)
rsearch(a,UInt8(b),i)
else
rsearch(a,Vector{UInt8}(string(b)),i).start
rsearch(a,unsafe_wrap(Vector{UInt8},string(b)),i).start
end
end

Expand Down
30 changes: 29 additions & 1 deletion base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,35 @@ This representation is often appropriate for passing strings to C.
String(s::AbstractString) = print_to_string(s)
String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))

(::Type{Vector{UInt8}})(s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)
unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)

"""
StringBytes(s::String)
Wrap a `String` (without copying) in an immutable vector-like object that accesses the bytes
of the string's representation.
"""
struct StringBytes <: AbstractVector{UInt8}
s::String
end

sizeof(s::StringBytes) = sizeof(s.s)
length(s::StringBytes) = sizeof(s.s)
size(s::StringBytes) = (length(s),)
@propagate_inbounds getindex(s::StringBytes, i::Int) = codeunit(s.s, i)
IndexStyle(::Type{StringBytes}) = IndexLinear()
start(s::StringBytes) = 1
next(s::StringBytes, i) = (@_propagate_inbounds_meta; (s[i], i+1))
done(s::StringBytes, i) = (@_inline_meta; i == length(s)+1)

(::Type{Vector{UInt8}})(s::StringBytes) = copyto!(Vector{UInt8}(uninitialized, length(s)), s)

unsafe_convert(::Type{Ptr{UInt8}}, s::StringBytes) = unsafe_convert(Ptr{UInt8}, s.s)
unsafe_convert(::Type{Ptr{Int8}}, s::StringBytes) = unsafe_convert(Ptr{Int8}, s.s)

write(io::IO, s::StringBytes) = write(io, s.s)

String(s::StringBytes) = s.s

## low-level functions ##

Expand Down
2 changes: 1 addition & 1 deletion test/char.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ end
@testset "read incomplete character at end of stream or file" begin
local file = tempname()
local iob = IOBuffer([0xf0])
local bytes(c::Char) = Vector{UInt8}(string(c))
local bytes(c::Char) = Base.StringBytes(string(c))
@test bytes(read(iob, Char)) == [0xf0]
@test eof(iob)
try
Expand Down
2 changes: 1 addition & 1 deletion test/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4114,7 +4114,7 @@ b = "aaa"
c = [0x2, 0x1, 0x3]

@test check_nul(a)
@test check_nul(Vector{UInt8}(b))
@test check_nul(unsafe_wrap(Vector{UInt8},b))
@test check_nul(c)
d = [0x2, 0x1, 0x3]
@test check_nul(d)
Expand Down
2 changes: 1 addition & 1 deletion test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ end

let s = "abcα🐨\0x\0"
for T in (UInt8, UInt16, UInt32, Int32)
@test transcode(T, s) == transcode(T, Vector{UInt8}(s))
@test transcode(T, s) == transcode(T, Base.StringBytes(s))
@test transcode(String, transcode(T, s)) == s
end
end
Expand Down
2 changes: 1 addition & 1 deletion test/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ let b = ['0':'9';'A':'Z';'a':'z']
@test length(randstring(rng...)) == 8
@test length(randstring(rng..., 20)) == 20
@test issubset(randstring(rng...), b)
for c = ['a':'z', "qwèrtï", Set(Vector{UInt8}("gcat"))],
for c = ['a':'z', "qwèrtï", Set(Base.StringBytes("gcat"))],
len = [8, 20]
s = len == 8 ? randstring(rng..., c) : randstring(rng..., c, len)
@test length(s) == len
Expand Down
14 changes: 7 additions & 7 deletions test/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ for (name, f) in l
@test readuntil(io(t), s) == m
@test readuntil(io(t), SubString(s, start(s), endof(s))) == m
@test readuntil(io(t), GenericString(s)) == m
@test readuntil(io(t), Vector{UInt8}(s)) == Vector{UInt8}(m)
@test readuntil(io(t), unsafe_wrap(Vector{UInt8},s)) == unsafe_wrap(Vector{UInt8},m)
@test readuntil(io(t), collect(s)::Vector{Char}) == Vector{Char}(m)
end
cleanup()
Expand Down Expand Up @@ -223,7 +223,7 @@ for (name, f) in l


verbose && println("$name read...")
@test read(io()) == Vector{UInt8}(text)
@test read(io()) == unsafe_wrap(Vector{UInt8},text)

@test read(io()) == read(filename)

Expand Down Expand Up @@ -331,7 +331,7 @@ for (name, f) in l
@test read("$filename.to", String) == text

verbose && println("$name write(::IOBuffer, ...)")
to = IOBuffer(copy(Vector{UInt8}(text)), false, true)
to = IOBuffer(copy(Base.StringBytes(text)), false, true)
write(to, io())
@test String(take!(to)) == text

Expand Down Expand Up @@ -400,14 +400,14 @@ test_read_nbyte()


let s = "qwerty"
@test read(IOBuffer(s)) == Vector{UInt8}(s)
@test read(IOBuffer(s), 10) == Vector{UInt8}(s)
@test read(IOBuffer(s), 1) == Vector{UInt8}(s)[1:1]
@test read(IOBuffer(s)) == Base.StringBytes(s)
@test read(IOBuffer(s), 10) == Base.StringBytes(s)
@test read(IOBuffer(s), 1) == Base.StringBytes(s)[1:1]

# Test growing output array
x = UInt8[]
n = readbytes!(IOBuffer(s), x, 10)
@test x == Vector{UInt8}(s)
@test x == Base.StringBytes(s)
@test n == length(x)
end

Expand Down
4 changes: 2 additions & 2 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ struct tstStringType <: AbstractString
data::Array{UInt8,1}
end
@testset "AbstractString functions" begin
tstr = tstStringType(Vector{UInt8}("12"))
tstr = tstStringType(unsafe_wrap(Vector{UInt8},"12"))
@test_throws MethodError ncodeunits(tstr)
@test_throws MethodError codeunit(tstr)
@test_throws MethodError codeunit(tstr, 1)
Expand Down Expand Up @@ -697,7 +697,7 @@ end
end
end

@test Vector{UInt8}("\xcc\xdd\xee\xff\x80") == [0xcc,0xdd,0xee,0xff,0x80]
@test unsafe_wrap(Vector{UInt8},"\xcc\xdd\xee\xff\x80") == [0xcc,0xdd,0xee,0xff,0x80]

@test next("a", 1)[2] == 2
@test nextind("a", 1) == 2
Expand Down

0 comments on commit b9504db

Please sign in to comment.