Skip to content

Commit

Permalink
prevent modifying vector after in-place conversion to string
Browse files Browse the repository at this point in the history
part of #24388
  • Loading branch information
JeffBezanson committed Dec 23, 2017
1 parent bd91ac0 commit c8ac7f8
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 19 deletions.
4 changes: 2 additions & 2 deletions base/c.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ function transcode end

transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src
transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src]
transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} = transcode(T, String(src))
transcode(::Type{T}, src::Union{Vector{UInt8},StringBytes}) where {T<:Union{Int32,UInt32}} = transcode(T, String(src))
function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}})
buf = IOBuffer()
for c in src; print(buf, Char(c)); end
Expand All @@ -214,7 +214,7 @@ transcode(::Type{String}, src::String) = src
transcode(T, src::String) = transcode(T, StringBytes(src))
transcode(::Type{String}, src) = String(transcode(UInt8, src))

function transcode(::Type{UInt16}, src::AbstractVector{UInt8})
function transcode(::Type{UInt16}, src::Union{Vector{UInt8},StringBytes})
dst = UInt16[]
i, n = 1, length(src)
n > 0 || return dst
Expand Down
14 changes: 8 additions & 6 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1555,15 +1555,17 @@ export hex2num
@deprecate ctranspose adjoint
@deprecate ctranspose! adjoint!

@deprecate convert(::Type{Vector{UInt8}}, s::AbstractString) copy(Base.StringBytes(s))
@deprecate convert(::Type{Array{UInt8}}, s::AbstractString) copy(Base.StringBytes(s))
function convert(::Union{Type{Vector{UInt8}}, Type{Array{UInt8}}}, s::AbstractString)
depwarn("Strings can no longer be `convert`ed to byte arrays. Use `unsafe_wrap` or `StringBytes` instead.", :Type)
unsafe_wrap(Vector{UInt8}, String(s))
end
function (::Type{Vector{UInt8}})(s::String)
depwarn("Vector{UInt8}(s::String) will copy data in the future. To avoid copying, use StringBytes(s) instead.", :Type)
Base.StringBytes(s)
depwarn("Vector{UInt8}(s::String) will copy data in the future. To avoid copying, use `unsafe_wrap` or `StringBytes` instead.", :Type)
unsafe_wrap(Vector{UInt8}, s)
end
function (::Type{Array{UInt8}})(s::String)
depwarn("Array{UInt8}(s::String) will copy data in the future. To avoid copying, use StringBytes(s) instead.", :Type)
Base.StringBytes(s)
depwarn("Array{UInt8}(s::String) will copy data in the future. To avoid copying, use `unsafe_wrap` or `StringBytes` instead.", :Type)
unsafe_wrap(Vector{UInt8}, s)
end

@deprecate convert(::Type{Vector{Char}}, s::AbstractString) Vector{Char}(s)
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ export
StridedMatrix,
StridedVecOrMat,
StridedVector,
StringBytes,
SubArray,
SubString,
Symmetric,
Expand Down
4 changes: 2 additions & 2 deletions base/strings/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ julia> String(take!(io))
"Haho"
```
"""
IOBuffer(str::String) = IOBuffer(unsafe_wrap(Vector{UInt8},str))
IOBuffer(s::SubString{String}) = IOBuffer(view(unsafe_wrap(Vector{UInt8},s.string), s.offset + 1 : s.offset + sizeof(s)))
IOBuffer(str::String) = IOBuffer(StringBytes(str))
IOBuffer(s::SubString{String}) = IOBuffer(view(StringBytes(s.string), s.offset + 1 : s.offset + sizeof(s)))

# join is implemented using IO

Expand Down
14 changes: 9 additions & 5 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))

unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)

"""
StringBytes(s::String)
Wrap a `String` (without copying) in an immutable vector-like object that accesses the bytes
of the string's representation.
"""
struct StringBytes <: AbstractVector{UInt8}
s::String
end
Expand All @@ -75,12 +81,10 @@ start(s::StringBytes) = 1
next(s::StringBytes, i) = (@_propagate_inbounds_meta; (s[i], i+1))
done(s::StringBytes, i) = (@_inline_meta; i == length(s)+1)

copy(s::StringBytes) = copyto!(Vector{UInt8}(uninitialized, length(s)), s)
(::Type{Vector{UInt8}})(s::StringBytes) = copyto!(Vector{UInt8}(uninitialized, length(s)), s)

unsafe_convert(::Type{Ptr{UInt8}}, s::StringBytes) = convert(Ptr{UInt8}, pointer(s.s))
unsafe_convert(::Type{Ptr{Int8}}, s::StringBytes) = convert(Ptr{Int8}, pointer(s.s))
pointer(s::StringBytes) = pointer(s.s)
pointer(s::StringBytes, i::Integer) = pointer(s.s, i)
unsafe_convert(::Type{Ptr{UInt8}}, s::StringBytes) = unsafe_convert(Ptr{UInt8}, s.s)
unsafe_convert(::Type{Ptr{Int8}}, s::StringBytes) = unsafe_convert(Ptr{Int8}, s.s)

write(io::IO, s::StringBytes) = write(io, s.s)

Expand Down
6 changes: 2 additions & 4 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,6 @@ function hex2bytes end
hex2bytes(s::AbstractString) = hex2bytes(String(s))
hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(uninitialized, length(s) >> 1), s)

_nbytes(s::String) = sizeof(s)
_nbytes(s::AbstractVector{UInt8}) = length(s)
_firstbyteidx(s::String) = 1
_firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s))
_lastbyteidx(s::String) = sizeof(s)
Expand All @@ -493,8 +491,8 @@ representation, similar to [`hex2bytes`](@ref) except that the output is written
in `d`. The length of `s` must be exactly twice the length of `d`.
"""
function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
if 2length(d) != _nbytes(s)
isodd(_nbytes(s)) && throw(ArgumentError("input hex array must have even length"))
if 2length(d) != sizeof(s)
isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length"))
throw(ArgumentError("output array must be half length of input array"))
end
j = first(eachindex(d)) - 1
Expand Down
5 changes: 5 additions & 0 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,11 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
if (jl_is_string(o)) {
a->flags.isshared = 1;
*(size_t*)o = jl_array_len(a);
a->nrows = 0;
#ifdef STORE_ARRAY_LEN
a->length = 0;
#endif
a->maxsize = 0;
return o;
}
}
Expand Down
8 changes: 8 additions & 0 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -794,3 +794,11 @@ end
@test nextind("\xf8\x9f\x98\x84", 1) == 2
@test next("\xf8\x9f\x98\x84z", 1)[2] == 2
@test nextind("\xf8\x9f\x98\x84z", 1) == 2

# issue #24388
let v = unsafe_wrap(Vector{UInt8}, "abc")
s = String(v)
@test_throws BoundsError v[1]
push!(v, UInt8('x'))
@test s == "abc"
end

0 comments on commit c8ac7f8

Please sign in to comment.