From c8ac7f8080ea5c549298df49d09e802c66131f4b Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 21 Dec 2017 23:18:32 -0500 Subject: [PATCH] prevent modifying vector after in-place conversion to string part of #24388 --- base/c.jl | 4 ++-- base/deprecated.jl | 14 ++++++++------ base/exports.jl | 1 + base/strings/io.jl | 4 ++-- base/strings/string.jl | 14 +++++++++----- base/strings/util.jl | 6 ++---- src/array.c | 5 +++++ test/strings/basic.jl | 8 ++++++++ 8 files changed, 37 insertions(+), 19 deletions(-) diff --git a/base/c.jl b/base/c.jl index 0989c2854be07..3ed671b70c744 100644 --- a/base/c.jl +++ b/base/c.jl @@ -204,7 +204,7 @@ function transcode end transcode(::Type{T}, src::AbstractVector{T}) where {T<:Union{UInt8,UInt16,UInt32,Int32}} = src transcode(::Type{T}, src::String) where {T<:Union{Int32,UInt32}} = T[T(c) for c in src] -transcode(::Type{T}, src::AbstractVector{UInt8}) where {T<:Union{Int32,UInt32}} = transcode(T, String(src)) +transcode(::Type{T}, src::Union{Vector{UInt8},StringBytes}) where {T<:Union{Int32,UInt32}} = transcode(T, String(src)) function transcode(::Type{UInt8}, src::Vector{<:Union{Int32,UInt32}}) buf = IOBuffer() for c in src; print(buf, Char(c)); end @@ -214,7 +214,7 @@ transcode(::Type{String}, src::String) = src transcode(T, src::String) = transcode(T, StringBytes(src)) transcode(::Type{String}, src) = String(transcode(UInt8, src)) -function transcode(::Type{UInt16}, src::AbstractVector{UInt8}) +function transcode(::Type{UInt16}, src::Union{Vector{UInt8},StringBytes}) dst = UInt16[] i, n = 1, length(src) n > 0 || return dst diff --git a/base/deprecated.jl b/base/deprecated.jl index 0626458f6ec46..77a6a7d6d7f8e 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -1555,15 +1555,17 @@ export hex2num @deprecate ctranspose adjoint @deprecate ctranspose! adjoint! -@deprecate convert(::Type{Vector{UInt8}}, s::AbstractString) copy(Base.StringBytes(s)) -@deprecate convert(::Type{Array{UInt8}}, s::AbstractString) copy(Base.StringBytes(s)) +function convert(::Union{Type{Vector{UInt8}}, Type{Array{UInt8}}}, s::AbstractString) + depwarn("Strings can no longer be `convert`ed to byte arrays. Use `unsafe_wrap` or `StringBytes` instead.", :Type) + unsafe_wrap(Vector{UInt8}, String(s)) +end function (::Type{Vector{UInt8}})(s::String) - depwarn("Vector{UInt8}(s::String) will copy data in the future. To avoid copying, use StringBytes(s) instead.", :Type) - Base.StringBytes(s) + depwarn("Vector{UInt8}(s::String) will copy data in the future. To avoid copying, use `unsafe_wrap` or `StringBytes` instead.", :Type) + unsafe_wrap(Vector{UInt8}, s) end function (::Type{Array{UInt8}})(s::String) - depwarn("Array{UInt8}(s::String) will copy data in the future. To avoid copying, use StringBytes(s) instead.", :Type) - Base.StringBytes(s) + depwarn("Array{UInt8}(s::String) will copy data in the future. To avoid copying, use `unsafe_wrap` or `StringBytes` instead.", :Type) + unsafe_wrap(Vector{UInt8}, s) end @deprecate convert(::Type{Vector{Char}}, s::AbstractString) Vector{Char}(s) diff --git a/base/exports.jl b/base/exports.jl index 2da4595fc35ca..dabdb067266c4 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -109,6 +109,7 @@ export StridedMatrix, StridedVecOrMat, StridedVector, + StringBytes, SubArray, SubString, Symmetric, diff --git a/base/strings/io.jl b/base/strings/io.jl index d83f7b4a18624..b41a418557cd7 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -188,8 +188,8 @@ julia> String(take!(io)) "Haho" ``` """ -IOBuffer(str::String) = IOBuffer(unsafe_wrap(Vector{UInt8},str)) -IOBuffer(s::SubString{String}) = IOBuffer(view(unsafe_wrap(Vector{UInt8},s.string), s.offset + 1 : s.offset + sizeof(s))) +IOBuffer(str::String) = IOBuffer(StringBytes(str)) +IOBuffer(s::SubString{String}) = IOBuffer(view(StringBytes(s.string), s.offset + 1 : s.offset + sizeof(s))) # join is implemented using IO diff --git a/base/strings/string.jl b/base/strings/string.jl index 0415eb427ee75..85e5c17468a63 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -62,6 +62,12 @@ String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s)) unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s) +""" + StringBytes(s::String) + +Wrap a `String` (without copying) in an immutable vector-like object that accesses the bytes +of the string's representation. +""" struct StringBytes <: AbstractVector{UInt8} s::String end @@ -75,12 +81,10 @@ start(s::StringBytes) = 1 next(s::StringBytes, i) = (@_propagate_inbounds_meta; (s[i], i+1)) done(s::StringBytes, i) = (@_inline_meta; i == length(s)+1) -copy(s::StringBytes) = copyto!(Vector{UInt8}(uninitialized, length(s)), s) +(::Type{Vector{UInt8}})(s::StringBytes) = copyto!(Vector{UInt8}(uninitialized, length(s)), s) -unsafe_convert(::Type{Ptr{UInt8}}, s::StringBytes) = convert(Ptr{UInt8}, pointer(s.s)) -unsafe_convert(::Type{Ptr{Int8}}, s::StringBytes) = convert(Ptr{Int8}, pointer(s.s)) -pointer(s::StringBytes) = pointer(s.s) -pointer(s::StringBytes, i::Integer) = pointer(s.s, i) +unsafe_convert(::Type{Ptr{UInt8}}, s::StringBytes) = unsafe_convert(Ptr{UInt8}, s.s) +unsafe_convert(::Type{Ptr{Int8}}, s::StringBytes) = unsafe_convert(Ptr{Int8}, s.s) write(io::IO, s::StringBytes) = write(io, s.s) diff --git a/base/strings/util.jl b/base/strings/util.jl index f9d500b35cb49..522ecf1fd7623 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -478,8 +478,6 @@ function hex2bytes end hex2bytes(s::AbstractString) = hex2bytes(String(s)) hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(uninitialized, length(s) >> 1), s) -_nbytes(s::String) = sizeof(s) -_nbytes(s::AbstractVector{UInt8}) = length(s) _firstbyteidx(s::String) = 1 _firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s)) _lastbyteidx(s::String) = sizeof(s) @@ -493,8 +491,8 @@ representation, similar to [`hex2bytes`](@ref) except that the output is written in `d`. The length of `s` must be exactly twice the length of `d`. """ function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}}) - if 2length(d) != _nbytes(s) - isodd(_nbytes(s)) && throw(ArgumentError("input hex array must have even length")) + if 2length(d) != sizeof(s) + isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length")) throw(ArgumentError("output array must be half length of input array")) end j = first(eachindex(d)) - 1 diff --git a/src/array.c b/src/array.c index 6135704f8e7dd..f42b8cc36f517 100644 --- a/src/array.c +++ b/src/array.c @@ -441,6 +441,11 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a) if (jl_is_string(o)) { a->flags.isshared = 1; *(size_t*)o = jl_array_len(a); + a->nrows = 0; +#ifdef STORE_ARRAY_LEN + a->length = 0; +#endif + a->maxsize = 0; return o; } } diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 385d8986d046d..831474bee1f46 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -794,3 +794,11 @@ end @test nextind("\xf8\x9f\x98\x84", 1) == 2 @test next("\xf8\x9f\x98\x84z", 1)[2] == 2 @test nextind("\xf8\x9f\x98\x84z", 1) == 2 + +# issue #24388 +let v = unsafe_wrap(Vector{UInt8}, "abc") + s = String(v) + @test_throws BoundsError v[1] + push!(v, UInt8('x')) + @test s == "abc" +end