Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix #10959 bugs with UTF-16 conversions #11551

Merged
merged 4 commits into from
Jul 1, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
264 changes: 206 additions & 58 deletions base/utf16.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,42 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

# Surrogate classification for UTF-16 code units.
# Lead (high) surrogates occupy 0xd800-0xdbff, trail (low) surrogates 0xdc00-0xdfff.
utf16_is_lead(c::UInt16) = 0xd800 <= c <= 0xdbff
utf16_is_trail(c::UInt16) = 0xdc00 <= c <= 0xdfff
utf16_is_surrogate(c::UInt16) = 0xd800 <= c <= 0xdfff
# Combine a lead/trail surrogate pair into the supplementary character it encodes
utf16_get_supplementary(lead::UInt16, trail::UInt16) =
    Char(0x10000 + (UInt32(lead - 0xd800) << 10) + (trail - 0xdc00))
# Quickly copy and set trailing \0
# Quickly copy and set trailing \0
#
# Allocates a buffer of `len+1` code units of type `T`, copies `len` elements
# of `dat` into it when `flag` is true (or `len+1` elements when false, for
# callers whose source already carries the terminator), writes the NUL
# terminator, and wraps the result in the string type `S`.
# NOTE(review): as scraped, review-comment prose was spliced between the
# signature and its parameter list, making the definition unparseable; this
# restores the contiguous definition.
@inline function fast_utf_copy{S <: Union{UTF16String, UTF32String}, T <: Union{UInt16, Char}}(
        ::Type{S}, ::Type{T}, len, dat, flag::Bool=false)
    S(setindex!(copy!(Vector{T}(len+1), 1, dat, 1, flag ? len : len+1), 0, len+1))
end

# Complete a 3-byte UTF-8 sequence: `ch` holds the lead byte (widened to
# UInt32 by the caller); dat[pos-1] and dat[pos] are the continuation bytes.
@inline function get_utf8_3byte(dat, pos, ch)
    @inbounds cont1 = dat[pos-1]
    @inbounds cont2 = dat[pos]
    return ((ch & 0xf) << 12) | (UInt32(cont1 & 0x3f) << 6) | (cont2 & 0x3f)
end
# Complete a 4-byte UTF-8 sequence: `ch` holds the lead byte (widened to
# UInt32 by the caller); dat[pos-2]..dat[pos] are the continuation bytes.
@inline function get_utf8_4byte(dat, pos, ch)
    @inbounds begin
        cont1 = UInt32(dat[pos-2] & 0x3f)
        cont2 = UInt32(dat[pos-1] & 0x3f)
        cont3 = dat[pos] & 0x3f
    end
    return ((ch & 0x7) << 18) | (cont1 << 12) | (cont2 << 6) | cont3
end

# Encode code point `ch` as a 4-byte UTF-8 sequence into buf[out+1 .. out+4].
# The caller advances `out` by 4 afterwards.
@inline function output_utf8_4byte!(buf, out, ch)
    byte1 = 0xf0 | (ch >>> 18)
    byte2 = 0x80 | ((ch >>> 12) & 0x3f)
    byte3 = 0x80 | ((ch >>> 6) & 0x3f)
    byte4 = 0x80 | (ch & 0x3f)
    @inbounds begin
        buf[out + 1] = byte1
        buf[out + 2] = byte2
        buf[out + 3] = byte3
        buf[out + 4] = byte4
    end
end

const empty_utf16 = UTF16String(UInt16[0])

# Number of characters in `s`: count every code unit except trail surrogates,
# so each surrogate pair contributes exactly one character.
function length(s::UTF16String)
    d = s.data
    len = length(d) - 1 # exclude the trailing NUL code unit
    len == 0 && return 0
    cnum = 0
    for i = 1:len
        # NOTE(review): the scraped diff retained both the old
        # (`utf16_is_trail`) and new (`is_surrogate_trail`) accumulation
        # lines, which double-counted every BMP character; only the new
        # line is kept.
        @inbounds cnum += !is_surrogate_trail(d[i])
    end
    cnum
end
# Index of the last character: back up one code unit when the final unit is
# half of a surrogate pair. (Signature reconstructed from the diff-hunk
# header line that had swallowed it.)
function endof(s::UTF16String)
    d = s.data
    i = length(d) - 1 # last code unit before the NUL terminator
    i == 0 && return i
    return is_surrogate_codeunit(d[i]) ? i-1 : i
end

get_supplementary(lead::Unsigned, trail::Unsigned) = (UInt32(lead-0xd7f7)<<10 + trail)

# Iterate: return (character at code-unit index i, next index).
# Throws UnicodeError for an index that lands inside or on a broken
# surrogate pair.
function next(s::UTF16String, i::Int)
    ch = s.data[i]
    # BMP character: a single code unit
    !is_surrogate_codeunit(ch) && return (Char(ch), i+1)
    # check length, account for terminating \0
    i >= (length(s.data)-1) && throw(UnicodeError(UTF_ERR_MISSING_SURROGATE, i, UInt32(ch)))
    !is_surrogate_lead(ch) && throw(UnicodeError(UTF_ERR_NOT_LEAD, i, UInt32(ch)))
    ct = s.data[i+1]
    # FIX(review): the scraped code threw a bare tuple here
    # (`throw((UTF_ERR_NOT_TRAIL, i, ch))`) instead of a UnicodeError
    !is_surrogate_trail(ct) && throw(UnicodeError(UTF_ERR_NOT_TRAIL, i, UInt32(ch)))
    Char(get_supplementary(ch, ct)), i+2
end

# Map index i in reverse(s) to the corresponding index in s, landing on the
# lead unit when the mirrored position points at the trail of a pair.
# NOTE(review): the diff had kept the old `Base.utf16_is_trail` return ahead
# of this one, making the new line unreachable; only the new form is kept.
function reverseind(s::UTF16String, i::Integer)
    j = length(s.data) - i
    return is_surrogate_trail(s.data[j]) ? j-1 : j
end

lastidx(s::UTF16String) = length(s.data) - 1 # s.data includes NULL terminator

# Reverse by code unit, swapping each surrogate pair back into lead/trail
# order as it is emitted.
# NOTE(review): the scraped diff nested the old loop inside the new one,
# duplicated the `d = s.data` assignment, and spliced review prose before
# the closing `end`; this restores the coherent new implementation.
function reverse(s::UTF16String)
    d = s.data
    out = similar(d)
    out[end] = 0 # NULL termination
    n = length(d)
    @inbounds for i = 1:n-1
        ch = d[n-i]
        if is_surrogate_lead(ch)
            # the trail was already written at out[i-1]; swap into pair order
            out[i], out[i-1] = out[i-1], ch
        else
            out[i] = ch
        end
    end
    UTF16String(out)
end

# TODO: optimize this
function encode16(s::AbstractString)
buf = UInt16[]
for ch in s
c = reinterpret(UInt32, ch)
sizeof(s::UTF16String) = sizeof(s.data) - sizeof(UInt16)

# Validate a UTF-16 code-unit sequence: every surrogate must occur as a
# lead immediately followed by a trail; anything else is invalid.
function isvalid(::Type{UTF16String}, data::AbstractArray{UInt16})
    pos = 1
    stop = length(data) # this may include NULL termination; that's okay
    @inbounds while pos < stop
        word = data[pos]
        if is_surrogate_lead(word) && is_surrogate_trail(data[pos+1])
            pos += 2 # consumed a complete surrogate pair
        elseif is_surrogate_codeunit(word)
            return false # unpaired or reversed surrogate
        else
            pos += 1
        end
    end
    # the final unit (when reached) must not be a lone surrogate
    return pos > stop || !is_surrogate_codeunit(data[pos])
end

"
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i dislike this particular comment on the sole grounds that it provides no additional information in 14 lines that was not provided by the subsequent one line of code (function convert(::Type{UTF16String}, str::AbstractString))

i agree that base could use more frequent comments (and generally appreciate the inline comments you've put in this PR), but let's not write text just for the sake of writing text.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's not true, it gives both the return type of the function, and lists what errors are thrown.
That information is not present on the first line of the function.
The idea was to have that information also available to the help system, and to things like Doxygen.
If you could list the exceptions thrown by a function, like in CLU, and had a way of specifying the return type
(which has been proposed), that would not be necessary.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but all information which is provided by the signature should not be repeated in the comment. The doc system should be smart enough to extract that automatically. (I think I said that in another comment already.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please go look at the latest version - there is information there that could not be extracted from the signature.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And yes, you are correct, and I totally agree, that if the information is provided by the signature, it doesn't need to be repeated. The signature also does not indicate whether a parameter is used for input, output, or both, something that my original doxygen format comments made nicely explicit, with @param[in], @param[out], and @param[in,out].
I'd redone the comments in #11575, but hadn't gone back to update these PRs, while waiting for some resolution on #11575.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's better, but if you think in terms of how the docs will be presented when they will be supported in Base, the generic convert function will already have its description printed, so you don't need to explain what the first argument means (and it's quite weird to describe an argument which is fixed to UTF16String). As for documenting str, the only interesting point is that \0 is not needed, but I don't think this should be explained here: people using that function should not care about the underlying representation of their string, as they are using the high-level interface.

The rest of the doc looks fine to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

About the comment on '\0', that was to contrast it with the other functions, such as UTF16String(),
which requires an explicit '\0'. I'll just go remove the Input Arguments sections though...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but UTF16String operates on raw data, where the caller needs to know low-level interpretation details.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, the lines between UTF16String(), convert, and utf16 have not been all that well drawn, in that many of the convert functions also work on raw data... I hope that this can all be cleaned up... possibly with @quinnj's String.jl package...

Converts an `AbstractString` to a `UTF16String`

### Returns:
* `UTF16String`

### Throws:
* `UnicodeError`
"
# Convert any AbstractString to UTF-16. `unsafe_checkstring` validates the
# input and returns the character count plus the number of non-BMP
# characters, so the output buffer can be sized exactly and written by index.
# NOTE(review): the scraped diff interleaved the old push!-based body with
# the new indexed-write body (e.g. `buf[out += 1] = UInt16(c)` had landed
# under the `c <= 0x10ffff` branch, where it would raise InexactError);
# this restores the coherent new implementation.
function convert(::Type{UTF16String}, str::AbstractString)
    len, flags, num4byte = unsafe_checkstring(str)
    buf = Vector{UInt16}(len+num4byte+1)
    out = 0
    @inbounds for ch in str
        c = UInt32(ch)
        if c < 0x10000
            buf[out += 1] = UInt16(c)
        else
            # output surrogate pair: 0xd7c0 == 0xd800 - 0x40, so
            # 0xd7c0 + (c >>> 10) == 0xd800 + ((c - 0x10000) >>> 10)
            buf[out += 1] = UInt16(0xd7c0 + (c >>> 10))
            buf[out += 1] = UInt16(0xdc00 + (c & 0x3ff))
        end
    end
    @inbounds buf[out + 1] = 0 # NULL termination
    UTF16String(buf)
end

# Construct a UTF16String from any convertible value
utf16(x) = convert(UTF16String, x)
# Identity conversion keeps the generic AbstractString method from re-encoding
convert(::Type{UTF16String}, s::UTF16String) = s
# Generic fallback: re-encode character by character via encode16
# (encode16's definition is truncated in this view — TODO confirm)
convert(::Type{UTF16String}, s::AbstractString) = encode16(s)
# Expose the raw code-unit buffer (includes the NUL terminator)
convert(::Type{Array{UInt16,1}}, s::UTF16String) = s.data
convert(::Type{Array{UInt16}}, s::UTF16String) = s.data
"
Converts a `UTF8String` to a `UTF16String`

# TODO: optimize this
convert(::Type{UTF8String}, s::UTF16String) =
sprint(length(s.data)-1, io->for c in s; write(io,c::Char); end)
### Returns:
* `UTF16String`

sizeof(s::UTF16String) = sizeof(s.data) - sizeof(UInt16)
unsafe_convert{T<:Union{Int16,UInt16}}(::Type{Ptr{T}}, s::UTF16String) =
convert(Ptr{T}, pointer(s))
### Throws:
* `UnicodeError`
"
# Decode a UTF-8 string directly into UTF-16 code units, bypassing the
# generic character iterator. `out`/`pos` track output/input positions and
# the statement order of the `pos` increments is load-bearing: each branch
# must advance past exactly the continuation bytes it consumes.
function convert(::Type{UTF16String}, str::UTF8String)
    dat = str.data
    # handle zero length string quickly
    sizeof(dat) == 0 && return empty_utf16
    # Check that is correct UTF-8 encoding and get number of words needed
    len, flags, num4byte = unsafe_checkstring(dat)
    # each 4-byte UTF-8 character needs a second UInt16 (surrogate pair)
    len += num4byte
    buf = Vector{UInt16}(len+1)
    @inbounds buf[len+1] = 0
    # Optimize case where no characters > 0x7f
    # (pure ASCII: copy! widens each UInt8 to a UInt16)
    flags == 0 && @inbounds return UTF16String(copy!(buf, dat))
    out = 0
    pos = 0
    @inbounds while out < len
        ch::UInt32 = dat[pos += 1]
        # Handle ASCII characters
        if ch <= 0x7f
            buf[out += 1] = ch
        # Handle range 0x80-0x7ff
        elseif ch < 0xe0
            buf[out += 1] = ((ch & 0x1f) << 6) | (dat[pos += 1] & 0x3f)
        # Handle range 0x800-0xffff
        elseif ch < 0xf0
            pos += 2
            buf[out += 1] = get_utf8_3byte(dat, pos, ch)
        # Handle range 0x10000-0x10ffff
        else
            pos += 3
            ch = get_utf8_4byte(dat, pos, ch)
            # output surrogate pair: 0xd7c0 == 0xd800 - (0x10000 >> 10)
            buf[out += 1] = UInt16(0xd7c0 + (ch >>> 10))
            buf[out += 1] = UInt16(0xdc00 + (ch & 0x3ff))
        end
    end
    UTF16String(buf)
end

function isvalid(::Type{UTF16String}, data::AbstractArray{UInt16})
i = 1
n = length(data) # this may include NULL termination; that's okay
while i < n # check for unpaired surrogates
if utf16_is_lead(data[i]) && utf16_is_trail(data[i+1])
i += 2
elseif utf16_is_surrogate(data[i])
return false
"
Converts a `UTF16String` to a `UTF8String`

### Returns:
* `UTF8String`

### Throws:
* `UnicodeError`
"
function convert(::Type{UTF8String}, str::UTF16String)
dat = str.data
len = sizeof(dat) >>> 1
# handle zero length string quickly
len <= 1 && return empty_utf8
# get number of bytes to allocate
len, flags, num4byte, num3byte, num2byte = unsafe_checkstring(dat, 1, len-1)
flags == 0 && @inbounds return UTF8String(copy!(Vector{UInt8}(len), 1, dat, 1, len))
return encode_to_utf8(UInt16, dat, len + num2byte + num3byte*2 + num4byte*3)
end

"
Converts an already validated vector of `UInt16` or `UInt32` to a `UTF8String`

### Input Arguments:
* `dat` Vector of code units (`UInt16` or `UInt32`), explicit `\0` is not converted
* `len` length of output in bytes

### Returns:
* `UTF8String`
"
function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just encode_to_utf8{T<:Union{UInt16, UInt32}}(dat::AbstractVector{T}, len), rather than redundantly passing T ... isn't dat always going to be some kind of vector of T?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was more for the future, where I want the first argument to really be an Encoding, see @quinnj's nice Strings.jl that he's been working on.

buf = Vector{UInt8}(len)
out = 0
pos = 0
@inbounds while out < len
ch::UInt32 = dat[pos += 1]
# Handle ASCII characters
if ch <= 0x7f
buf[out += 1] = ch
# Handle 0x80-0x7ff
elseif ch < 0x800
buf[out += 1] = 0xc0 | (ch >>> 6)
buf[out += 1] = 0x80 | (ch & 0x3f)
# Handle 0x10000-0x10ffff (if input is UInt32)
elseif ch > 0xffff # this is only for T == UInt32, should not be generated for UInt16
output_utf8_4byte!(buf, out, ch)
out += 4
# Handle surrogate pairs
elseif is_surrogate_codeunit(ch)
output_utf8_4byte!(buf, out, get_supplementary(ch, dat[pos += 1]))
out += 4
# Handle 0x800-0xd7ff, 0xe000-0xffff UCS-2 characters
else
i += 1
buf[out += 1] = 0xe0 | ((ch >>> 12) & 0x3f)
buf[out += 1] = 0x80 | ((ch >>> 6) & 0x3f)
buf[out += 1] = 0x80 | (ch & 0x3f)
end
end
return i > n || !utf16_is_surrogate(data[i])
UTF8String(buf)
end

# ASCII needs no transcoding: widen each byte to a UInt16 and NUL-terminate.
# NOTE(review): the scraped diff left two function headers (the old
# AbstractVector{UInt16} method and this one) sharing a single `end`, which
# is a syntax error; the vector method reappears in corrected form further
# down, so only the ASCIIString method is kept here.
function convert(::Type{UTF16String}, str::ASCIIString)
    dat = str.data
    @inbounds return fast_utf_copy(UTF16String, UInt16, length(dat), dat, true)
end

# Expose the underlying code-unit buffer (includes the NUL terminator)
convert(::Type{Vector{UInt16}}, str::UTF16String) = str.data
convert(::Type{Array{UInt16}}, str::UTF16String) = str.data

# Identity conversion; per the discussion below, this keeps the generic
# AbstractString method from being selected and re-encoding the string
convert(::Type{UTF16String}, str::UTF16String) = str
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that was there before - I'm pretty sure all I did was change s -> str.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, just wasn't showing up very clearly in the diff. Seems like a vacuous definition though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wondered about those myself, but ASCIIString, UTF8String and UTF32String all have the same definition... (I'll probably make them all just have one parameterized definition in another cleanup PR later)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thought just came to me... since strings are not (currently 😀) validated, maybe those converts should actually do the validity checking that the other convert methods normally do...
I think for consistency's sake, that needs to happen, but I'd have to be very careful about seeing where convert might be called with the same type... don't want to kill performance!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better use isvalid explicitly.

@nalimilan The string conversion functions (except for some bugs) all have the contract that they validate the string. However, isvalid is simply too strict to be useful in very many cases.
One of the fixes with this PR was to change to using unsafe_checkstring, which allows handling
Modified UTF-8 and CESU-8. Another PR will need to deal with the inconsistency that there is a convert method for UTF-8 that allows passing a replacement character or string for when invalid sequences are found, instead of throwing an error, but not for ASCII, UTF-16, or UTF-32...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the only behavior that makes sense for convert of a value into its own type is to be a no-op. Constructors could be given additional UTF16String(str::UTF16String) methods for validation if you're really paranoid about converting a string from somewhere else that hasn't been validated into its own type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not paranoia, it's just that since the type constructors do not guarantee a valid string (in fact, they basically let you get away with anything... because they take Vectors of raw code units, so that you can keep an alias... there is no guarantee even that a string is really immutable...)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe the only way to solve this, is to move all validation/conversion into the inner constructors... which would also eliminate the hole of making a mutable string...
But, that's not for this PR, but another...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that there is already a generic convert definition for convert{T}(::Type{T}, x::T) = x, so we don't need to duplicate that for specific types T.


# Raw pointer to the code units, for ccall interop (signed or unsigned)
unsafe_convert{T<:Union{Int16,UInt16}}(::Type{Ptr{T}}, s::UTF16String) =
    convert(Ptr{T}, pointer(s))

# Funnel arbitrary-dimension arrays through the vector method below
convert(T::Type{UTF16String}, data::AbstractArray{UInt16}) =
    convert(T, reshape(data, length(data)))

# Signed code units: reinterpret to UInt16, then convert
convert(T::Type{UTF16String}, data::AbstractArray{Int16}) =
    convert(T, reinterpret(UInt16, data))

# Validate the UTF-16 data, then copy it and append the NUL terminator
function convert(::Type{UTF16String}, dat::AbstractVector{UInt16})
    len, flags, num4byte = unsafe_checkstring(dat)
    @inbounds return fast_utf_copy(UTF16String, UInt16, len+num4byte, dat, true)
end

function convert(T::Type{UTF16String}, bytes::AbstractArray{UInt8})
isempty(bytes) && return UTF16String(UInt16[0])
isodd(length(bytes)) && throw(UnicodeError(UTF_ERR_ODD_BYTES_16, length(bytes), 0))
Expand All @@ -136,6 +281,9 @@ function convert(T::Type{UTF16String}, bytes::AbstractArray{UInt8})
UTF16String(d)
end

convert(::Type{UTF16String}, str::UTF16String) = str
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but this was about fixing bugs, not removing stuff, I was told many times not to do too many things in a single PR, so I was planning on addressing that after hopefully these bug fix PRs get in.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out, that that line in essentials.jl never gets used... when I remove the definition above, I get:

julia> @which convert(UTF16String, utf16("foo"))
convert(::Type{UTF16String}, s::AbstractString) at utf16.jl:75

which is defined as:

convert(::Type{UTF16String}, s::AbstractString) = encode16(s)

So I don't think I can do anything about those, they are really there for a reason, to keep the AbstractString version from being used.


# Conversion entry point for strings, vectors, and raw pointers
utf16(x) = convert(UTF16String, x)
# Wrap `len` code units at `p` as a Vector{UInt16}, then convert (which copies)
utf16(p::Ptr{UInt16}, len::Integer) = utf16(pointer_to_array(p, len))
utf16(p::Ptr{Int16}, len::Integer) = utf16(convert(Ptr{UInt16}, p), len)
function utf16(p::Union{Ptr{UInt16}, Ptr{Int16}})
Expand All @@ -154,7 +302,7 @@ function map(fun, str::UTF16String)
end
uc = reinterpret(UInt32, c2)
if uc < 0x10000
if utf16_is_surrogate(UInt16(uc))
if is_surrogate_codeunit(UInt16(uc))
throw(UnicodeError(UTF_ERR_INVALID_CHAR, 0, uc))
end
push!(buf, UInt16(uc))
Expand Down
15 changes: 8 additions & 7 deletions base/utf32.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# This file is a part of Julia. License is MIT: http://julialang.org/license

# UTF-32 basic functions
# Each element of s.data is one Char; the buffer ends with a NUL terminator,
# hence the `- 1` in endof/length
next(s::UTF32String, i::Int) = (s.data[i], i+1)
endof(s::UTF32String) = length(s.data) - 1
length(s::UTF32String) = length(s.data) - 1
Expand Down Expand Up @@ -40,8 +41,8 @@ function convert{T<:ByteString}(::Type{T}, data::AbstractVector{Char})
convert(T, takebuf_string(s))
end

# Expose the underlying Char buffer (includes the NUL terminator).
# NOTE(review): the scraped diff retained both the old `Array{Char,1}`/`s`
# and new `Vector{Char}`/`str` spellings of the same method (Vector{Char}
# is Array{Char,1}), a redundant duplicate definition; only the new
# spelling is kept.
convert(::Type{Vector{Char}}, str::UTF32String) = str.data
convert(::Type{Array{Char}}, str::UTF32String) = str.data

reverse(s::UTF32String) = UTF32String(reverse!(copy(s.data), 1, length(s)))

Expand All @@ -60,19 +61,19 @@ function convert(T::Type{UTF32String}, bytes::AbstractArray{UInt8})
elseif data[1] == Char(0xfffe0000) # byte-swapped
d = Array(Char, length(data))
for i = 2:length(data)
d[i-1] = bswap(data[i])
@inbounds d[i-1] = bswap(data[i])
end
else
d = Array(Char, length(data) + 1)
copy!(d, 1, data, 1, length(data)) # assume native byte order
end
d[end] = Char(0) # NULL terminate
d[end] = 0 # NULL terminate
UTF32String(d)
end

# A UTF-32 sequence is valid iff every element is a valid Unicode scalar.
# NOTE(review): the scraped diff kept both the old `reinterpret(UInt32, ...)`
# check and the new `UInt32(...)` check, validating every element twice;
# only the new check is kept.
function isvalid(::Type{UTF32String}, str::Union{Vector{Char}, Vector{UInt32}})
    for i=1:length(str)
        @inbounds if !isvalid(Char, UInt32(str[i])) ; return false ; end
    end
    return true
end
Expand All @@ -89,9 +90,9 @@ end
function map(f, s::UTF32String)
d = s.data
out = similar(d)
out[end] = Char(0)
out[end] = 0

for i = 1:(length(d)-1)
@inbounds for i = 1:(length(d)-1)
c2 = f(d[i])
if !isa(c2, Char)
throw(UnicodeError(UTF_ERR_MAP_CHAR, 0, 0))
Expand Down
Loading