Skip to content

Commit

Permalink
make "dec" and ryu functions faster and simpler (#51273)
Browse files Browse the repository at this point in the history
We had some common code in `Ryu.append_c_digits` that can be combined
with Base logic for the same thing. But it turns out all of this
duplicated code in Ryu seems to just make it run slightly slower in most
cases. The old version had many more branches to check, even though
often numbers are small, so only the last check is meaningful. But the
assumption that it would be faster even if all of them were used also
seems to not hold up in practice. Particularly for a function like
`append_nine_digits` which unrolls completely, but the complicated
version has slightly more data dependencies because of the way it is
written.

Similarly, we replace `unsafe_copyto!` with `@inbounds` indexing, since this is
better for the optimizer, which doesn't need to treat this operation as
an unknown reference escape.

Lastly, we use the append_nine_digits trick from Ryu to make printing of
arbitrary big numbers much faster.

```
julia> @btime string(typemax(Int128))
  402.345 ns (2 allocations: 120 bytes) # before
  151.139 ns (2 allocations: 120 bytes) # after
```
  • Loading branch information
vtjnash committed Sep 13, 2023
1 parent 377f9df commit e9d9314
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 285 deletions.
86 changes: 67 additions & 19 deletions base/intfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ function bit_ndigits0z(x::Base.BitUnsigned64)
end
function bit_ndigits0z(x::UInt128)
n = 0
while x > 0x8ac7230489e80000
while x > 0x8ac7230489e80000 # 10e18
x = div(x,0x8ac7230489e80000)
n += 19
end
Expand Down Expand Up @@ -724,7 +724,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool)
x >>= 0x1
i -= 1
end
if neg; @inbounds a[1]=0x2d; end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
end

Expand All @@ -738,29 +738,77 @@ function oct(x::Unsigned, pad::Int, neg::Bool)
x >>= 0x3
i -= 1
end
if neg; @inbounds a[1]=0x2d; end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
end

# 2-digit decimal characters ("00":"99")
const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
# Lookup table of the 100 two-character decimal strings "00" through "99", one UInt16
# per value: entry `i + 1` describes the number `i`. The low byte is the ASCII tens
# digit and the high byte is the ASCII ones digit, so a consumer emits `d100 % UInt8`
# first and `(d100 >> 0x8) % UInt8` second to obtain the digits in reading order.
const _dec_d100 = UInt16[
# generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
# (`<<` binds tighter than `+`, so the ones digit is shifted into the high byte)
# "00", "01", "02", "03", and so on, with each pair stored in little-endian byte order
0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939
]

function dec(x::Unsigned, pad::Int, neg::Bool)
n = neg + ndigits(x, pad=pad)
a = StringVector(n)
i = n
@inbounds while i >= 2
d, r = divrem(x, 0x64)
d100 = _dec_d100[(r % Int)::Int + 1]
a[i-1] = d100 % UInt8
a[i] = (d100 >> 0x8) % UInt8
x = oftype(x, d)
function append_c_digits(olength::Int, digits::Unsigned, buf, pos::Int)
i = olength
while i >= 2
d, c = divrem(digits, 0x64)
digits = oftype(digits, d)
@inbounds d100 = _dec_d100[(c % Int) + 1]
@inbounds buf[pos + i - 2] = d100 % UInt8
@inbounds buf[pos + i - 1] = (d100 >> 0x8) % UInt8
i -= 2
end
if i > neg
@inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
if i == 1
@inbounds buf[pos] = UInt8('0') + rem(digits, 0xa) % UInt8
i -= 1
end
if neg; @inbounds a[1]=0x2d; end
return pos + olength
end

# Write exactly nine decimal characters for `digits` into `buf`, starting at index `pos`,
# and return the index just past the last character written (`pos + 9`).
#
# A zero value is emitted directly as nine '0' bytes; any other value is delegated to
# `append_c_digits` with a fixed length of 9. All stores are `@inbounds`, so the caller
# must guarantee that `buf[pos:pos+8]` is valid.
function append_nine_digits(digits::Unsigned, buf, pos::Int)
    if digits != 0
        # The literal length lets the inlined digit loop be fully unrolled.
        return @inline append_c_digits(9, digits, buf, pos)
    end
    stop = pos + 9
    while pos < stop
        @inbounds buf[pos] = UInt8('0')
        pos += 1
    end
    return pos
end

# Write `olength` decimal characters for `digits` into `buf`, starting at index `pos`,
# and return `pos + olength`.
#
# While the value does not fit in a native `UInt`, nine low-order digits at a time are
# split off (dividing by 10^9) and written with `append_nine_digits`; whatever is left
# is handed to `append_c_digits` as a `UInt`.
#
# n.b. `olength` may exceed the number of digits actually needed (the output is then
# padded with zeros). If it is too small the printed number is undefined, and may mix
# bits of both the high and low parts of `digits`.
function append_c_digits_fast(olength::Int, digits::Unsigned, buf, pos::Int)
    chunk = 0x3b9aca00 # 10^9 as a UInt32
    remaining = olength
    while remaining > 9 && digits > typemax(UInt)
        # peel off nine digits per step so the remaining math stays in cheap, native-size ops
        quo, low = divrem(digits, chunk)
        append_nine_digits(low % UInt32, buf, pos + remaining - 9)
        digits = oftype(digits, quo)
        remaining -= 9
    end
    append_c_digits(remaining, digits % UInt, buf, pos)
    return pos + olength
end


# Return the decimal representation of `x` as a `String`, using the digit count given by
# `ndigits(x, pad=pad)` and prefixed with '-' when `neg` is true.
function dec(x::Unsigned, pad::Int, neg::Bool)
    len = ndigits(x, pad=pad) + neg
    bytes = StringVector(len)
    # Fill all `len` slots with digits; when `neg` is set, slot 1 holds a padding
    # character that is overwritten by the sign just below.
    append_c_digits_fast(len, x, bytes, 1)
    if neg
        @inbounds bytes[1] = 0x2d # UInt8('-')
    end
    return String(bytes)
end

Expand All @@ -781,7 +829,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool)
d = (x % UInt8)::UInt8 & 0xf
@inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30)
end
if neg; @inbounds a[1]=0x2d; end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
end

Expand All @@ -806,7 +854,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
end
i -= 1
end
if neg; @inbounds a[1]=0x2d; end
neg && (@inbounds a[1] = 0x2d) # UInt8('-')
String(a)
end

Expand Down
70 changes: 37 additions & 33 deletions base/ryu/exp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,33 @@ function writeexp(buf, pos, v::T,

# special cases
if x == 0
buf[pos] = UInt8('0')
@inbounds buf[pos] = UInt8('0')
pos += 1
if precision > 0 && !trimtrailingzeros
buf[pos] = decchar
@inbounds buf[pos] = decchar
pos += 1
for _ = 1:precision
buf[pos] = UInt8('0')
@inbounds buf[pos] = UInt8('0')
pos += 1
end
elseif hash
buf[pos] = decchar
@inbounds buf[pos] = decchar
pos += 1
end
buf[pos] = expchar
buf[pos + 1] = UInt8('+')
buf[pos + 2] = UInt8('0')
buf[pos + 3] = UInt8('0')
@inbounds buf[pos] = expchar
@inbounds buf[pos + 1] = UInt8('+')
@inbounds buf[pos + 2] = UInt8('0')
@inbounds buf[pos + 3] = UInt8('0')
return pos + 4
elseif isnan(x)
buf[pos] = UInt8('N')
buf[pos + 1] = UInt8('a')
buf[pos + 2] = UInt8('N')
@inbounds buf[pos] = UInt8('N')
@inbounds buf[pos + 1] = UInt8('a')
@inbounds buf[pos + 2] = UInt8('N')
return pos + 3
elseif !isfinite(x)
buf[pos] = UInt8('I')
buf[pos + 1] = UInt8('n')
buf[pos + 2] = UInt8('f')
@inbounds buf[pos] = UInt8('I')
@inbounds buf[pos + 1] = UInt8('n')
@inbounds buf[pos + 2] = UInt8('f')
return pos + 3
end

Expand Down Expand Up @@ -80,10 +80,10 @@ function writeexp(buf, pos, v::T,
if precision > 1
pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
else
buf[pos] = UInt8('0') + digits
@inbounds buf[pos] = UInt8('0') + digits
pos += 1
if hash
buf[pos] = decchar
@inbounds buf[pos] = decchar
pos += 1
end
end
Expand Down Expand Up @@ -121,10 +121,10 @@ function writeexp(buf, pos, v::T,
if precision > 1
pos = append_d_digits(availableDigits, digits, buf, pos, decchar)
else
buf[pos] = UInt8('0') + digits
@inbounds buf[pos] = UInt8('0') + digits
pos += 1
if hash
buf[pos] = decchar
@inbounds buf[pos] = decchar
pos += 1
end
end
Expand Down Expand Up @@ -162,7 +162,7 @@ function writeexp(buf, pos, v::T,
if printedDigits != 0
if digits == 0
for _ = 1:maximum
buf[pos] = UInt8('0')
@inbounds buf[pos] = UInt8('0')
pos += 1
end
else
Expand All @@ -172,10 +172,10 @@ function writeexp(buf, pos, v::T,
if precision > 1
pos = append_d_digits(maximum, digits, buf, pos, decchar)
else
buf[pos] = UInt8('0') + digits
@inbounds buf[pos] = UInt8('0') + digits
pos += 1
if hash
buf[pos] = decchar
@inbounds buf[pos] = decchar
pos += 1
end
end
Expand All @@ -184,52 +184,56 @@ function writeexp(buf, pos, v::T,
roundPos = pos
while true
roundPos -= 1
if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
buf[roundPos + 1] = UInt8('1')
if roundPos == (startpos - 1) || (@inbounds buf[roundPos]) == UInt8('-') || (plus && (@inbounds buf[roundPos]) == UInt8('+')) || (space && (@inbounds buf[roundPos]) == UInt8(' '))
@inbounds buf[roundPos + 1] = UInt8('1')
e += 1
break
end
c = roundPos > 0 ? buf[roundPos] : 0x00
c = roundPos > 0 ? (@inbounds buf[roundPos]) : 0x00
if c == decchar
continue
elseif c == UInt8('9')
buf[roundPos] = UInt8('0')
@inbounds buf[roundPos] = UInt8('0')
roundUp = 1
continue
else
if roundUp == 2 && UInt8(c) % 2 == 0
break
end
buf[roundPos] = c + 1
@inbounds buf[roundPos] = c + 1
break
end
end
end
if trimtrailingzeros
while buf[pos - 1] == UInt8('0')
while @inbounds buf[pos - 1] == UInt8('0')
pos -= 1
end
if buf[pos - 1] == decchar && !hash
if @inbounds buf[pos - 1] == decchar && !hash
pos -= 1
end
end
buf[pos] = expchar
pos += 1
if e < 0
buf[pos] = UInt8('-')
@inbounds buf[pos] = UInt8('-')
pos += 1
e = -e
else
buf[pos] = UInt8('+')
@inbounds buf[pos] = UInt8('+')
pos += 1
end
if e >= 100
c = e % 10
unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * div(e, 10) + 1, 2)
buf[pos + 2] = UInt8('0') + c
@inbounds d100 = DIGIT_TABLE16[div(e, 10) + 1]
@inbounds buf[pos] = d100 % UInt8
@inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
@inbounds buf[pos + 2] = UInt8('0') + c
pos += 3
else
unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * e + 1, 2)
@inbounds d100 = DIGIT_TABLE16[e + 1]
@inbounds buf[pos] = d100 % UInt8
@inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8
pos += 2
end
return pos
Expand Down
2 changes: 1 addition & 1 deletion base/ryu/fixed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ function writefixed(buf, pos, v::T,
pos = append_nine_digits(digits, buf, pos)
elseif digits != 0
olength = decimallength(digits)
pos = append_n_digits(olength, digits, buf, pos)
pos = append_c_digits(olength, digits, buf, pos)
nonzero = true
end
i -= 1
Expand Down
Loading

0 comments on commit e9d9314

Please sign in to comment.