Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support the flag ' for thousand separator in Printf, issue #29077 #42145

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
168 changes: 150 additions & 18 deletions stdlib/Printf/src/Printf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,22 @@ struct Spec{T} # T => %type => Val{'type'}
space::Bool
zero::Bool
hash::Bool
apostrophe::Bool
width::Int
precision::Int
end

# Recreate the format specifier string from a typed Spec.
# Flags are written in the order `-`, `+`, space, `0`, `#`, `'`; the width is written
# only when positive and the precision only when non-negative. `modifier` is inserted
# verbatim just before the character returned by `char(T)`.
Base.string(f::Spec{T}; modifier::String="") where {T} =
    string("%", f.leftalign ? "-" : "", f.plus ? "+" : "", f.space ? " " : "",
        f.zero ? "0" : "", f.hash ? "#" : "", f.apostrophe ? "'" : "",
        f.width > 0 ? f.width : "",
        f.precision == 0 ? ".0" : f.precision > 0 ? ".$(f.precision)" : "",
        modifier, char(T))
# Display a Spec as its reconstructed format specifier string.
Base.show(io::IO, f::Spec) = print(io, string(f))

# Build a `%f` spec with precision 0 that inherits every flag (including the
# thousands-separator flag) and the width from `s`.
floatfmt(s::Spec{T}) where {T} =
    Spec{Val{'f'}}(s.leftalign, s.plus, s.space, s.zero, s.hash, s.apostrophe, s.width, 0)
# Build a `%x` spec from `s` with the `#` flag forced on; the precision is 16 when
# `sizeof(x) == 8` and 8 otherwise.
ptrfmt(s::Spec{T}, x) where {T} =
    Spec{Val{'x'}}(s.leftalign, s.plus, s.space, s.zero, true, s.apostrophe, s.width,
        sizeof(x) == 8 ? 16 : 8)

"""
Printf.Format(format_str)
Expand Down Expand Up @@ -105,7 +106,7 @@ function Format(f::AbstractString)
pos += 1
# positioned at start of first format str %
# parse flags
leftalign = plus = space = zero = hash = false
leftalign = plus = space = zero = hash = apostrophe = false
while true
if b == UInt8('-')
leftalign = true
Expand All @@ -117,6 +118,8 @@ function Format(f::AbstractString)
zero = true
elseif b == UInt8('#')
hash = true
elseif b == UInt8(''')
apostrophe = true
else
break
end
Expand Down Expand Up @@ -178,7 +181,7 @@ function Format(f::AbstractString)
elseif type <: Floats && !parsedprecdigits
precision = 6
end
push!(fmts, Spec{type}(leftalign, plus, space, zero, hash, width, precision))
push!(fmts, Spec{type}(leftalign, plus, space, zero, hash, apostrophe, width, precision))
start = pos
while pos <= len
b = bytes[pos]
Expand Down Expand Up @@ -287,17 +290,17 @@ fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
fmt(buf, pos, arg, floatfmt(spec))

@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints}
leftalign, plus, space, zero, hash, width, prec =
spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
leftalign, plus, space, zero, hash, apostrophe, width, prec =
spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.apostrophe, spec.width, spec.precision
bs = base(T)
arg2 = toint(arg)
n = i = ndigits(arg2, base=bs, pad=1)
neg = arg2 < 0
x = arg2 isa Base.BitSigned ? unsigned(abs(arg2)) : abs(arg2)
arglen = n + (neg || (plus | space)) +
numsep = apostrophe ? countthousandsep(spec, arg2) : 0
x, neg = arg2 < 0 ? (-arg2, true) : (arg2, false)
arglen = n + (neg || (plus | space)) + numsep +
(T == Val{'o'} && hash ? 1 : 0) +
(T == Val{'x'} && hash ? 2 : 0) + (T == Val{'X'} && hash ? 2 : 0)
arglen2 = arglen < width && prec > 0 ? arglen + min(max(0, prec - n), width - arglen) : arglen
arglen2 = arglen < width && prec > 0 ? arglen + min(max(0, prec - n), width - arglen) - numsep : arglen
if !leftalign && !zero && arglen2 < width
# pad left w/ spaces
for _ = 1:(width - arglen2)
Expand Down Expand Up @@ -329,8 +332,8 @@ fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
buf[pos] = UInt8('0')
pos += 1
end
elseif n < prec
for _ = 1:(prec - n)
elseif (n + numsep) < prec
for _ = 1:(prec - (n + numsep))
buf[pos] = UInt8('0')
pos += 1
end
Expand All @@ -340,6 +343,14 @@ fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
pos += 1
end
end
headpos = pos
if apostrophe && numsep > 0 && T in (Val{'d'}, Val{'i'}, Val{'u'})
# pad left for thousand separators
for _ = 1:numsep
buf[pos] = UInt8(' ')
pos += 1
end
end
while i > 0
@inbounds buf[pos + i - 1] = bs == 16 ?
(T == Val{'x'} ? hex[(x & 0x0f) + 1] : HEX[(x & 0x0f) + 1]) :
Expand All @@ -354,6 +365,10 @@ fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
i -= 1
end
pos += n
if apostrophe && numsep > 0
onesplace = pos - 1
insertsep(buf, headpos, numsep, onesplace)
end
if leftalign && arglen2 < width
# pad right
for _ = 1:(width - arglen2)
Expand Down Expand Up @@ -397,11 +412,31 @@ _snprintf(ptr, siz, str, arg) =
const __BIG_FLOAT_MAX__ = 8192

@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
leftalign, plus, space, zero, hash, width, prec =
spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
leftalign, plus, space, zero, hash, apostrophe, width, prec =
spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.apostrophe, spec.width, spec.precision
x = tofloat(arg)
numsep = countthousandsep(spec, x)
hassep = apostrophe && numsep > 0 && T in (Val{'f'}, Val{'F'}, Val{'g'}, Val{'G'})
headpos = pos
if x isa BigFloat
if isfinite(x)
if hassep
# pad left for thousand separators
for _ = 1:numsep
buf[pos] = UInt8(' ')
pos += 1
end
spec = Spec{T}(
spec.leftalign,
spec.plus,
spec.space,
spec.zero,
spec.hash,
spec.apostrophe,
spec.width - numsep,
spec.precision,
)
end
GC.@preserve buf begin
siz = length(buf) - pos + 1
str = string(spec; modifier="R")
Expand All @@ -415,6 +450,32 @@ const __BIG_FLOAT_MAX__ = 8192
len = _snprintf(pointer(buf, pos), len + 1, str, x)
end
len > 0 || throw(ArgumentError("invalid printf formatting $str for BigFloat"))
isexp = UInt8('e') in buf[pos:(pos + len - 1)] || UInt8('E') in buf[pos:(pos + len - 1)]
if hassep && isexp
# fix left padding when scientific notation is used
for _ = 1:numsep
pos -= 1
end
spec = Spec{T}(
spec.leftalign,
spec.plus,
spec.space,
spec.zero,
spec.hash,
spec.apostrophe,
spec.width + numsep,
spec.precision,
)
siz = length(buf) - pos + 1
str = string(spec; modifier="R")
len = _snprintf(pointer(buf, pos), siz, str, x)
end
if hassep && !isexp
neg = x < 0 || x === -Base.zero(x)
lenleftpad = findfirst(!=(UInt(' ')), buf[headpos:(pos + len - 1)]) - numsep - 1
onesplace = findonesplace(buf, headpos, pos + len - 1)
insertsep(buf, headpos, numsep, onesplace, numskip=(neg ? 1 : 0) + lenleftpad)
end
return pos + len
end
end
Expand All @@ -423,7 +484,20 @@ const __BIG_FLOAT_MAX__ = 8192
if T == Val{'e'} || T == Val{'E'}
newpos = Ryu.writeexp(buf, pos, x, prec, plus, space, hash, char(T), UInt8('.'))
elseif T == Val{'f'} || T == Val{'F'}
if hassep
# pad left for thousand separators
for _ = 1:numsep
buf[pos] = UInt8(' ')
pos += 1
end
end
newpos = Ryu.writefixed(buf, pos, x, prec, plus, space, hash, UInt8('.'))
if hassep
neg = x < 0 || x === -Base.zero(x)
onesplace = findonesplace(buf, headpos, newpos - 1)
insertsep(buf, headpos, numsep, onesplace, numskip=(neg ? 1 : 0))
pos -= numsep
end
elseif T == Val{'g'} || T == Val{'G'}
if isinf(x) || isnan(x)
newpos = Ryu.writeshortest(buf, pos, x, plus, space)
Expand All @@ -446,7 +520,20 @@ const __BIG_FLOAT_MAX__ = 8192
flipsign(exp, sign)
end
if -4 ≤ exp < prec
if hassep
# pad left for thousand separators
for _ = 1:numsep
buf[pos] = UInt8(' ')
pos += 1
end
end
newpos = Ryu.writefixed(buf, pos, x, prec - (exp + 1), plus, space, hash, UInt8('.'), !hash)
if hassep
neg = x < 0 || x === -Base.zero(x)
onesplace = findonesplace(buf, headpos, newpos - 1)
insertsep(buf, headpos, numsep, onesplace, numskip=(neg ? 1 : 0))
pos -= numsep
end
else
newpos = Ryu.writeexp(buf, pos, x, prec - 1, plus, space, hash, T == Val{'g'} ? UInt8('e') : UInt8('E'), UInt8('.'), !hash)
end
Expand Down Expand Up @@ -762,6 +849,48 @@ const UNROLL_UPTO = 16
return pos
end


# Return the absolute index in `buf` of the ones-place digit of the number written in
# `buf[lbound:rbound]`.
#
# - If the range contains a decimal point, the ones place is the byte just before the
#   last `.`.
# - Otherwise it is the first digit that is immediately followed by a space (output
#   followed by padding), or `rbound` when no such digit exists.
#
# The result is always an index into `buf` itself (not relative to `lbound`), which is
# what `insertsep` expects.
@inline function findonesplace(buf, lbound, rbound)
    # scan backwards in place: no slice copy, and the index stays absolute
    for i in rbound:-1:lbound
        buf[i] == UInt8('.') && return i - 1
    end
    # no decimal point: look for a digit directly followed by padding
    for i in lbound:(rbound - 1)
        if UInt8('0') <= buf[i] <= UInt8('9') && buf[i + 1] == UInt8(' ')
            return i
        end
    end
    return rbound
end


# Insert `numsep` thousands separators (`,`) into the number stored in `buf`.
#
# On entry, `buf[headpos:onesplace]` holds `numsep` placeholder bytes followed by
# `numskip` prefix bytes that must stay in front of the digits (the callers pass the
# sign and any left padding here) and then the integer digits, ending at `onesplace`.
# The prefix and digits are shifted left over the placeholders and a `,` is written
# before every complete group of three digits. Bytes after `onesplace` are untouched.
# Mutates `buf` in place and returns `nothing`.
function insertsep(buf, headpos, numsep, onesplace; numskip=0)
    intlength = (onesplace - headpos + 1) - numsep - numskip

    # leading chunk = skipped prefix + the 1..3 digits in front of the first separator
    leadlen = mod1(intlength, 3) + numskip
    # Destination never runs ahead of the source, so a forward element copy is safe
    # and needs no temporary array.
    for k in 0:(leadlen - 1)
        buf[headpos + k] = buf[headpos + numsep + k]
    end

    dst = headpos + leadlen   # where the next separator goes
    src = dst + numsep        # first digit of the next group of three
    for _ in 1:numsep
        buf[dst] = UInt8(',')
        for k in 1:3
            buf[dst + k] = buf[src + k - 1]
        end
        dst += 4
        src += 3
    end
    return nothing
end


# Number of thousands separators needed to print `x` with spec `f`.
# The fallback method returns 0 for spec types that never take separators.
countthousandsep(::Spec, x) = 0
# For integer and float conversions: 0 for NaN, infinities and non-decimal bases;
# otherwise `x` is rounded to `f.precision` digits, truncated to an integer, and one
# separator is counted per complete group of three digits after the leading group.
@inline function countthousandsep(f::Spec{T}, x) where {T <: Union{Ints, Floats}}
    (isnan(x) || isinf(x) || base(T) != 10) && return 0
    # BigInt so that values beyond the native integer range can still be counted
    intpart = trunc(BigInt, round(x, digits=f.precision))
    return div(ndigits(intpart) - 1, 3)
end

function plength(f::Spec{T}, x) where {T <: Chars}
c = Char(first(x))
w = textwidth(c)
Expand All @@ -778,13 +907,13 @@ end

# Upper bound on the number of bytes needed to print `x` with an integer spec:
# the larger of the requested width and precision + digit count + separators + 5.
# NOTE(review): the `+ 5` looks like slack for sign/prefix bytes; confirm.
function plength(f::Spec{T}, x) where {T <: Ints}
    x2 = toint(x)
    # Count separators on the converted value, and only when the `'` flag is set,
    # mirroring what `fmt` does when it writes the number.
    numsep = f.apostrophe ? countthousandsep(f, x2) : 0
    return max(f.width, f.precision + ndigits(x2, base=base(T), pad=1) + numsep + 5)
end

# Upper bounds on the number of bytes needed for floating-point output. Room for
# thousands separators is added only when the `'` flag is set.
# NOTE(review): 309 and 17 look like bounds on Float64 integer and significant
# digits; confirm.
plength(f::Spec{T}, x::AbstractFloat) where {T <: Ints} =
    max(f.width,
        0 + 309 + 17 + f.hash + (f.apostrophe ? countthousandsep(f, x) : 0) + 5)
plength(f::Spec{T}, x) where {T <: Floats} =
    max(f.width,
        f.precision + 309 + 17 + f.hash + (f.apostrophe ? countthousandsep(f, x) : 0) + 5)
# A position counter spec writes nothing to the buffer.
plength(::Spec{PositionCounter}, x) = 0

@inline function computelen(substringranges, formats, args)
Expand Down Expand Up @@ -860,6 +989,9 @@ Padded with zeros to length 6 000123

julia> @printf "Use shorter of decimal or scientific %g %g" 1.23 12300000.0
Use shorter of decimal or scientific 1.23 1.23e+07

julia> @printf "Use thousand separators %'d" 1234567
Use thousand separators 1,234,567
```

For a systematic specification of the format, see [here](https://www.cplusplus.com/reference/cstdio/printf/).
Expand Down