Skip to content

Commit

Permalink
tryparse: parse string to Nullable
Browse files Browse the repository at this point in the history
Introduces the tryparse method:
- tryparse{T<:Integer}(::Type{T<:Integer},s::AbstractString)
- tryparse(::Type{Float..},s::AbstractString)
- a few variants of the above

And:
- tryparse(Float.., ...) call the corresponding C functions jl_try_strtof, jl_try_substrtof, jl_try_strtod and jl_try_substrtod.
- The parseint, parsefloat, float64_isvalid and float32_isvalid methods wrap the corresponding tryparse methods.
- The jl_strtod, jl_strtof, ... functions are wrappers over the jl_try_str... functions.

This should fix #10498 as well.

Ref: discussions at #9316, #3631, #5704
  • Loading branch information
tanmaykm committed Mar 17, 2015
1 parent 2a4fd44 commit 2c327d3
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 130 deletions.
1 change: 0 additions & 1 deletion base/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -277,4 +277,3 @@ immutable Nullable{T}
Nullable() = new(true)
Nullable(value::T) = new(false, value)
end

21 changes: 0 additions & 21 deletions base/combinatorics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,6 @@ const _fact_table64 =
87178291200,1307674368000,20922789888000,355687428096000,6402373705728000,
121645100408832000,2432902008176640000]

const _fact_table128 =
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]

function factorial_lookup(n::Integer, table, lim)
n < 0 && throw(DomainError())
n > lim && throw(OverflowError())
Expand All @@ -30,8 +11,6 @@ function factorial_lookup(n::Integer, table, lim)
return oftype(n, f)
end

factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)
factorial(n::Union(Int64,UInt64)) = factorial_lookup(n, _fact_table64, 20)

if Int === Int32
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ export
fldmod,
flipsign,
float,
tryparse,
floor,
fma,
frexp,
Expand Down
18 changes: 13 additions & 5 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export BigInt
import Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), ($),
binomial, cmp, convert, div, divrem, factorial, fld, gcd, gcdx, lcm, mod,
ndigits, promote_rule, rem, show, isqrt, string, isprime, powermod,
sum, trailing_zeros, trailing_ones, count_ones, base, parseint,
sum, trailing_zeros, trailing_ones, count_ones, base, parseint, tryparse_internal,
serialize, deserialize, bin, oct, dec, hex, isequal, invmod,
prevpow2, nextpow2, ndigits0z, widen, signed

Expand Down Expand Up @@ -76,15 +76,23 @@ signed(x::BigInt) = x
BigInt(x::BigInt) = x
BigInt(s::AbstractString) = parseint(BigInt,s)

function Base.parseint_nocheck(::Type{BigInt}, s::AbstractString, base::Int)
function tryparse_internal(::Type{BigInt}, s::AbstractString, base::Int, raise::Bool)
_n = Nullable{BigInt}()
s = bytestring(s)
sgn, base, i = Base.parseint_preamble(true,s,base)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
return _n
end
z = BigInt()
err = ccall((:__gmpz_set_str, :libgmp),
Int32, (Ptr{BigInt}, Ptr{UInt8}, Int32),
&z, SubString(s,i), base)
err == 0 || throw(ArgumentError("invalid BigInt: $(repr(s))"))
return sgn < 0 ? -z : z
if err != 0
raise && throw(ArgumentError("invalid BigInt: $(repr(s))"))
return _n
end
Nullable(sgn < 0 ? -z : z)
end

function BigInt(x::Union(Clong,Int32))
Expand Down Expand Up @@ -217,7 +225,7 @@ function serialize(s, n::BigInt)
serialize(s, base(62,n))
end

deserialize(s, ::Type{BigInt}) = Base.parseint_nocheck(BigInt, deserialize(s), 62)
deserialize(s, ::Type{BigInt}) = get(tryparse_internal(BigInt, deserialize(s), 62, true))

# Binary ops
for (fJ, fC) in ((:+, :add), (:-,:sub), (:*, :mul),
Expand Down
4 changes: 2 additions & 2 deletions base/nullable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ convert( ::Type{Nullable }, ::Void) = Nullable{Union()}()

function show{T}(io::IO, x::Nullable{T})
if x.isnull
@printf(io, "Nullable{%s}()", repr(T))
println(io, "Nullable{$(repr(T))}()")
else
@printf(io, "Nullable(%s)", repr(x.value))
println(io, "Nullable($(repr(x.value)))")
end
end

Expand Down
129 changes: 81 additions & 48 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1487,27 +1487,33 @@ parseint{T<:Integer}(::Type{T}, c::Char, base::Integer) = convert(T,parseint(c,b
parseint{T<:Integer}(::Type{T}, c::Char) = convert(T,parseint(c))

function parseint_next(s::AbstractString, i::Int=start(s))
done(s,i) && throw(ArgumentError("premature end of integer: $(repr(s))"))
done(s,i) && (return Char(0), 0, 0)
j = i
c, i = next(s,i)
c, i, j
end

function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
c, i, j = parseint_next(s)

while isspace(c)
c, i, j = parseint_next(s,i)
end
(j == 0) && (return 0, 0, 0)

sgn = 1
if signed
if c == '-' || c == '+'
(c == '-') && (sgn = -1)
c, i, j = parseint_next(s,i)
end
end

while isspace(c)
c, i, j = parseint_next(s,i)
end
(j == 0) && (return 0, 0, 0)

if base == 0
if c == '0' && !done(s,i)
c, i = next(s,i)
Expand All @@ -1522,94 +1528,121 @@ function parseint_preamble(signed::Bool, s::AbstractString, base::Int)
return sgn, base, j
end

function parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int)
safe_add{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? (n1 > (typemax(T) - n2)) : (n1 < (typemin(T) - n2))) ? Nullable{T}() : Nullable{T}(n1 + n2)
safe_mul{T<:Integer}(n1::T, n2::T) = ((n2 > 0) ? ((n1 > div(typemax(T),n2)) || (n1 < div(typemin(T),n2))) :
(n2 < -1) ? ((n1 > div(typemin(T),n2)) || (n1 < div(typemax(T),n2))) :
((n2 == -1) && n1 == typemin(T))) ? Nullable{T}() : Nullable{T}(n1 * n2)

function tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, a::Int, raise::Bool)
_n = Nullable{T}()
sgn, base, i = parseint_preamble(T<:Signed,s,base)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
return _n
end
c, i = parseint_next(s,i)
if i == 0
raise && throw(ArgumentError("premature end of integer: $(repr(s))"))
return _n
end

base = convert(T,base)
## FIXME: remove 128-bit specific code once 128-bit div doesn't rely on BigInt
m::T = T===UInt128 || T===Int128 ? typemax(T) : div(typemax(T)-base+1,base)

This comment has been minimized.

Copy link
@tkelman

tkelman Mar 21, 2015

Contributor

Interestingly, this change makes parsing Int128 literals rely on compiler intrinsics that MSVC doesn't have. Those missing intrinsics are the same reason the MSVC build immediately fails on tests (rand(1:2) doesn't work, since it promotes to Int128 somewhere along the way), hopefully we'll be able to get those intrinsics from LLVM after we upgrade.

This comment has been minimized.

Copy link
@JeffBezanson

JeffBezanson Mar 21, 2015

Member

rand(1:2) really should not use Int128. Could you dig into why that's happening?

This comment has been minimized.

Copy link
@tkelman

tkelman Mar 21, 2015

Contributor

I don't know jack about RNG's (aside from, someone should port PCG-random to Julia). I think we need to ask @mschauer or @rfourquet about cf26c7e - there was an attempted change at 57bd2b7 but that got reverted.

This comment has been minimized.

Copy link
@mschauer

mschauer Mar 21, 2015

Contributor

The number of different values representable by an UInt64 is a number not representable by an UInt64. So if you ask how many times the range 1:k fits into 0:typemax(UInt64) you need div(typemax(UInt64)+1,k). Maybe this can be avoided. The change 57bd2b7 does not adress this at all.

This comment has been minimized.

This comment has been minimized.

Copy link
@tkelman

tkelman Mar 22, 2015

Contributor

thanks, are the answers there usable to avoid UInt128?

This comment has been minimized.

Copy link
@mschauer

mschauer Mar 23, 2015

Contributor

yes, I can make a pull request. maxmultiple becomes
maxmultiple{T<:Unsigned}(k::T) = div(typemax(T) - k + one(k), k + (k == 0))*k + k - one(k)

This comment has been minimized.

Copy link
@mschauer

mschauer Mar 23, 2015

Contributor

@tkelman Here you go #10606

m::T = div(typemax(T)-base+1,base)
n::T = 0
while n <= m
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
return _n
end
n *= base
n += d
if done(s,i)
n *= sgn
return n
return Nullable{T}(n)
end
c, i = next(s,i)
isspace(c) && break
end
(T <: Signed) && (n *= sgn)
while !isspace(c)
d::T = '0' <= c <= '9' ? c-'0' :
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
d < base || throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
'A' <= c <= 'Z' ? c-'A'+10 :
'a' <= c <= 'z' ? c-'a'+a : base
if d >= base
raise && throw(ArgumentError("invalid base $base digit $(repr(c)) in $(repr(s))"))
return _n
end
(T <: Signed) && (d *= sgn)
n = checked_mul(n,base)
n = checked_add(n,d)
done(s,i) && return n

safe_n = safe_mul(n, base)
isnull(safe_n) || (safe_n = safe_add(get(safe_n), d))
if isnull(safe_n)
raise && throw(OverflowError())
return _n
end
n = get(safe_n)
done(s,i) && return Nullable{T}(n)
c, i = next(s,i)
end
while !done(s,i)
c, i = next(s,i)
isspace(c) || throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
if !isspace(c)
raise && throw(ArgumentError("extra characters after whitespace in $(repr(s))"))
return _n
end
end
return n
return Nullable{T}(n)
end
parseint_nocheck{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
parseint_nocheck(T, s, base, base <= 36 ? 10 : 36)
tryparse_internal{T<:Integer}(::Type{T}, s::AbstractString, base::Int, raise::Bool) =
tryparse_internal(T, s, base, base <= 36 ? 10 : 36, raise)
tryparse{T<:Integer}(::Type{T}, s::AbstractString, base::Int) =
2 <= base <= 62 ? tryparse_internal(T,s,Int(base),false) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
tryparse{T<:Integer}(::Type{T}, s::AbstractString) = tryparse_internal(T,s,0,false)

parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer) =
2 <= base <= 62 ? parseint_nocheck(T,s,Int(base)) : throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
parseint{T<:Integer}(::Type{T}, s::AbstractString) = parseint_nocheck(T,s,0)
function parseint{T<:Integer}(::Type{T}, s::AbstractString, base::Integer)
(2 <= base <= 62) || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
get(tryparse_internal(T, s, base, true))
end
parseint{T<:Integer}(::Type{T}, s::AbstractString) = get(tryparse_internal(T, s, 0, true))
parseint(s::AbstractString, base::Integer) = parseint(Int,s,base)
parseint(s::AbstractString) = parseint_nocheck(Int,s,0)
parseint(s::AbstractString) = parseint(Int,s)

## stringifying integers more efficiently ##

string(x::Union(Int8,Int16,Int32,Int64,Int128)) = dec(x)

## string to float functions ##

float64_isvalid(s::AbstractString, out::Array{Float64,1}) =
ccall(:jl_strtod, Int32, (Ptr{UInt8},Ptr{Float64}), s, out) == 0
float32_isvalid(s::AbstractString, out::Array{Float32,1}) =
ccall(:jl_strtof, Int32, (Ptr{UInt8},Ptr{Float32}), s, out) == 0

float64_isvalid(s::SubString, out::Array{Float64,1}) =
ccall(:jl_substrtod, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float64}), s.string, s.offset, s.endof, out) == 0
float32_isvalid(s::SubString, out::Array{Float32,1}) =
ccall(:jl_substrtof, Int32, (Ptr{UInt8},Csize_t,Cint,Ptr{Float32}), s.string, s.offset, s.endof, out) == 0

begin
local tmp::Array{Float64,1} = Array(Float64,1)
local tmpf::Array{Float32,1} = Array(Float32,1)
global parsefloat
function parsefloat(::Type{Float64}, s::AbstractString)
if !float64_isvalid(s, tmp)
throw(ArgumentError("parsefloat(Float64,::AbstractString): invalid number format $(repr(s))"))
end
return tmp[1]
end
tryparse(::Type{Float64}, s::AbstractString) = ccall(:jl_try_strtod, Nullable{Float64}, (Ptr{UInt8},), s)
tryparse(::Type{Float64}, s::SubString) = ccall(:jl_try_substrtod, Nullable{Float64}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)

function parsefloat(::Type{Float32}, s::AbstractString)
if !float32_isvalid(s, tmpf)
throw(ArgumentError("parsefloat(Float32,::AbstractString): invalid number format $(repr(s))"))
end
return tmpf[1]
end
tryparse(::Type{Float32}, s::AbstractString) = ccall(:jl_try_strtof, Nullable{Float32}, (Ptr{UInt8},), s)
tryparse(::Type{Float32}, s::SubString) = ccall(:jl_try_substrtof, Nullable{Float32}, (Ptr{UInt8},Csize_t,Cint), s.string, s.offset, s.endof)

function parse{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString)
nf = tryparse(T, s)
isnull(nf) ? throw(ArgumentError("invalid number format $(repr(s)) for $T")) : get(nf)
end

float(x::AbstractString) = parsefloat(x)
parsefloat(x::AbstractString) = parsefloat(Float64,x)
parsefloat{T<:Union(Float32,Float64)}(::Type{T}, s::AbstractString) = parse(T,s)

float(x::AbstractString) = parse(Float64,x)
parsefloat(x::AbstractString) = parse(Float64,x)

float{S<:AbstractString}(a::AbstractArray{S}) = map!(float, similar(a,typeof(float(0))), a)

function float_isvalid{T<:Union(Float32,Float64)}(s::AbstractString, out::Array{T,1})
tf = tryparse(T, s)
isnull(tf) || (out[1] = get(tf))
!isnull(tf)
end

float32_isvalid(s::AbstractString, out::Array{Float32,1}) = float_isvalid(s, out)
float64_isvalid(s::AbstractString, out::Array{Float64,1}) = float_isvalid(s, out)

# find the index of the first occurrence of a value in a byte array

typealias ByteArray Union(Array{UInt8,1},Array{Int8,1})
Expand Down
26 changes: 23 additions & 3 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ include("env.jl")
include("path.jl")
include("intfuncs.jl")

# nullable types
include("nullable.jl")

# I/O
include("task.jl")
Expand Down Expand Up @@ -176,6 +178,27 @@ big(n::Integer) = convert(BigInt,n)
big(x::FloatingPoint) = convert(BigFloat,x)
big(q::Rational) = big(num(q))//big(den(q))

const _fact_table128 =
UInt128[0x00000000000000000000000000000001, 0x00000000000000000000000000000002,
0x00000000000000000000000000000006, 0x00000000000000000000000000000018,
0x00000000000000000000000000000078, 0x000000000000000000000000000002d0,
0x000000000000000000000000000013b0, 0x00000000000000000000000000009d80,
0x00000000000000000000000000058980, 0x00000000000000000000000000375f00,
0x00000000000000000000000002611500, 0x0000000000000000000000001c8cfc00,
0x0000000000000000000000017328cc00, 0x0000000000000000000000144c3b2800,
0x00000000000000000000013077775800, 0x00000000000000000000130777758000,
0x00000000000000000001437eeecd8000, 0x00000000000000000016beecca730000,
0x000000000000000001b02b9306890000, 0x000000000000000021c3677c82b40000,
0x0000000000000002c5077d36b8c40000, 0x000000000000003ceea4c2b3e0d80000,
0x000000000000057970cd7e2933680000, 0x00000000000083629343d3dcd1c00000,
0x00000000000cd4a0619fb0907bc00000, 0x00000000014d9849ea37eeac91800000,
0x00000000232f0fcbb3e62c3358800000, 0x00000003d925ba47ad2cd59dae000000,
0x0000006f99461a1e9e1432dcb6000000, 0x00000d13f6370f96865df5dd54000000,
0x0001956ad0aae33a4560c5cd2c000000, 0x0032ad5a155c6748ac18b9a580000000,
0x0688589cc0e9505e2f2fee5580000000, 0xde1bc4d19efcac82445da75b00000000]
factorial(n::Int128) = factorial_lookup(n, _fact_table128, 33)
factorial(n::UInt128) = factorial_lookup(n, _fact_table128, 34)

# more hashing definitions
include("hashing2.jl")

Expand All @@ -188,9 +211,6 @@ importall .Random
include("printf.jl")
importall .Printf

# nullable types
include("nullable.jl")

# concurrency and parallelism
include("serialize.jl")
include("multi.jl")
Expand Down
Loading

0 comments on commit 2c327d3

Please sign in to comment.