Skip to content

Commit

Permalink
fix parser (#522)
Browse files Browse the repository at this point in the history
* fix parser

* hups

* use BigInt when parsing rational expressions to avoid failure on 32 bits systems

* take care of a very nasty corner case

* Update src/parsing.jl

Co-authored-by: Benoît Richard <kolaru@hotmail.com>

* better docs and exceptions handling

* add tests for Float32 and bigfloat

Co-authored-by: Luca Ferranti <lucaferranti@users.noreply.github.com>
Co-authored-by: Benoît Richard <kolaru@hotmail.com>
  • Loading branch information
3 people committed May 25, 2022
1 parent f385b7f commit 1b84a19
Show file tree
Hide file tree
Showing 11 changed files with 357 additions and 318 deletions.
1 change: 0 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ version = "0.20.5"

[deps]
CRlibm = "96374032-68de-5a5b-8d9e-752f78720389"
CombinedParsers = "5ae71ed2-6f8a-4ed1-b94f-e14e8158f19e"
EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56"
FastRounding = "fa42c844-2597-5d31-933b-ebd51ab2693f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand Down
1 change: 0 additions & 1 deletion src/IntervalArithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import CRlibm
import FastRounding
import RoundingEmulator

using CombinedParsers
using LinearAlgebra
using Markdown
using StaticArrays
Expand Down
2 changes: 1 addition & 1 deletion src/decorations/intervals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ macro decorated(ex...)
lo = :($x.lo)
hi = :($x.hi)
else
lo, hi = ex
lo, hi = ex
end

return :(DecoratedInterval($lo, $hi))
Expand Down
333 changes: 209 additions & 124 deletions src/parsing.jl
Original file line number Diff line number Diff line change
@@ -1,156 +1,241 @@
# Functions to parse strings to intervals

"""
parse(DecoratedInterval, s::AbstractString)
Parse a string of the form `"[a, b]_dec"` as a `DecoratedInterval`
with decoration `dec`.
Parse a string of the form `"[a, b]_dec"` as a `DecoratedInterval` with decoration `dec`.
If the decoration is not specified, it is computed based on the parsed interval.
If the input is an invalid string, a warning is printed and [NaI] is returned. The parser is
case unsensitive.
### Examples
```jldoctest
julia> @format true
Display parameters:
- format: standard
- decorations: true
- significant figures: 6
julia> parse(DecoratedInterval{Float64}, "[1, 2]")
[1, 2]_com
julia> parse(DecoratedInterval{Float64}, "[1, 2]_def")
[1, 2]_def
julia> parse(DecoratedInterval{Float64}, "foobar")
┌ Warning: invalid input, returning [NaI]
└ @ IntervalArithmetic ~/.julia/dev/IntervalArithmetic/src/parsing.jl:44
[NaN, NaN]_ill
```
"""
function parse(::Type{DecoratedInterval{T}}, s::AbstractString) where T
if '_' s
x = parse(Interval{T}, s)
return DecoratedInterval(x.lo, x.hi)
end
s = lowercase(strip(s))
s == "[nai]" && return nai(T)
try
if '_' s
ival, _ = _parse(Interval{T}, s)
return DecoratedInterval{T}(ival)
end

decorations = Dict(
"ill" => ill,
"trv" => trv,
"def" => def,
"dac" => dac,
"com" => com)

decorations = Dict(
"ill" => ill,
"trv" => trv,
"def" => def,
"dac" => dac,
"com" => com)
interval_string, dec = split(s, "_")

interval_string, dec = split(s, "_")
interval = parse(Interval{T}, interval_string)
return DecoratedInterval(interval, decorations[lowercase(dec)])
ival, isnotcom = _parse(Interval{T}, interval_string)
dec_calc = decoration(ival)

haskey(decorations, dec) || throw(ArgumentError("invalid decoration $dec"))
dec_given = decorations[dec]

#=
If I try to give a decoration that is too high, e.g. [1, Inf]_com, then it
should error and return [NaI]. Exception to this is if the interval would be com
but becomes dac because of finite precision, e.g. "[1e403]_com" when parse to
Interval{Float64} is allowed to become [prevfloat(Inf), Inf]_dac without erroring.
The isnotcom flag returned by _parse is used to track if the interval was originally
smaller than com or became dac because of overflow.
=#
dec_given > dec_calc && isnotcom && throw(ArgumentError("invalid decoration $dec for $ival"))

return DecoratedInterval{T}(ival, min(dec_given, dec_calc))
catch e
if e isa ArgumentError
@warn "invalid input, returning [NaI]"
return nai(T)
else
rethrow(e)
end
end
end

"""
parse(Interval, s::AbstractString)
Parse a string as an `Interval`, according to the grammar specified
in Section 9.7 of the IEEE Std 1788-2015, with some extensions.
in Section 9.7 of the IEEE Std 1788-2015.
The created interval is tight around the value described by the string,
including for number that have no exact float representation like "0.1".
Roughly speaking, the valid forms are
- `[ 1.33 ]` or simply `1.33` : The interval containing only `1.33``.
- `[ 1.44, 2.78 ]` : The interval `[1.44, 2.78]`.
- `7.88 ± 0.03`: The interval `[7.85, 7.91]`.j
- `6.42?2` : The interval `6.42 ± 0.02`. The number after `?` represent
the uncertainty in the last digit.
Physicists would write it as `6.42(2)`.
Strangely enough, according to the standard, the default
value is `0.5` (e.g. `2.3? == 2.3 ± 0.05`).
The direction of the uncertainty can be given by adding 'u' or 'd' at
the end for the error going only up or down respectively (e.g.
`4.5?5u == [4.5, 5]`).
including for number that have no exact float representation like "0.1". If the input is
an invalid string, a warning is printed and an empty interval is returned. The parser is
case unsensitive.
### Allowed format
Here are some examples of allowed formats, for more details see sections 9.7 and 12.11 of
the standard
- `[ 1.33 ]` or simply `1.33` : The interval containing only `1.33``.
- `[ 1.44, 2.78 ]` : The interval `[1.44, 2.78]`.
- `[empty]` : the empty interval
- `[entire]` or `[,]`: the interval `[-∞, ∞]`
- `[3,]`: The interval `[3, ∞]`
- `6.42?2` : The interval `6.42 ± 0.02`. The number after `?` represent the uncertainty in
the last digit. The default value is `0.5` (e.g. `2.3? == 2.3 ± 0.05`). The direction of
the uncertainty can be given by adding 'u' or 'd' at the end for the error going only up
or down respectively (e.g. `4.5?5u == [4.5, 5]`).
- `6.42?2e2` : The interval `(6.42 ± 0.02)⋅10³ == 642 ± 2`
- `3??u` : the interval `[3, ∞]`
- `3??u` : the interval `[3, ∞]`
- `3??` : the interval `[-∞, ∞]`
### Examples
```julia
julia> parse(Interval{Float64}, "[1, 2]")
[1, 2]
julia> parse(Interval{Float64}, "[1,]")
[1, ∞]
julia> parse(Interval{Float64}, "[,]")
[-∞, ∞]
julia> parse(Interval{Float64}, "6.42?2e2")
[640, 644]
julia> parse(Interval{Float64}, "foobar")
┌ Warning: invalid input, empty interval returned
└ @ IntervalArithmetic ~/.julia/dev/IntervalArithmetic/src/parsing.jl:68
```
"""
function parse(::Type{Interval{T}}, s::AbstractString) where T
p = interval_parser(Interval{T})
s = lowercase(strip(s))
try
return parse(p, s)
ival, _ = _parse(Interval{T}, s)
return ival
catch e
# We avoid CombinedParsers stacktraces because they are hard to read.
if !(e isa ArgumentError)
rethrow()
if e isa ArgumentError
@warn "invalid input, empty interval returned"
return emptyinterval(T)
else
rethrow(e)
end
end

throw(ArgumentError("string \"$s\" can not be parsed to an interval."))
end

"""
Same as `parse(T, s, rounding_mode)`, but also accept string representing rational numbers.
"""
function extended_parse(T, s, rounding_mode)
if '/' in s
num, denum = parse.(Int, split(s, '/'))
return T(num//denum, rounding_mode)
end
_parse(::Type{Interval{T}}, s::AbstractString) where T
return parse(T, s, rounding_mode)
end
tries to parse the string `s` to an interval of type `Interval{T}` and throws an argument
error if an invalid string is given.
"""
interval_parser(::Type{Interval})
### Output
Return a parser that processes a string according to Section 9.7 of
the IEEE Std 1788-2015 and return an interval of the given type.
It is an extension of the specification in the standard, more lenient
on what is allowed.
- the parsed interval
- a flag `isnotcom`, which is set to true if the input interval is not `com` and to false
otherwise. This is used to distinguish the case when an interval is supposed to be
unbounded (e.g. input `"[3, infinity]"`) or becomes unbounded because of overflow
(e.g. the input `"[3, 1e400]", which is parse to `[3, ∞]` when using `Float64`).
"""
function interval_parser(::Type{Interval{T}}) where T
# Exclude a minimal number of Char and let Julia parse the number
# when needed
any_number = Lazy(Repeat1(CharNotIn("[]?ud")))
integer = Lazy(Repeat1(CharIn("0123456789")))

point_interval = Either(
Sequence(seq -> seq[2], "[", trim(any_number), "]"),
trim(any_number)
) do x
lo = extended_parse(T, join(x), RoundDown)
hi = extended_parse(T, join(x), RoundUp)
return checked_interval(lo, hi)
end
infsup_interval = Sequence(
"[",
trim(any_number),
",",
trim(any_number),
"]") do seq
lo = extended_parse(T, join(seq[2]), RoundDown)
hi = extended_parse(T, join(seq[4]), RoundUp)
return checked_interval(lo, hi)
end
uncert_interval = Sequence(
any_number,
"?",
Optional(integer, default=missing), # The radius
Optional(CharIn("ud"), default='?'),
Optional(Sequence("e", any_number), default=("e", "0"))
) do seq
x = join(seq[1])
radius = ismissing(seq[3]) ? 1//2 : Rational(parse(Int, join(seq[3])))
dir = seq[4]
exponent = parse(Int, join(seq[5][2]))

# TODO The ulp calculation is wrong when x has an exponent
# e.g. 1.33e6?1
# This is in principle not allowed by the standard but I see no reason
# to disallow it
# Remove the decimal point, scaling accordingly
if '.' in x
core, decimals = split(x, '.')
exponent -= length(decimals)
x = join([core, decimals])
function _parse(::Type{Interval{T}}, s::AbstractString) where T
isnotcom = occursin("inf", s)
if startswith(s, '[') && endswith(s, ']') # parse as interval
s = strip(s[2:end-1])
if ',' in s # infsupinterval
lo, hi = strip.(split(s, ','))
isempty(lo) && (lo = "-inf"; isnotcom = true)
isempty(hi) && (hi = "inf"; isnotcom = true)
lo = parse_num(T, lo, RoundDown)
hi = parse_num(T, hi, RoundUp)
else # point interval

(s == "empty" || isempty(s)) && return emptyinterval(T), true # emptyinterval
s == "entire" && return entireinterval(T), true # entireinterval
lo = parse_num(T, s, RoundDown)
hi = parse_num(T, s, RoundUp)
end

rhi = dir == 'd' ? Rational(0) : radius
rlo = dir == 'u' ? Rational(0) : radius
scale = exponent >= 0 ? 10^exponent : 1//10^-exponent
x = parse(Int, x)
lo = T((x - rlo)*scale, RoundDown)
hi = T((x + rhi)*scale, RoundUp)
return Interval(lo, hi)
elseif '?' in s # uncertainty interval
if occursin("??", s) # unbounded interval
isnotcom = true
m, _ = split(s, "??")
if 'u' in s # interval in the form [m, Inf]
lo = parse(T, m, RoundDown)
hi = T(Inf)
elseif 'd' in s # interval in the form [-Inf, m]
lo = T(-Inf)
hi = parse(T, m, RoundUp)
else
return entireinterval(T), true
end
else
m , vde = split(s, '?')

# ulp computation
if '.' in m # ulp is last decimal position
ulp = 1/(big(10.0)^length(split(m, '.')[2]))
else # no decimal, hence ulp is unit
ulp = big(1.0)
end
m = parse(BigFloat, m)

if 'e' in vde
vd, e = split(vde, 'e')
e = big(10.0) ^ parse(Int, e)
else
vd = vde
e = big(1.0)
end

if 'u' in vd || 'd' in vd
d = last(vd)
v = vd[1:end-1]
else
d = 'b' # both directions
v = vd
end

v = isempty(v) ? 1//2 : parse(BigInt, v)
if d == 'd'
lo = T((m - v * ulp) * e, RoundDown)
hi = T(m * e, RoundUp)
elseif d == 'u'
lo = T(m * e, RoundDown)
hi = T((m + v * ulp) * e, RoundUp)
else
lo = T((m - v * ulp) * e, RoundDown)
hi = T((m + v * ulp) * e, RoundUp)
end
end
else # number
lo = parse_num(T, s, RoundDown)
hi = parse_num(T, s, RoundUp)
end
is_valid_interval(lo, hi) && return Interval(lo, hi), isnotcom
throw(ArgumentError("input $s can not be parsed as an interval."))
end

# Not in the standard
pm_interval = Sequence(
any_number,
trim("±"),
any_number
) do seq
a = parse(T, join(seq[1]))
b = parse(T, join(seq[3]))
return a ± b
"""
Same as `parse(T, s, rounding_mode)`, but also accept string representing rational numbers.
"""
function parse_num(T, s, rounding_mode)
if '/' in s
num, denum = parse.(BigInt, split(s, '/'))
return T(num//denum, rounding_mode)
end

return Sequence(
first,
Either(pm_interval, uncert_interval, infsup_interval, point_interval),
AtEnd()
)
end
return T(parse(BigFloat, s), rounding_mode)
end
Loading

0 comments on commit 1b84a19

Please sign in to comment.