diff --git a/Project.toml b/Project.toml index d4828be..bce5f97 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "JSON" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "1.4.0" +version = "1.5.0" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/src/lazy.jl b/src/lazy.jl index 384b94d..ed87a04 100644 --- a/src/lazy.jl +++ b/src/lazy.jl @@ -59,6 +59,7 @@ Currently supported keyword arguments include: - `inf::String = "Infinity"`: the string that will be used to parse `Inf` if `allownan=true` - `nan::String = "NaN"`: the string that will be sued to parse `NaN` if `allownan=true` - `jsonlines::Bool = false`: whether the JSON input should be treated as an implicit array, with newlines separating individual JSON elements with no leading `'['` or trailing `']'` characters. Common in logging or streaming workflows. Defaults to `true` when used with `JSON.parsefile` and the filename extension is `.jsonl` or `ndjson`. Note this ensures that parsing will _always_ return an array at the root-level. + - `isroot::Bool = true`: whether this is the root `LazyValue` encompassing the entire JSON buffer. If `false`, only the first JSON value is parsed and any trailing characters are ignored. Note that validation is only fully done on `null`, `true`, and `false`, while other values are only lazily inferred from the first non-whitespace character: @@ -88,7 +89,7 @@ lazyfile(file; jsonlines::Union{Bool, Nothing}=nothing, kw...) = open(io -> lazy @doc (@doc lazy) lazyfile -function lazy(buf::Union{AbstractVector{UInt8}, AbstractString}; kw...) +function lazy(buf::Union{AbstractVector{UInt8}, AbstractString}; isroot::Bool=true, kw...) if !applicable(pointer, buf, 1) || (buf isa AbstractVector{UInt8} && !isone(only(strides(buf)))) if buf isa AbstractString buf = String(buf) @@ -116,7 +117,7 @@ function lazy(buf::Union{AbstractVector{UInt8}, AbstractString}; kw...) 
# detect and ignore UTF-8 BOM pos = (len >= 3 && getbyte(buf, pos) == 0xef && getbyte(buf, pos + 1) == 0xbb && getbyte(buf, pos + 2) == 0xbf) ? pos + 3 : pos @nextbyte - return _lazy(buf, pos, len, b, LazyOptions(; kw...), true) + return _lazy(buf, pos, len, b, LazyOptions(; kw...), isroot) @label invalid invalid(error, buf, pos, Any) diff --git a/src/parse.jl b/src/parse.jl index 32c63a5..41e7c39 100644 --- a/src/parse.jl +++ b/src/parse.jl @@ -15,6 +15,7 @@ Currently supported keyword arguments include: * `inf`: string to use for `Inf` (default: `"Infinity"`) * `nan`: string to use for `NaN` (default: `"NaN"`) * `jsonlines`: treat the `json` input as an implicit JSON array, delimited by newlines, each element being parsed from each row/line in the input + * `isroot`: whether this is the root `LazyValue` encompassing the entire JSON buffer. If `false`, only the first JSON value is parsed and any trailing characters are ignored (default: `true`) * `dicttype`: a custom `AbstractDict` type to use instead of `$DEFAULT_OBJECT_TYPE` as the default type for JSON object materialization * `null`: a custom value to use for JSON null values (default: `nothing`) * `style`: a custom `StructUtils.StructStyle` subtype instance to be used in calls to `StructUtils.make` and `StructUtils.lift`. 
This allows overriding diff --git a/test/parse.jl b/test/parse.jl index a7102aa..b293612 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -771,3 +771,18 @@ end @test_throws ArgumentError JSON.parse("{}", Tuple{Int, Int, Int}) @test_throws ArgumentError JSON.parse("{\"a\":1,\"b\":2}", Tuple{Int, Int, Int}) end + +@testset "isroot=false allows trailing" begin + # default behavior: trailing content causes an error + @test_throws ArgumentError JSON.parse("{\"hello\": \"world\"} asdaa") + @test_throws ArgumentError JSON.parse("[1,2,3] extra") + @test_throws ArgumentError JSON.parse("123 {}") + + # isroot=false: trailing content is ignored + @test JSON.parse("{\"hello\": \"world\"} asdaa", isroot=false) == JSON.Object("hello" => "world") + @test JSON.parse("[1,2,3] extra", isroot=false) == Any[1, 2, 3] + @test JSON.parse("123 {}", isroot=false) == 123 + + # isroot=false with typed parse + @test JSON.parse("{\"a\": 1, \"b\": 2.0, \"c\": \"hi\"} trailing", D; isroot=false) == D(1, 2.0, "hi") +end