From 3c7c03c6e31fcb683aa15d73e999fe2760b06f46 Mon Sep 17 00:00:00 2001 From: tan Date: Tue, 10 Mar 2026 08:30:56 +0530 Subject: [PATCH 1/3] option to allow trailing characters while parsing This adds an option `allowtrailing` to tolerate additional trailing characters in the buffer while parsing JSON. It is off by default, which keeps the parser strict: it tries to parse the entire buffer as JSON. When it is switched on, the parser reads a valid JSON value from the beginning of the buffer and ignores any additional characters that follow it, if present. This is useful in parsing scenarios where the buffer contains multiple JSON objects without a delimiter, e.g. `{"name": "value"}{"name": "value"}`, or a JSON value followed by other characters, e.g. `{"name": "value"} : this is...`. This also matches the pre-1.x behavior of this package. --- src/lazy.jl | 5 ++++- src/parse.jl | 4 ++-- test/parse.jl | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/lazy.jl b/src/lazy.jl index 384b94d..3154562 100644 --- a/src/lazy.jl +++ b/src/lazy.jl @@ -59,6 +59,7 @@ Currently supported keyword arguments include: - `inf::String = "Infinity"`: the string that will be used to parse `Inf` if `allownan=true` - `nan::String = "NaN"`: the string that will be sued to parse `NaN` if `allownan=true` - `jsonlines::Bool = false`: whether the JSON input should be treated as an implicit array, with newlines separating individual JSON elements with no leading `'['` or trailing `']'` characters. Common in logging or streaming workflows. Defaults to `true` when used with `JSON.parsefile` and the filename extension is `.jsonl` or `ndjson`. Note this ensures that parsing will _always_ return an array at the root-level. 
+ - `allowtrailing::Bool = false`: whether to tolerate trailing characters after parsing a valid JSON element Note that validation is only fully done on `null`, `true`, and `false`, while other values are only lazily inferred from the first non-whitespace character: @@ -80,6 +81,7 @@ function lazy end inf::String = "Infinity" nan::String = "NaN" jsonlines::Bool = false + allowtrailing::Bool = false end lazy(io::Union{IO, Base.AbstractCmd}; kw...) = lazy(Base.read(io); kw...) @@ -163,6 +165,7 @@ getpos(x) = getfield(x, :pos) gettype(x) = getfield(x, :type) getopts(x) = getfield(x, :opts) getisroot(x) = getfield(x, :isroot) +getallowtrailing(x) = getopts(x).allowtrailing const LazyValues{T} = Union{LazyValue{T}, LazyObject{T}, LazyArray{T}} @@ -376,7 +379,7 @@ function applyarray(keyvalfunc, x::LazyValues) # for jsonlines, we need to make sure that recursive # lazy values *don't* consider individual lines *also* # to be jsonlines - opts = LazyOptions(; allownan=opts.allownan, ninf=opts.ninf, inf=opts.inf, nan=opts.nan, jsonlines=false) + opts = LazyOptions(; allownan=opts.allownan, ninf=opts.ninf, inf=opts.inf, nan=opts.nan, jsonlines=false, allowtrailing=opts.allowtrailing) end i = 1 while true diff --git a/src/parse.jl b/src/parse.jl index 32c63a5..c77a734 100644 --- a/src/parse.jl +++ b/src/parse.jl @@ -191,7 +191,7 @@ parse(x::LazyValue, ::Type{T}=Any; dicttype::Type{O}=DEFAULT_OBJECT_TYPE, null=n function _parse(x::LazyValue, ::Type{T}, dicttype::Type{O}, null, style::StructStyle) where {T,O} y, pos = StructUtils.make(style, T, x) - getisroot(x) && checkendpos(x, T, pos) + getisroot(x) && !getallowtrailing(x) && checkendpos(x, T, pos) return y end @@ -205,7 +205,7 @@ end function _parse(x::LazyValue, ::Type{Any}, ::Type{DEFAULT_OBJECT_TYPE}, null, ::StructStyle) out = ValueClosure() pos = applyvalue(out, x, null) - getisroot(x) && checkendpos(x, Any, pos) + getisroot(x) && !getallowtrailing(x) && checkendpos(x, Any, pos) return out.value end diff --git 
a/test/parse.jl b/test/parse.jl index a7102aa..c026c95 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -771,3 +771,18 @@ end @test_throws ArgumentError JSON.parse("{}", Tuple{Int, Int, Int}) @test_throws ArgumentError JSON.parse("{\"a\":1,\"b\":2}", Tuple{Int, Int, Int}) end + +@testset "allowtrailing" begin + # default behavior: trailing content causes an error + @test_throws ArgumentError JSON.parse("{\"hello\": \"world\"} asdaa") + @test_throws ArgumentError JSON.parse("[1,2,3] extra") + @test_throws ArgumentError JSON.parse("123 {}") + + # allowtrailing=true: trailing content is ignored + @test JSON.parse("{\"hello\": \"world\"} asdaa", allowtrailing=true) == JSON.Object("hello" => "world") + @test JSON.parse("[1,2,3] extra", allowtrailing=true) == Any[1, 2, 3] + @test JSON.parse("123 {}", allowtrailing=true) == 123 + + # allowtrailing=true with typed parse + @test JSON.parse("{\"a\": 1, \"b\": 2.0, \"c\": \"hi\"} trailing", D; allowtrailing=true) == D(1, 2.0, "hi") +end From 6673cab7d9f657857d77747605f2d1af38a3e978 Mon Sep 17 00:00:00 2001 From: tan Date: Tue, 17 Mar 2026 09:23:10 +0530 Subject: [PATCH 2/3] use isroot instead --- src/lazy.jl | 10 ++++------ src/parse.jl | 4 ++-- test/parse.jl | 14 +++++++------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/lazy.jl b/src/lazy.jl index 3154562..ed87a04 100644 --- a/src/lazy.jl +++ b/src/lazy.jl @@ -59,7 +59,7 @@ Currently supported keyword arguments include: - `inf::String = "Infinity"`: the string that will be used to parse `Inf` if `allownan=true` - `nan::String = "NaN"`: the string that will be sued to parse `NaN` if `allownan=true` - `jsonlines::Bool = false`: whether the JSON input should be treated as an implicit array, with newlines separating individual JSON elements with no leading `'['` or trailing `']'` characters. Common in logging or streaming workflows. Defaults to `true` when used with `JSON.parsefile` and the filename extension is `.jsonl` or `ndjson`. 
Note this ensures that parsing will _always_ return an array at the root-level. - - `allowtrailing::Bool = false`: whether to tolerate trailing characters after parsing a valid JSON element + - `isroot::Bool = true`: whether this is the root LazyValue encompassing the entire json buffer. If `false` parses only the first JSON value and ignores trailing characters. Note that validation is only fully done on `null`, `true`, and `false`, while other values are only lazily inferred from the first non-whitespace character: @@ -81,7 +81,6 @@ function lazy end inf::String = "Infinity" nan::String = "NaN" jsonlines::Bool = false - allowtrailing::Bool = false end lazy(io::Union{IO, Base.AbstractCmd}; kw...) = lazy(Base.read(io); kw...) @@ -90,7 +89,7 @@ lazyfile(file; jsonlines::Union{Bool, Nothing}=nothing, kw...) = open(io -> lazy @doc (@doc lazy) lazyfile -function lazy(buf::Union{AbstractVector{UInt8}, AbstractString}; kw...) +function lazy(buf::Union{AbstractVector{UInt8}, AbstractString}; isroot::Bool=true, kw...) if !applicable(pointer, buf, 1) || (buf isa AbstractVector{UInt8} && !isone(only(strides(buf)))) if buf isa AbstractString buf = String(buf) @@ -118,7 +117,7 @@ function lazy(buf::Union{AbstractVector{UInt8}, AbstractString}; kw...) # detect and ignore UTF-8 BOM pos = (len >= 3 && getbyte(buf, pos) == 0xef && getbyte(buf, pos + 1) == 0xbb && getbyte(buf, pos + 2) == 0xbf) ? 
pos + 3 : pos @nextbyte - return _lazy(buf, pos, len, b, LazyOptions(; kw...), true) + return _lazy(buf, pos, len, b, LazyOptions(; kw...), isroot) @label invalid invalid(error, buf, pos, Any) @@ -165,7 +164,6 @@ getpos(x) = getfield(x, :pos) gettype(x) = getfield(x, :type) getopts(x) = getfield(x, :opts) getisroot(x) = getfield(x, :isroot) -getallowtrailing(x) = getopts(x).allowtrailing const LazyValues{T} = Union{LazyValue{T}, LazyObject{T}, LazyArray{T}} @@ -379,7 +377,7 @@ function applyarray(keyvalfunc, x::LazyValues) # for jsonlines, we need to make sure that recursive # lazy values *don't* consider individual lines *also* # to be jsonlines - opts = LazyOptions(; allownan=opts.allownan, ninf=opts.ninf, inf=opts.inf, nan=opts.nan, jsonlines=false, allowtrailing=opts.allowtrailing) + opts = LazyOptions(; allownan=opts.allownan, ninf=opts.ninf, inf=opts.inf, nan=opts.nan, jsonlines=false) end i = 1 while true diff --git a/src/parse.jl b/src/parse.jl index c77a734..32c63a5 100644 --- a/src/parse.jl +++ b/src/parse.jl @@ -191,7 +191,7 @@ parse(x::LazyValue, ::Type{T}=Any; dicttype::Type{O}=DEFAULT_OBJECT_TYPE, null=n function _parse(x::LazyValue, ::Type{T}, dicttype::Type{O}, null, style::StructStyle) where {T,O} y, pos = StructUtils.make(style, T, x) - getisroot(x) && !getallowtrailing(x) && checkendpos(x, T, pos) + getisroot(x) && checkendpos(x, T, pos) return y end @@ -205,7 +205,7 @@ end function _parse(x::LazyValue, ::Type{Any}, ::Type{DEFAULT_OBJECT_TYPE}, null, ::StructStyle) out = ValueClosure() pos = applyvalue(out, x, null) - getisroot(x) && !getallowtrailing(x) && checkendpos(x, Any, pos) + getisroot(x) && checkendpos(x, Any, pos) return out.value end diff --git a/test/parse.jl b/test/parse.jl index c026c95..b293612 100644 --- a/test/parse.jl +++ b/test/parse.jl @@ -772,17 +772,17 @@ end @test_throws ArgumentError JSON.parse("{\"a\":1,\"b\":2}", Tuple{Int, Int, Int}) end -@testset "allowtrailing" begin +@testset "isroot=false allows trailing" begin # 
default behavior: trailing content causes an error @test_throws ArgumentError JSON.parse("{\"hello\": \"world\"} asdaa") @test_throws ArgumentError JSON.parse("[1,2,3] extra") @test_throws ArgumentError JSON.parse("123 {}") - # allowtrailing=true: trailing content is ignored - @test JSON.parse("{\"hello\": \"world\"} asdaa", allowtrailing=true) == JSON.Object("hello" => "world") - @test JSON.parse("[1,2,3] extra", allowtrailing=true) == Any[1, 2, 3] - @test JSON.parse("123 {}", allowtrailing=true) == 123 + # isroot=false: trailing content is ignored + @test JSON.parse("{\"hello\": \"world\"} asdaa", isroot=false) == JSON.Object("hello" => "world") + @test JSON.parse("[1,2,3] extra", isroot=false) == Any[1, 2, 3] + @test JSON.parse("123 {}", isroot=false) == 123 - # allowtrailing=true with typed parse - @test JSON.parse("{\"a\": 1, \"b\": 2.0, \"c\": \"hi\"} trailing", D; allowtrailing=true) == D(1, 2.0, "hi") + # isroot=false with typed parse + @test JSON.parse("{\"a\": 1, \"b\": 2.0, \"c\": \"hi\"} trailing", D; isroot=false) == D(1, 2.0, "hi") end From 5b0ddcd1e4fca408518980488819b6db5004eada Mon Sep 17 00:00:00 2001 From: tan Date: Tue, 17 Mar 2026 13:21:45 +0530 Subject: [PATCH 3/3] add docs, bump minor version --- Project.toml | 2 +- src/parse.jl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d4828be..bce5f97 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "JSON" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "1.4.0" +version = "1.5.0" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/src/parse.jl b/src/parse.jl index 32c63a5..41e7c39 100644 --- a/src/parse.jl +++ b/src/parse.jl @@ -15,6 +15,7 @@ Currently supported keyword arguments include: * `inf`: string to use for `Inf` (default: `"Infinity"`) * `nan`: string to use for `NaN` (default: `"NaN"`) * `jsonlines`: treat the `json` input as an implicit JSON array, delimited by newlines, each element being parsed 
from each row/line in the input + * `isroot`: whether this is the root `LazyValue` encompassing the entire JSON buffer. If `false`, only the first JSON value is parsed and any trailing characters are ignored (default: `true`) * `dicttype`: a custom `AbstractDict` type to use instead of `$DEFAULT_OBJECT_TYPE` as the default type for JSON object materialization * `null`: a custom value to use for JSON null values (default: `nothing`) * `style`: a custom `StructUtils.StructStyle` subtype instance to be used in calls to `StructUtils.make` and `StructUtils.lift`. This allows overriding