diff --git a/.gitignore b/.gitignore index 462bd1f..b380ecf 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ /.vscode/ Manifest.toml +Manifest-*.toml fixture.tar.gz fixture .CondaPkg diff --git a/ChunkCodecCore/CHANGELOG.md b/ChunkCodecCore/CHANGELOG.md index cb038c1..73f6143 100644 --- a/ChunkCodecCore/CHANGELOG.md +++ b/ChunkCodecCore/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Unreleased +### BREAKING `can_concatenate` is now a decoder method instead of a `Codec` method [#73](https://github.com/JuliaIO/ChunkCodecs.jl/pull/73) + ### BREAKING the return type of `try_encode`, `try_decode`, and `try_resize_decode!` changed to a new `MaybeSize` type [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) ## [v0.5.3](https://github.com/JuliaIO/ChunkCodecs.jl/tree/ChunkCodecCore-v0.5.3) - 2025-08-09 diff --git a/ChunkCodecCore/src/interface.jl b/ChunkCodecCore/src/interface.jl index 5964ba0..25a89dc 100644 --- a/ChunkCodecCore/src/interface.jl +++ b/ChunkCodecCore/src/interface.jl @@ -100,17 +100,6 @@ Return the default decode options for the codec. """ function decode_options end -""" - can_concatenate(::Codec)::Bool - -Return `true` if the codec has concatenation transparency. - -If `true`, and some encoded data `a` and `b` decode to `x` and `y` respectively, then -the concatenation of `a` and `b` will -decode to the concatenation of `x` and `y` -""" -can_concatenate(::Codec) = false - """ decoded_size_range(e)::StepRange{Int64, Int64} @@ -258,10 +247,22 @@ function try_resize_decode!(d, dst::AbstractVector{UInt8}, src::AbstractVector{U end end +""" + can_concatenate(d)::Bool + +Return `true` if the decoder has concatenation transparency. 
+ +If `true`, and some encoded data `a` and `b` decode to `x` and `y` respectively, then +the concatenation of `a` and `b` will +decode to the concatenation of `x` and `y`. +""" +can_concatenate(::Any) = false + # allow passing codec to decode try_find_decoded_size(c::Codec, src::AbstractVector{UInt8}) = try_find_decoded_size(decode_options(c), src) try_decode!(c::Codec, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...) = try_decode!(decode_options(c), dst, src; kwargs...) try_resize_decode!(c::Codec, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}, max_size::Int64; kwargs...) = try_resize_decode!(decode_options(c), dst, src, max_size; kwargs...) +can_concatenate(c::Codec) = can_concatenate(decode_options(c)) """ check_contiguous(x::AbstractVector{UInt8}) diff --git a/ChunkCodecCore/src/noop.jl b/ChunkCodecCore/src/noop.jl index 12bdba9..c11ad5b 100644 --- a/ChunkCodecCore/src/noop.jl +++ b/ChunkCodecCore/src/noop.jl @@ -9,7 +9,6 @@ Copies the input. See also [`NoopEncodeOptions`](@ref) and [`NoopDecodeOptions`](@ref) """ struct NoopCodec <: Codec end -can_concatenate(::NoopCodec) = true decode_options(::NoopCodec) = NoopDecodeOptions() # default decode options """ @@ -72,6 +71,8 @@ end is_thread_safe(::NoopDecodeOptions) = true +can_concatenate(::NoopDecodeOptions) = true + function try_find_decoded_size(::NoopDecodeOptions, src::AbstractVector{UInt8})::Int64 length(src) end diff --git a/ChunkCodecCore/src/types.jl b/ChunkCodecCore/src/types.jl index 61f72b8..0453cf7 100644 --- a/ChunkCodecCore/src/types.jl +++ b/ChunkCodecCore/src/types.jl @@ -7,9 +7,6 @@ Properties are public for reading. Required methods for a type `T <: Codec` to implement: - `decode_options(::T)::DecodeOptions` - -Optional methods to implement: -- `can_concatenate(::T)::Bool`: defaults to `false`. 
""" abstract type Codec end @@ -51,6 +48,7 @@ Required methods for a type `T <: DecodeOptions` to implement: Optional methods to implement: - `is_thread_safe(::T)::Bool`: defaults to `false`. +- `can_concatenate(::T)::Bool`: defaults to `false`. - `try_resize_decode!(::T, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}, max_size::Int64; kwargs...)::MaybeSize`: defaults to using `try_decode!` and `try_find_decoded_size` """ abstract type DecodeOptions end \ No newline at end of file diff --git a/ChunkCodecTests/CHANGELOG.md b/ChunkCodecTests/CHANGELOG.md index 161704b..a202cee 100644 --- a/ChunkCodecTests/CHANGELOG.md +++ b/ChunkCodecTests/CHANGELOG.md @@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Unreleased -- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) +- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) [#73](https://github.com/JuliaIO/ChunkCodecs.jl/pull/73) - Added support for Julia 1.6 [#68](https://github.com/JuliaIO/ChunkCodecs.jl/pull/68) ## [v0.1.5](https://github.com/JuliaIO/ChunkCodecs.jl/tree/ChunkCodecTests-v0.1.5) - 2025-07-28 diff --git a/ChunkCodecTests/src/ChunkCodecTests.jl b/ChunkCodecTests/src/ChunkCodecTests.jl index 086880a..642ae49 100644 --- a/ChunkCodecTests/src/ChunkCodecTests.jl +++ b/ChunkCodecTests/src/ChunkCodecTests.jl @@ -27,6 +27,7 @@ export test_codec, test_encoder_decoder, rand_test_data function test_codec(c::Codec, e::EncodeOptions, d::DecodeOptions; trials=100) @test decode_options(c) isa DecodeOptions @test can_concatenate(c) isa Bool + @test can_concatenate(d) isa Bool @test e.codec == c @test d.codec == c @test is_thread_safe(e) isa Bool @@ -39,15 +40,6 @@ function test_codec(c::Codec, e::EncodeOptions, d::DecodeOptions; trials=100) @test typeof(d)(;NamedTuple{d_props}(getproperty.((d,), d_props))...) 
== d test_encoder_decoder(e, d; trials) - - # can_concatenate tests - if can_concatenate(c) - srange = decoded_size_range(e) - a = rand(UInt8, 100*step(srange)) - b = rand(UInt8, 200*step(srange)) - @test decode(d, [encode(e, a); encode(e, b);]) == [a; b;] - @test decode(d, [encode(e, UInt8[]); encode(e, UInt8[]);]) == UInt8[] - end end function test_encoder_decoder(e, d; trials=100) @@ -141,6 +133,15 @@ function test_encoder_decoder(e, d; trials=100) @test decode(d, encoded) == data end + + # can_concatenate tests + if can_concatenate(d) + srange = decoded_size_range(e) + a = rand(UInt8, 100*step(srange)) + b = rand(UInt8, 200*step(srange)) + @test decode(d, [encode(e, a); encode(e, b);]) == [a; b;] + @test decode(d, [encode(e, UInt8[]); encode(e, UInt8[]);]) == UInt8[] + end end function rand_test_data(s::Int64)::Vector{UInt8} diff --git a/LibBzip2/CHANGELOG.md b/LibBzip2/CHANGELOG.md index 5b84864..9ff5b3a 100644 --- a/LibBzip2/CHANGELOG.md +++ b/LibBzip2/CHANGELOG.md @@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
## Unreleased -- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) +- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) [#73](https://github.com/JuliaIO/ChunkCodecs.jl/pull/73) - Added support for Julia 1.6 [#68](https://github.com/JuliaIO/ChunkCodecs.jl/pull/68) ## [v0.2.1](https://github.com/JuliaIO/ChunkCodecs.jl/tree/LibBzip2-v0.2.1) - 2025-07-28 diff --git a/LibBzip2/src/ChunkCodecLibBzip2.jl b/LibBzip2/src/ChunkCodecLibBzip2.jl index 7e0e0b4..134cd02 100644 --- a/LibBzip2/src/ChunkCodecLibBzip2.jl +++ b/LibBzip2/src/ChunkCodecLibBzip2.jl @@ -50,8 +50,6 @@ struct BZ2Codec <: Codec end decode_options(::BZ2Codec) = BZ2DecodeOptions() -can_concatenate(::BZ2Codec) = true - include("encode.jl") include("decode.jl") diff --git a/LibBzip2/src/decode.jl b/LibBzip2/src/decode.jl index 69ff54c..75d4fc5 100644 --- a/LibBzip2/src/decode.jl +++ b/LibBzip2/src/decode.jl @@ -45,6 +45,7 @@ function BZ2DecodeOptions(; BZ2DecodeOptions(codec) end is_thread_safe(::BZ2DecodeOptions) = true +can_concatenate(::BZ2DecodeOptions) = true function try_find_decoded_size(::BZ2DecodeOptions, src::AbstractVector{UInt8})::Nothing nothing diff --git a/LibLz4/CHANGELOG.md b/LibLz4/CHANGELOG.md index fe618fe..edb48e0 100644 --- a/LibLz4/CHANGELOG.md +++ b/LibLz4/CHANGELOG.md @@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
## Unreleased -- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) +- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) [#73](https://github.com/JuliaIO/ChunkCodecs.jl/pull/73) - Added support for Julia 1.6 [#68](https://github.com/JuliaIO/ChunkCodecs.jl/pull/68) ## [v0.2.2](https://github.com/JuliaIO/ChunkCodecs.jl/tree/LibLz4-v0.2.2) - 2025-07-28 diff --git a/LibLz4/src/ChunkCodecLibLz4.jl b/LibLz4/src/ChunkCodecLibLz4.jl index 24d11a9..af7ed5d 100644 --- a/LibLz4/src/ChunkCodecLibLz4.jl +++ b/LibLz4/src/ChunkCodecLibLz4.jl @@ -65,7 +65,6 @@ See also [`LZ4FrameEncodeOptions`](@ref) and [`LZ4FrameDecodeOptions`](@ref) struct LZ4FrameCodec <: Codec end decode_options(::LZ4FrameCodec) = LZ4FrameDecodeOptions() # default decode options -can_concatenate(::LZ4FrameCodec) = true """ struct LZ4BlockCodec <: Codec diff --git a/LibLz4/src/decode.jl b/LibLz4/src/decode.jl index d950085..0c8abbe 100644 --- a/LibLz4/src/decode.jl +++ b/LibLz4/src/decode.jl @@ -43,6 +43,8 @@ end is_thread_safe(::LZ4FrameDecodeOptions) = true +can_concatenate(::LZ4FrameDecodeOptions) = true + function try_find_decoded_size(::LZ4FrameDecodeOptions, src::AbstractVector{UInt8})::Nothing # TODO This might be possible to do using a method similar to ZstdDecodeOptions # For now just return nothing diff --git a/LibZlib/CHANGELOG.md b/LibZlib/CHANGELOG.md index f4e1642..fd4c36f 100644 --- a/LibZlib/CHANGELOG.md +++ b/LibZlib/CHANGELOG.md @@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
## Unreleased -- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) +- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) [#73](https://github.com/JuliaIO/ChunkCodecs.jl/pull/73) - Added support for Julia 1.6 [#68](https://github.com/JuliaIO/ChunkCodecs.jl/pull/68) ## [v0.2.1](https://github.com/JuliaIO/ChunkCodecs.jl/tree/LibZlib-v0.2.1) - 2025-07-28 diff --git a/LibZlib/src/decode.jl b/LibZlib/src/decode.jl index 568de19..ec039b7 100644 --- a/LibZlib/src/decode.jl +++ b/LibZlib/src/decode.jl @@ -80,6 +80,8 @@ function GzipDecodeOptions(; GzipDecodeOptions(codec) end +can_concatenate(::GzipDecodeOptions) = true + const _AllDecodeOptions = Union{ZlibDecodeOptions, DeflateDecodeOptions, GzipDecodeOptions} is_thread_safe(::_AllDecodeOptions) = true @@ -103,7 +105,7 @@ function try_resize_decode!(d::_AllDecodeOptions, dst::AbstractVector{UInt8}, sr end cconv_src = Base.cconvert(Ptr{UInt8}, src) # This outer loop is to decode a concatenation of multiple compressed streams. - # If `can_concatenate(d.codec)` is false, this outer loop doesn't rerun. + # If `can_concatenate(d)` is false, this outer loop doesn't rerun. while true stream = ZStream() inflateInit2(stream, _windowBits(d.codec)) @@ -166,8 +168,8 @@ function try_resize_decode!(d::_AllDecodeOptions, dst::AbstractVector{UInt8}, sr @assert real_dst_size ∈ 0:length(dst) return real_dst_size else - if can_concatenate(d.codec) - # try and decompress next stream if the codec can_concatenate + if can_concatenate(d) + # try and decompress next stream if the decoder can_concatenate # there must be progress @assert stream.avail_in < start_avail_in || stream.avail_out < start_avail_out break diff --git a/LibZstd/CHANGELOG.md b/LibZstd/CHANGELOG.md index 8367f0c..a6bff79 100644 --- a/LibZstd/CHANGELOG.md +++ b/LibZstd/CHANGELOG.md @@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
## Unreleased -- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) +- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72) [#73](https://github.com/JuliaIO/ChunkCodecs.jl/pull/73) - Added support for Julia 1.6 [#68](https://github.com/JuliaIO/ChunkCodecs.jl/pull/68) ## [v0.2.1](https://github.com/JuliaIO/ChunkCodecs.jl/tree/LibZstd-v0.2.1) - 2025-07-28 diff --git a/LibZstd/src/ChunkCodecLibZstd.jl b/LibZstd/src/ChunkCodecLibZstd.jl index 9e6b602..3b90c4a 100644 --- a/LibZstd/src/ChunkCodecLibZstd.jl +++ b/LibZstd/src/ChunkCodecLibZstd.jl @@ -66,7 +66,6 @@ See also [`ZstdEncodeOptions`](@ref) and [`ZstdDecodeOptions`](@ref) struct ZstdCodec <: Codec end decode_options(::ZstdCodec) = ZstdDecodeOptions() -can_concatenate(::ZstdCodec) = true include("encode.jl") include("decode.jl") diff --git a/LibZstd/src/decode.jl b/LibZstd/src/decode.jl index 53f5588..b29aa8b 100644 --- a/LibZstd/src/decode.jl +++ b/LibZstd/src/decode.jl @@ -50,6 +50,8 @@ end is_thread_safe(::ZstdDecodeOptions) = true +can_concatenate(::ZstdDecodeOptions) = true + # find_decompressed_size is modified from CodecZstd.jl # https://github.com/JuliaIO/CodecZstd.jl/blob/2f7d084b8b157d83ed85e9d15105f0a708038e45/src/libzstd.jl#L157C1-L215C4 # From mkitti's PR https://github.com/JuliaIO/CodecZstd.jl/pull/63