Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Bitshuffle/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## Unreleased

- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72)

## [v0.1.1](https://github.com/JuliaIO/ChunkCodecs.jl/tree/Bitshuffle-v0.1.1) - 2025-08-09

### Added
Expand Down
4 changes: 2 additions & 2 deletions Bitshuffle/Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
name = "ChunkCodecBitshuffle"
uuid = "1d859bbf-6282-4c80-a370-34c59bf7ec11"
authors = ["nhz2 <nhz2@cornell.edu>"]
version = "0.1.1"
version = "0.2.0-dev"

[deps]
ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1"

[compat]
ChunkCodecCore = "0.5.1"
ChunkCodecCore = "0.6"
julia = "1.6"

[workspace]
Expand Down
19 changes: 11 additions & 8 deletions Bitshuffle/src/ChunkCodecBitshuffle.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ using ChunkCodecCore:
DecodeOptions,
check_in_range,
check_contiguous,
DecodingError
DecodingError,
MaybeSize,
NOT_SIZE,
is_size
import ChunkCodecCore:
decode_options,
try_decode!,
Expand Down Expand Up @@ -215,17 +218,17 @@ decoded_size_range(e::BShufCodec) = Int64(0):e.element_size:typemax(Int64)-1

encode_bound(::BShufCodec, src_size::Int64)::Int64 = src_size

function try_encode!(e::BShufCodec, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
function try_encode!(e::BShufCodec, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
dst_size::Int64 = length(dst)
src_size::Int64 = length(src)
element_size = e.element_size
block_size = e.block_size
check_in_range(decoded_size_range(e); src_size)
if dst_size < src_size
nothing
NOT_SIZE
else
apply_blocks!(trans_bit_elem!, src, dst, element_size, block_size)
return src_size
src_size
end
end

Expand Down Expand Up @@ -255,7 +258,7 @@ decoded_size_range(x::BShufEncodeOptions) = decoded_size_range(x.codec)

encode_bound(x::BShufEncodeOptions, src_size::Int64)::Int64 = encode_bound(x.codec, src_size)

function try_encode!(x::BShufEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
function try_encode!(x::BShufEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
try_encode!(x.codec, dst, src)
end

Expand Down Expand Up @@ -285,7 +288,7 @@ function try_find_decoded_size(::BShufDecodeOptions, src::AbstractVector{UInt8})
length(src)
end

function try_decode!(d::BShufDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
function try_decode!(d::BShufDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
dst_size::Int64 = length(dst)
src_size::Int64 = length(src)
element_size = d.codec.element_size
Expand All @@ -295,10 +298,10 @@ function try_decode!(d::BShufDecodeOptions, dst::AbstractVector{UInt8}, src::Abs
throw(BShufDecodingError("src_size isn't a multiple of element_size"))
end
if dst_size < src_size
nothing
NOT_SIZE
else
apply_blocks!(untrans_bit_elem!, src, dst, element_size, block_size)
return src_size
src_size
end
end

Expand Down
28 changes: 14 additions & 14 deletions Bitshuffle/src/compress.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,14 @@ function encode_bound(e::BShufLZEncodeOptions, src_size::Int64)::Int64
bound
end

function try_encode!(e::BShufLZEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
function try_encode!(e::BShufLZEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
check_contiguous(dst)
check_contiguous(src)
src_size::Int64 = length(src)
dst_size::Int64 = length(dst)
check_in_range(decoded_size_range(e); src_size)
if dst_size < 12
return nothing
return NOT_SIZE
end
elem_size = e.codec.element_size
# This gets used to write to the header
Expand Down Expand Up @@ -155,30 +155,30 @@ function try_encode!(e::BShufLZEncodeOptions, dst::AbstractVector{UInt8}, src::A
# The leftover bytes are copied at the end if needed.
while src_left ≥ BLOCKED_MULT*elem_size
if dst_left < 4
return nothing # no space for block header
return NOT_SIZE # no space for block header
end
if src_left < block_size*elem_size
block_size = fld(src_left, BLOCKED_MULT*elem_size) * BLOCKED_MULT
end
src_offset = src_size - src_left
trans_bit_elem!(tmp_buf_bshuf, Int64(0), src, src_offset, elem_size, block_size)
compressed_nbytes = try_encode!(
maybe_compressed_nbytes = try_encode!(
e.options,
@view(dst[end-dst_left+1+4:end]),
@view(tmp_buf_bshuf[begin:begin+elem_size*block_size-1])
)
if isnothing(compressed_nbytes)
return nothing # no space for compressed block
)::MaybeSize
if !is_size(maybe_compressed_nbytes)
return NOT_SIZE # no space for compressed block
end
@assert !signbit(compressed_nbytes)
compressed_nbytes = Int64(maybe_compressed_nbytes)
store_int32_BE!(dst, dst_size - dst_left, Int32(compressed_nbytes))
src_left -= block_size*elem_size
dst_left -= 4 + compressed_nbytes
@assert dst_left ∈ 0:dst_size
@assert src_left ∈ 0:src_size
end
if src_left > dst_left
return nothing # no space for leftover bytes
return NOT_SIZE # no space for leftover bytes
end
src_offset = src_size - src_left
dst_offset = dst_size - dst_left
Expand Down Expand Up @@ -242,14 +242,14 @@ function try_find_decoded_size(d::BShufLZDecodeOptions, src::AbstractVector{UInt
end
end

function try_decode!(d::BShufLZDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
function try_decode!(d::BShufLZDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
check_contiguous(dst)
check_contiguous(src)
decoded_size = try_find_decoded_size(d, src)
decoded_size::Int64 = try_find_decoded_size(d, src)
src_size::Int64 = length(src)
dst_size::Int64 = length(dst)
if decoded_size > dst_size
return nothing
return NOT_SIZE
end
src_left::Int64 = src_size
dst_left::Int64 = decoded_size
Expand Down Expand Up @@ -294,9 +294,9 @@ function try_decode!(d::BShufLZDecodeOptions, dst::AbstractVector{UInt8}, src::A
d.options,
@view(tmp_buf_decode[begin:begin+block_size*elem_size-1]),
@view(src[end-src_left+1:end-src_left+c_size])
)
)::MaybeSize
src_left -= c_size
if ret != block_size*elem_size
if ret.val != block_size*elem_size
throw(BShufDecodingError("saved decoded size is not correct"))
end
untrans_bit_elem!(dst, dst_offset, tmp_buf_decode, Int64(0), elem_size, block_size)
Expand Down
16 changes: 9 additions & 7 deletions Bitshuffle/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ using ChunkCodecCore:
DecodeOptions,
NoopCodec,
try_find_decoded_size,
try_encode!
try_encode!,
MaybeSize,
is_size
using ChunkCodecTests: test_codec, test_encoder_decoder
using ChunkCodecLibLz4
using ChunkCodecLibZstd
Expand Down Expand Up @@ -75,7 +77,7 @@ function TestNoopEncodeOptions(;
end
ChunkCodecCore.encode_bound(::TestNoopEncodeOptions, src_size::Int64)::Int64 = src_size
ChunkCodecCore.decoded_size_range(e::TestNoopEncodeOptions) = Int64(8):e.element_size:typemax(Int64)-Int64(1)
function ChunkCodecCore.try_encode!(e::TestNoopEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
function ChunkCodecCore.try_encode!(e::TestNoopEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
dst_size::Int64 = length(dst)
src_size::Int64 = length(src)
check_in_range(decoded_size_range(e); src_size)
Expand Down Expand Up @@ -324,22 +326,22 @@ end
c = encode(e, u)
@test decode(d, c) == u
for i in 1:length(c)
@test isnothing(try_encode!(e, c[1:i-1], u))
@test !is_size(try_encode!(e, c[1:i-1], u))
end
# zero length
u = UInt8[]
c = zeros(UInt8, 12)
@test try_encode!(e, c, u) == length(c)
@test try_encode!(e, c, u) == MaybeSize(length(c))
@test decode(d, c) == u
for i in 1:length(c)
@test isnothing(try_encode!(e, c[1:i-1], u))
@test !is_size(try_encode!(e, c[1:i-1], u))
end
# one length
u = UInt8[0x00]
c = zeros(UInt8, 12+1)
@test try_encode!(e, c, u) == length(c)
@test try_encode!(e, c, u) == MaybeSize(length(c))
@test decode(d, c) == u
for i in 1:length(c)
@test isnothing(try_encode!(e, c[1:i-1], u))
@test !is_size(try_encode!(e, c[1:i-1], u))
end
end
2 changes: 2 additions & 0 deletions ChunkCodecCore/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## Unreleased

### BREAKING the return type of `try_encode!`, `try_decode!`, and `try_resize_decode!` changed to a new `MaybeSize` type [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72)

## [v0.5.3](https://github.com/JuliaIO/ChunkCodecs.jl/tree/ChunkCodecCore-v0.5.3) - 2025-08-09

- Added support for Julia 1.6 [#68](https://github.com/JuliaIO/ChunkCodecs.jl/pull/68)
Expand Down
2 changes: 1 addition & 1 deletion ChunkCodecCore/Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ChunkCodecCore"
uuid = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1"
authors = ["nhz2 <nhz2@cornell.edu>"]
version = "0.5.3"
version = "0.6.0-dev"

[compat]
julia = "1.6"
Expand Down
3 changes: 3 additions & 0 deletions ChunkCodecCore/src/ChunkCodecCore.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ if VERSION >= v"1.11.0-DEV.469"
EncodeOptions,
DecodeOptions,

MaybeSize,
is_size,
NOT_SIZE,
DecodingError,
DecodedSizeError,
decode!,
Expand Down
87 changes: 72 additions & 15 deletions ChunkCodecCore/src/errors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,91 @@ Generic error for data that cannot be decoded.
"""
abstract type DecodingError <: Exception end

"""
struct MaybeSize
val::Int64
end

If `val ≥ 0`, it is a size and can be converted to and from `Int64`.
If `val < 0`, it is not a size: converting it to `Int64` will error, and so will converting a negative `Int64` to `MaybeSize`.
If `val == typemin(Int64)`, the size is unknown.
Any other negative `val` is a size hint of `-val`.

"""
struct MaybeSize
# Non-negative: the size itself. `typemin(Int64)`: size unknown.
# Any other negative value: a size hint, stored negated (the hint is `-val`).
val::Int64
end

"""
const NOT_SIZE = MaybeSize(typemin(Int64))
"""
const NOT_SIZE = MaybeSize(typemin(Int64)) # `typemin(Int64)` marks an unknown size (not a size hint)
"""
    is_size(x::MaybeSize)::Bool

Return `true` if `x` holds a size, i.e. if `x.val` is non-negative.
Return `false` if `x.val` is negative.
"""
is_size(x::MaybeSize)::Bool = x.val >= 0
# Unwrap a `MaybeSize` into the plain `Int64` size it holds.
# Throws `InexactError` when `x` does not hold a size (negative `val`).
function Base.Int64(x::MaybeSize)
    is_size(x) || throw(InexactError(:Int64, Int64, x))
    return x.val
end
# Conversion to `Int64` defers to the checked `Int64(::MaybeSize)` method,
# so it throws for a `MaybeSize` that does not hold a size.
Base.convert(::Type{Int64}, x::MaybeSize) = Int64(x)
# Wrap a non-negative `Int64` as a `MaybeSize` holding that size.
# A negative `Int64` is rejected with `InexactError` rather than wrapped.
function Base.convert(::Type{MaybeSize}, x::Int64)::MaybeSize
    x < 0 && throw(InexactError(:convert, MaybeSize, x))
    return MaybeSize(x)
end

"""
struct DecodedSizeError <: Exception
DecodedSizeError(max_size, decoded_size)

Unable to decode the data because the decoded size is larger than `max_size`
or smaller than expected.
If the decoded size is unknown `decoded_size` is `nothing`.
Exception thrown when the decoded data size doesn't match expectations or exceeds limits.

# Fields
- `max_size::Int64`: The maximum allowed or expected size in bytes
- `decoded_size::MaybeSize`: The actual decoded size, size hint, or `NOT_SIZE` if unknown

This error can occur in several scenarios:
1. Decoded size exceeds the maximum allowed size
2. Decoded size is less than expected
3. Decoder provides a size hint when the decoded size exceeds limits
4. Decoded size is completely unknown but exceeds limits
"""
struct DecodedSizeError <: Exception
max_size::Int64
decoded_size::Union{Nothing, Int64}
decoded_size::MaybeSize
end

function Base.showerror(io::IO, err::DecodedSizeError)
print(io, "DecodedSizeError: ")
if isnothing(err.decoded_size)
print(io, "decoded size is greater than max size: ")
if err.decoded_size === NOT_SIZE
print(io, "decoded size > ")
print(io, err.max_size)
elseif err.decoded_size < err.max_size
print(io, "decoded size: ")
print(io, err.decoded_size)
print(io, " is less than expected size: ")
elseif !is_size(err.decoded_size)
suggested_size = -err.decoded_size.val
print(io, "decoded size > ")
print(io, err.max_size)
print(io, ", try max_size = ")
print(io, suggested_size)
else
print(io, "decoded size: ")
print(io, err.decoded_size)
print(io, " is greater than max size: ")
print(io, err.max_size)
decoded_size = err.decoded_size.val
if decoded_size < err.max_size
print(io, "decoded size ")
print(io, decoded_size)
print(io, " < expected ")
print(io, err.max_size)
else
print(io, "decoded size ")
print(io, decoded_size)
print(io, " > ")
print(io, err.max_size)
end
end
nothing
end
end
Loading
Loading