Skip to content

Commit

Permalink
Merge pull request #112 from theabhirath/chunk-size
Browse files Browse the repository at this point in the history
Extend `chunk` to take `size` as an argument
  • Loading branch information
ToucheSir committed Jul 10, 2022
2 parents b29bf41 + 1128724 commit 387b371
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MLUtils"
uuid = "f1d291b0-491e-4a28-83b9-f70985020b54"
authors = ["Carlo Lucibello <carlo.lucibello@gmail.com> and contributors"]
version = "0.2.9"
version = "0.2.10"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Expand Down
2 changes: 1 addition & 1 deletion src/batchview.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ function BatchView(data::T; batchsize::Int=1, partial::Bool=true, collate=Val(no
throw(ArgumentError("`collate` must be one of `nothing`, `true` or `false`."))
end
E = _batchviewelemtype(data, collate)
count = partial ? ceil(Int, n / batchsize) : floor(Int, n / batchsize)
count = partial ? cld(n, batchsize) : fld(n, batchsize)
BatchView{E,T,typeof(collate)}(data, batchsize, count, partial)
end

Expand Down
49 changes: 35 additions & 14 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ unstack(xs; dims::Int) = [copy(selectdim(xs, dims, i)) for i in 1:size(xs, dims)

"""
chunk(x, n; [dims])
chunk(x; [size, dims])
Split `x` into `n` parts. The parts contain the same number of elements
except possibly for the last one that can be smaller.
Split `x` into `n` parts or, alternatively, into chunks of size `size`. All parts contain
the same number of elements, except possibly the last one, which can be smaller.
If `x` is an array, `dims` can be used to specify along which dimension to
split (defaults to the last dimension).
Expand All @@ -138,6 +139,14 @@ julia> chunk(1:10, 3)
5:8
9:10
julia> chunk(1:10; size = 2)
5-element Vector{UnitRange{Int64}}:
1:2
3:4
5:6
7:8
9:10
julia> x = reshape(collect(1:20), (5, 4))
5×4 Matrix{Int64}:
1 6 11 16
Expand All @@ -156,30 +165,42 @@ julia> xs[1]
1 6 11 16
2 7 12 17
3 8 13 18
julia> xes = chunk(x; size = 2, dims = 2)
2-element Vector{SubArray{Int64, 2, Matrix{Int64}, Tuple{Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}}, true}}:
[1 6; 2 7; … ; 4 9; 5 10]
[11 16; 12 17; … ; 14 19; 15 20]
julia> xes[2]
5×2 view(::Matrix{Int64}, :, 3:4) with eltype Int64:
11 16
12 17
13 18
14 19
15 20
```
"""
chunk(x, n::Int) = collect(Iterators.partition(x, ceil(Int, length(x) / n)))
function chunk(x; size::Int)
    # Generic fallback for any iterable: build the lazy partition of `x` into
    # consecutive groups of `size` elements, then materialise it as a vector.
    parts = Iterators.partition(x, size)
    return collect(parts)
end

# Positional form: request `n` parts. The per-chunk length is the ceiling of
# `length(x) / n`; the keyword method then performs the actual splitting.
function chunk(x, n::Int)
    per_chunk = cld(length(x), n)
    return chunk(x; size = per_chunk)
end

function chunk(x::AbstractArray, n::Int; dims::Int=ndims(x))
idxs = _partition_idxs(x, n, dims)
# Array method: cut `x` along dimension `dims` (the last dimension by default)
# into views that each span `size` consecutive indices of that dimension.
function chunk(x::AbstractArray; size::Int, dims::Int=ndims(x))
    index_groups = _partition_idxs(x, size, dims)
    # `selectdim` returns a view, so no data is copied for the individual chunks.
    return map(group -> selectdim(x, dims, group), index_groups)
end

# Positional form for arrays: request `n` parts along `dims`. The per-chunk
# extent is the ceiling of `size(x, dims) / n`.
function chunk(x::AbstractArray, n::Int; dims::Int=ndims(x))
    per_chunk = cld(size(x, dims), n)
    return chunk(x; size = per_chunk, dims = dims)
end

function _partition_idxs(x, n, dims)
bs = ceil(Int, size(x, dims) / n)
Iterators.partition(axes(x, dims), bs)
end

function rrule(::typeof(chunk), x::AbstractArray, n::Int; dims::Int=ndims(x))
function rrule(::typeof(chunk), x::AbstractArray; size::Int, dims::Int=ndims(x))
# this is the implementation of chunk
idxs = _partition_idxs(x, n, dims)
idxs = _partition_idxs(x, size, dims)
y = [selectdim(x, dims, i) for i in idxs]
valdims = Val(dims)
chunk_pullback(dy) = (NoTangent(), ∇chunk(unthunk(dy), x, idxs, valdims), NoTangent())
chunk_pullback(dy) = (NoTangent(), ∇chunk(unthunk(dy), x, idxs, valdims))

return y, chunk_pullback
end

# Lazily group the indices of `x` along dimension `dims` into consecutive runs
# of `size` indices each; shared by `chunk` and its `rrule`.
function _partition_idxs(x, size, dims)
    dim_axis = axes(x, dims)
    return Iterators.partition(dim_axis, size)
end

# Similar to ∇eachslice https://github.com/JuliaDiff/ChainRules.jl/blob/8108a77a96af5d4b0c460aac393e44f8943f3c5e/src/rulesets/Base/indexing.jl#L77
function ∇chunk(dys, x::AbstractArray, idxs, vd::Val{dim}) where {dim}
i1 = findfirst(dy -> !(dy isa AbstractZero), dys)
Expand Down
21 changes: 14 additions & 7 deletions test/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,20 +101,27 @@ end
x = reshape(collect(1:20), (5, 4))
cs = chunk(x, 2)
@test length(cs) == 2
cs[1] == [1 6; 2 7; 3 8; 4 9; 5 10]
cs[2] == [11 16; 12 17; 13 18; 14 19; 15 20]

@test cs[1] == [1 6; 2 7; 3 8; 4 9; 5 10]
@test cs[2] == [11 16; 12 17; 13 18; 14 19; 15 20]

x = permutedims(reshape(collect(1:10), (2, 5)))
cs = chunk(x; size = 2, dims = 1)
@test length(cs) == 3
@test cs[1] == [1 2; 3 4]
@test cs[2] == [5 6; 7 8]
@test cs[3] == [9 10]

# test gradient
test_zygote(chunk, rand(10), 3, check_inferred=false)

# indirect test of second order derivatives
n = 2
dims = 2
x = rand(4, 5)
y = chunk(x, 2)
dy = randn!.(collect.(y))
idxs = MLUtils._partition_idxs(x, n, dims)
test_zygote(MLUtils.∇chunk, dy, x, idxs, Val(dims), check_inferred=false)
l = chunk(x, 2)
dl = randn!.(collect.(l))
idxs = MLUtils._partition_idxs(x, cld(size(x, dims), n), dims)
test_zygote(MLUtils.∇chunk, dl, x, idxs, Val(dims), check_inferred=false)
end

@testset "group_counts" begin
Expand Down

2 comments on commit 387b371

@CarloLucibello
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/65329

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.2.10 -m "<description of version>" 387b371287d00ce7e40a1be092db0dc4ad624b58
git push origin v0.2.10

Please sign in to comment.