From d5339ea6090cab94650173b9ad677980b6c5a941 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Thu, 2 Jan 2014 15:08:14 -0600 Subject: [PATCH 01/12] move reducedim related stuff to a separate file --- base/abstractarray.jl | 25 ------------------------- base/reducedim.jl | 28 ++++++++++++++++++++++++++++ base/sysimg.jl | 1 + 3 files changed, 29 insertions(+), 25 deletions(-) create mode 100644 base/reducedim.jl diff --git a/base/abstractarray.jl b/base/abstractarray.jl index c62a26b75a47e..c55b0ef651c30 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1409,34 +1409,9 @@ function nnz{T}(a::AbstractArray{T}) return n end -# for reductions that expand 0 dims to 1 -reduced_dims(A, region) = ntuple(ndims(A), i->(in(i, region) ? 1 : - size(A,i))) - -# keep 0 dims in place -reduced_dims0(A, region) = ntuple(ndims(A), i->(size(A,i)==0 ? 0 : - in(i, region) ? 1 : - size(A,i))) - -reducedim(f::Function, A, region, v0) = - reducedim(f, A, region, v0, similar(A, reduced_dims(A, region))) - -maximum{T}(A::AbstractArray{T}, region) = - isempty(A) ? similar(A,reduced_dims0(A,region)) : reducedim(scalarmax,A,region,typemin(T)) -minimum{T}(A::AbstractArray{T}, region) = - isempty(A) ? similar(A,reduced_dims0(A,region)) : reducedim(scalarmin,A,region,typemax(T)) -sum{T}(A::AbstractArray{T}, region) = reducedim(+,A,region,zero(T)) -prod{T}(A::AbstractArray{T}, region) = reducedim(*,A,region,one(T)) - -all(A::AbstractArray{Bool}, region) = reducedim(&,A,region,true) -any(A::AbstractArray{Bool}, region) = reducedim(|,A,region,false) -sum(A::AbstractArray{Bool}, region) = reducedim(+,A,region,0,similar(A,Int,reduced_dims(A,region))) sum(A::AbstractArray{Bool}) = nnz(A) prod(A::AbstractArray{Bool}) = error("use all() instead of prod() for boolean arrays") -prod(A::AbstractArray{Bool}, region) = - error("use all() instead of prod() for boolean arrays") - # a fast implementation of sum in sequential order (from left to right) function sum_seq{T}(a::AbstractArray{T}, ifirst::Int, ilast::Int) diff --git a/base/reducedim.jl b/base/reducedim.jl new file mode 100644 index 0000000000000..57d59d61b2fe2 --- /dev/null +++ b/base/reducedim.jl @@ -0,0 +1,28 @@ + +# for reductions that expand 0 dims to 1 +reduced_dims(A, region) = ntuple(ndims(A), i->(in(i, region) ? 1 : + size(A,i))) + +# keep 0 dims in place +reduced_dims0(A, region) = ntuple(ndims(A), i->(size(A,i)==0 ? 0 : + in(i, region) ? 1 : + size(A,i))) + +reducedim(f::Function, A, region, v0) = + reducedim(f, A, region, v0, similar(A, reduced_dims(A, region))) + +maximum{T}(A::AbstractArray{T}, region) = + isempty(A) ? similar(A,reduced_dims0(A,region)) : reducedim(scalarmax,A,region,typemin(T)) +minimum{T}(A::AbstractArray{T}, region) = + isempty(A) ? similar(A,reduced_dims0(A,region)) : reducedim(scalarmin,A,region,typemax(T)) +sum{T}(A::AbstractArray{T}, region) = reducedim(+,A,region,zero(T)) +prod{T}(A::AbstractArray{T}, region) = reducedim(*,A,region,one(T)) + +all(A::AbstractArray{Bool}, region) = reducedim(&,A,region,true) +any(A::AbstractArray{Bool}, region) = reducedim(|,A,region,false) +sum(A::AbstractArray{Bool}, region) = reducedim(+,A,region,0,similar(A,Int,reduced_dims(A,region))) + +prod(A::AbstractArray{Bool}, region) = + error("use all() instead of prod() for boolean arrays") + + \ No newline at end of file diff --git a/base/sysimg.jl b/base/sysimg.jl index e5da016db2fba..1503d7ebebf6b 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -42,6 +42,7 @@ include("pointer.jl") include("float.jl") include("reduce.jl") +include("reducedim.jl") include("complex.jl") include("rational.jl") From 994bbceb6813e8178caf1a94884c62ce52c04f59 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Thu, 2 Jan 2014 15:18:43 -0600 Subject: [PATCH 02/12] move reducedim codes from array.jl to reducedim.jl --- base/array.jl | 76 ----------------------------------------------- base/reducedim.jl | 75 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 77 deletions(-) diff --git a/base/array.jl b/base/array.jl index 197e203229bb6..7ddecb892de91 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1401,82 +1401,6 @@ function indcopy(sz::Dims, I::(RangeIndex...)) end -## Reductions ## - -# TODO: -# - find out why inner loop with dimsA[i] instead of size(A,i) is way too slow - -let reducedim_cache = nothing -# generate the body of the N-d loop to compute a reduction -function gen_reducedim_func(n, f) - ivars = { symbol(string("i",i)) for i=1:n } - # limits and vars for reduction loop - lo = { symbol(string("lo",i)) for i=1:n } - hi = { symbol(string("hi",i)) for i=1:n } - rvars = { symbol(string("r",i)) for i=1:n } - setlims = { quote - # each dim of reduction is either 1:sizeA or ivar:ivar - if in($i,region) - $(lo[i]) = 1 - $(hi[i]) = size(A,$i) - else - $(lo[i]) = $(hi[i]) = $(ivars[i]) - end - end for i=1:n } - rranges = { :( $(lo[i]):$(hi[i]) ) for i=1:n } # lo:hi for all dims - body = - quote - _tot = v0 - $(setlims...) - $(make_loop_nest(rvars, rranges, - :(_tot = ($f)(_tot, A[$(rvars...)])))) - R[_ind] = _tot - _ind += 1 - end - quote - local _F_ - function _F_(f, A, region, R, v0) - _ind = 1 - $(make_loop_nest(ivars, { :(1:size(R,$i)) for i=1:n }, body)) - end - _F_ - end -end - -global reducedim -function reducedim(f::Function, A, region, v0, R) - ndimsA = ndims(A) - - if is(reducedim_cache,nothing) - reducedim_cache = Dict() - end - - key = ndimsA - fname = :f - - if (is(f,+) && (fname=:+;true)) || - (is(f,*) && (fname=:*;true)) || - (is(f,scalarmax) && (fname=:scalarmax;true)) || - (is(f,scalarmin) && (fname=:scalarmin;true)) || - (is(f,&) && (fname=:&;true)) || - (is(f,|) && (fname=:|;true)) - key = (fname, ndimsA) - end - - if !haskey(reducedim_cache,key) - fexpr = gen_reducedim_func(ndimsA, fname) - func = eval(fexpr) - reducedim_cache[key] = func - else - func = reducedim_cache[key] - end - - func(f, A, region, R, v0) - - return R -end -end - ## Filter ## # given a function returning a boolean and an array, return matching elements diff --git a/base/reducedim.jl b/base/reducedim.jl index 57d59d61b2fe2..0c129543f8e45 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -25,4 +25,77 @@ sum(A::AbstractArray{Bool}, region) = reducedim(+,A,region,0,similar(A,Int,reduc prod(A::AbstractArray{Bool}, region) = error("use all() instead of prod() for boolean arrays") - \ No newline at end of file +# TODO: +# - find out why inner loop with dimsA[i] instead of size(A,i) is way too slow + +let reducedim_cache = nothing +# generate the body of the N-d loop to compute a reduction +function gen_reducedim_func(n, f) + ivars = { symbol(string("i",i)) for i=1:n } + # limits and vars for reduction loop + lo = { symbol(string("lo",i)) for i=1:n } + hi = { symbol(string("hi",i)) for i=1:n } + rvars = { symbol(string("r",i)) for i=1:n } + setlims = { quote + # each dim of reduction is either 1:sizeA or ivar:ivar + if in($i,region) + $(lo[i]) = 1 + $(hi[i]) = size(A,$i) + else + $(lo[i]) = $(hi[i]) = $(ivars[i]) + end + end for i=1:n } + rranges = { :( $(lo[i]):$(hi[i]) ) for i=1:n } # lo:hi for all dims + body = + quote + _tot = v0 + $(setlims...) + $(make_loop_nest(rvars, rranges, + :(_tot = ($f)(_tot, A[$(rvars...)])))) + R[_ind] = _tot + _ind += 1 + end + quote + local _F_ + function _F_(f, A, region, R, v0) + _ind = 1 + $(make_loop_nest(ivars, { :(1:size(R,$i)) for i=1:n }, body)) + end + _F_ + end +end + +global reducedim +function reducedim(f::Function, A, region, v0, R) + ndimsA = ndims(A) + + if is(reducedim_cache,nothing) + reducedim_cache = Dict() + end + + key = ndimsA + fname = :f + + if (is(f,+) && (fname=:+;true)) || + (is(f,*) && (fname=:*;true)) || + (is(f,scalarmax) && (fname=:scalarmax;true)) || + (is(f,scalarmin) && (fname=:scalarmin;true)) || + (is(f,&) && (fname=:&;true)) || + (is(f,|) && (fname=:|;true)) + key = (fname, ndimsA) + end + + if !haskey(reducedim_cache,key) + fexpr = gen_reducedim_func(ndimsA, fname) + func = eval(fexpr) + reducedim_cache[key] = func + else + func = reducedim_cache[key] + end + + func(f, A, region, R, v0) + + return R +end +end + From 40603916d233d6247908f3f4235b6aca9d734177 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Fri, 3 Jan 2014 14:17:26 -0600 Subject: [PATCH 03/12] more efficient implementation of reduced_dims --- base/reducedim.jl | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 0c129543f8e45..b5f2be7400732 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -1,12 +1,43 @@ +## Functions to compute the reduced shape + # for reductions that expand 0 dims to 1 -reduced_dims(A, region) = ntuple(ndims(A), i->(in(i, region) ? 1 : - size(A,i))) +reduced_dims(a::AbstractArray, region) = reduced_dims(size(a), region) + +# for reductions that keep 0 dims as 0 +reduced_dims0(a::AbstractArray, region) = reduced_dims0(size(a), region) + +reduced_dims{N}(siz::NTuple{N,Int}, d::Int, rd::Int) = d == 1 ? tuple(rd, siz[d+1:N]...) : + d == N ? tuple(siz[1:N-1]..., rd) : + 1 < d < N ? tuple(siz[1:d-1]..., rd, siz[d+1:N]...) : + siz + +reduced_dims{N}(siz::NTuple{N,Int}, d::Int) = reduced_dims(siz, d, 1) + +reduced_dims0{N}(siz::NTuple{N,Int}, d::Int) = reduced_dims(siz, d, (siz[d] == 0 ? 0 : 1)) + +function reduced_dims{N}(siz::NTuple{N,Int}, region) + rsiz = [siz...] + for i in region + if 1 <= i <= N + rsiz[i] = 1 + end + end + tuple(rsiz...) +end + +function reduced_dims0{N}(siz::NTuple{N,Int}, region) + rsiz = [siz...] + for i in region + if i <= i <= N + rsiz[i] = (rsiz[i] == 0 ? 0 : 1) + end + end + tuple(rsiz...) +end + -# keep 0 dims in place -reduced_dims0(A, region) = ntuple(ndims(A), i->(size(A,i)==0 ? 0 : - in(i, region) ? 1 : - size(A,i))) +# reduction codes reducedim(f::Function, A, region, v0) = reducedim(f, A, region, v0, similar(A, reduced_dims(A, region))) From 579ab44d399bc688a43856228ca1d5cfed51b504 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Fri, 3 Jan 2014 22:14:13 -0600 Subject: [PATCH 04/12] Fast sum along dimensions over contiguous arrays --- base/reducedim.jl | 201 +++++++++++++++++++++++++++++++++++++++++++++- base/sysimg.jl | 4 +- test/arrayops.jl | 5 ++ test/reducedim.jl | 44 ++++++++++ 4 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 test/reducedim.jl diff --git a/base/reducedim.jl b/base/reducedim.jl index b5f2be7400732..f678e83145d27 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -37,7 +37,7 @@ function reduced_dims0{N}(siz::NTuple{N,Int}, region) end -# reduction codes +##### (original) reduction codes for arbitrary arrays reducedim(f::Function, A, region, v0) = reducedim(f, A, region, v0, similar(A, reduced_dims(A, region))) @@ -130,3 +130,202 @@ function reducedim(f::Function, A, region, v0, R) end end + + +##### Below are optimized codes for contiguous arrays + +function rcompress_dims{N}(siz::NTuple{N,Int}, region) + isrd = fill(false, N) + for k in region + if 1 <= k <= N + isrd[k] = true + end + end + + sdims = Int[] + sizehint(sdims, N) + b = isrd[1] + n = siz[1] + i = 2 + while i <= N + if isrd[i] == b + n *= siz[i] + else + b = !b + push!(sdims, n) + n = siz[i] + end + i += 1 + end + if n != 1 + push!(sdims, n) + end + return (isrd[1], sdims) +end + +function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) + # Parameters: + # + # - fname: the interface function name (e.g. sum, maximum) + # - comb: the combination operation (e.g. +) + # - sker: a kernel function that reduces a vector (or a range of it) to a scalar + # - ker0!: a kernel that initializes an accumulator array using the first column of terms + # - ker1!: a kernel that accumulates a term column to an accumulating column + # + + + fname! = symbol("$(fname)!") + fa! = symbol("$(fname)_a!") + fb! = symbol("$(fname)_b!") + + quote + global $(fname!) + function $(fname!)(dst::Array, a::Array, region) + isrd1, secs = rcompress_dims(size(a), region) + @assert prod(secs) == length(a) + if isrd1 + $(fa!)(true, dst, 0, a, 0, secs[end:-1:1]...) + else + $(fb!)(true, dst, 0, a, 0, secs[end:-1:1]...) + end + dst + end + + # $(fa!) + global $(fa!) + function $(fa!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int) + if isinit + dst[od+1] = $(sker)(a, oa+1, oa+n1) + else + dst[od+1] = $(comb)(dst[od+1], $(sker)(a, oa+1, oa+n1)) + end + end + + function $(fa!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int) + if isinit + for j = 1:n1 + alast = oa + n2 + dst[od+j] = $(sker)(a, oa+1, alast) + oa = alast + end + else + for j = 1:n1 + alast = oa + n2 + dst[od+j] = $(comb)(dst[od+j], $(sker)(a, oa+1, alast)) + oa = alast + end + end + end + + function $(fa!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int, n3::Int, ns::Int...) + as::Int = *(n2, n3, ns...) + if length(ns) & 1 == 0 + $(fa!)(isinit, dst, od, a, oa, n2, n3, ns...) + oa += as + + for j = 2:n1 + $(fa!)(false, dst, od, a, oa, n2, n3, ns...) + oa += as + end + else + ds::Int = *(n3, ns[2:2:end]...) + for j = 1:n1 + $(fa!)(isinit, dst, od, a, oa, n2, n3, ns...) + od += ds + oa += as + end + end + end + + # $(fb!) + global $(fb!) + function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int) + if isinit + $(ker0!)(dst, od, a, oa, n1) + else + $(ker1!)(dst, od, a, oa, n1) + end + end + + function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int) + if isinit + $(ker0!)(dst, od, a, oa, n2) + else + $(ker1!)(dst, od, a, oa, n2) + end + oa += n2 + + for j = 2:n1 + $(ker1!)(dst, od, a, oa, n2) + oa += n2 + end + end + + function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int, n3::Int, ns::Int...) + as = *(n2, n3, ns...) + if length(ns) & 1 == 0 + ds::Int = *(n3, ns[2:2:end]...) + for j = 1:n1 + $(fb!)(isinit, dst, od, a, oa, n2, n3, ns...) + od += ds + oa += as + end + else + $(fb!)(isinit, dst, od, a, oa, n2, n3, ns...) + oa += as + + for j = 2:n1 + $(fb!)(false, dst, od, a, oa, n2, n3, ns...) + oa += as + end + end + end + end +end + +macro code_reducedim(fname, comb, sker, ker0, ker1) + esc(generate_reducedim_funcs(fname, comb, sker, ker0, ker1)) +end + + +# sum + +function vcopy!(dst::Array, od::Int, a::Array, oa::Int, n::Int) + for i = 1:n + @inbounds dst[od+i] = a[oa+i] + end +end + +vcopy!{T}(dst::Array{T}, od::Int, a::Array{T}, oa::Int, n::Int) = copy!(dst, od+1, a, oa+1, n) + +function vadd!(dst::Array, od::Int, a::Array, oa::Int, n::Int) + for i = 1:n + @inbounds dst[od+i] += a[oa+i] + end +end + +@code_reducedim sum (+) sum_seq vcopy! vadd! +function sum{T<:Number}(a::Array{T}, region) + dst = Array(T, reduced_dims(a, region)) + if !isempty(dst) + if isempty(a) + fill!(dst, zero(T)) + else + sum!(dst, a, region) + end + end + return dst +end + +function sum(a::Array{Bool}, region) + dst = Array(Int, reduced_dims(a, region)) + if !isempty(dst) + if isempty(a) + fill!(dst, 0) + else + sum!(dst, a, region) + end + end + return dst +end + diff --git a/base/sysimg.jl b/base/sysimg.jl index 1503d7ebebf6b..f019b3c14e9c4 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -42,7 +42,6 @@ include("pointer.jl") include("float.jl") include("reduce.jl") -include("reducedim.jl") include("complex.jl") include("rational.jl") @@ -180,6 +179,9 @@ importall .LinAlg include("broadcast.jl") importall .Broadcast +# reduction along dims +include("reducedim.jl") # macros in this file relies on string.jl + # signal processing include("fftw.jl") include("dsp.jl") diff --git a/test/arrayops.jl b/test/arrayops.jl index dbc8d5945d893..83be870ee8ce8 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -790,3 +790,8 @@ A = [NaN]; B = [NaN] @test !(A==B) @test isequal(A,B) @test A!==B + +# complete testsuite for reducedim + +include("reducedim.jl") + diff --git a/test/reducedim.jl b/test/reducedim.jl new file mode 100644 index 0000000000000..0a79003ef0d88 --- /dev/null +++ b/test/reducedim.jl @@ -0,0 +1,44 @@ + +# Supporting routines + +@test Base.rcompress_dims((3, 4), 1) == (true, [3, 4]) +@test Base.rcompress_dims((3, 4), 2) == (false, [3, 4]) +@test Base.rcompress_dims((3, 4), 3) == (false, [12]) +@test Base.rcompress_dims((3, 4), (1, 2)) == (true, [12]) + +@test Base.rcompress_dims((3, 4, 5), 1) == (true, [3, 20]) +@test Base.rcompress_dims((3, 4, 5), 2) == (false, [3, 4, 5]) +@test Base.rcompress_dims((3, 4, 5), 3) == (false, [12, 5]) +@test Base.rcompress_dims((3, 4, 5), (1, 2)) == (true, [12, 5]) +@test Base.rcompress_dims((3, 4, 5), (1, 3)) == (true, [3, 4, 5]) +@test Base.rcompress_dims((3, 4, 5), (2, 3)) == (false, [3, 20]) +@test Base.rcompress_dims((3, 4, 5), (1, 2, 3)) == (true, [60]) + +@test Base.rcompress_dims((3, 4, 5, 2), 1) == (true, [3, 40]) +@test Base.rcompress_dims((3, 4, 5, 2), 2) == (false, [3, 4, 10]) +@test Base.rcompress_dims((3, 4, 5, 2), 3) == (false, [12, 5, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), 4) == (false, [60, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), (1, 2)) == (true, [12, 10]) +@test Base.rcompress_dims((3, 4, 5, 2), (1, 3)) == (true, [3, 4, 5, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), (1, 4)) == (true, [3, 20, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), (2, 3)) == (false, [3, 20, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), (2, 4)) == (false, [3, 4, 5, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), (3, 4)) == (false, [12, 10]) +@test Base.rcompress_dims((3, 4, 5, 2), (1, 2, 3)) == (true, [60, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), (1, 2, 4)) == (true, [12, 5, 2]) +@test Base.rcompress_dims((3, 4, 5, 2), (1, 3, 4)) == (true, [3, 4, 10]) +@test Base.rcompress_dims((3, 4, 5, 2), (2, 3, 4)) == (false, [3, 40]) +@test Base.rcompress_dims((3, 4, 5, 2), (1, 2, 3, 4)) == (true, [120]) + +# main tests + +safe_sum{T}(A::Array{T}, region) = reducedim(+,A,region,zero(T)) + +Areduc = rand(3, 4, 5, 6) +for region in { + 1, 2, 3, 4, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4), + (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)} + + @test_approx_eq sum(Areduc, region) safe_sum(Areduc, region) +end + From 2ffdbfaaf799df0b2c84100f1b46ed16f4c72acd Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Fri, 3 Jan 2014 23:17:10 -0600 Subject: [PATCH 05/12] specialized for cases with region being an integer --- base/reducedim.jl | 30 +++++++++++++++++++++++++----- test/reducedim.jl | 2 +- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index f678e83145d27..c5edad11636a4 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -180,13 +180,33 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) quote global $(fname!) + function $(fname!)(dst::Array, a::Array, dim::Integer) + nd = ndims(a) + siz = size(a) + if 1 <= dim <= nd + if dim == 1 + $(fa!)(true, dst, 0, a, 0, prod(siz[2:nd]), siz[1]) + elseif dim == nd + $(fb!)(true, dst, 0, a, 0, siz[nd], prod(siz[1:nd-1])) + else + $(fb!)(true, dst, 0, a, 0, prod(siz[dim+1:nd]), siz[dim], prod(siz[1:dim-1])) + end + else + $(ker0!)(dst, 0, a, 0, length(a)) + end + dst + end + function $(fname!)(dst::Array, a::Array, region) - isrd1, secs = rcompress_dims(size(a), region) - @assert prod(secs) == length(a) - if isrd1 - $(fa!)(true, dst, 0, a, 0, secs[end:-1:1]...) + if length(region) == 1 + $(fname!)(dst, a, region[1]) else - $(fb!)(true, dst, 0, a, 0, secs[end:-1:1]...) + isrd1, secs = rcompress_dims(size(a), region) + if isrd1 + $(fa!)(true, dst, 0, a, 0, secs[end:-1:1]...) + else + $(fb!)(true, dst, 0, a, 0, secs[end:-1:1]...) + end end dst end diff --git a/test/reducedim.jl b/test/reducedim.jl index 0a79003ef0d88..662098305ed6f 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -36,7 +36,7 @@ safe_sum{T}(A::Array{T}, region) = reducedim(+,A,region,zero(T)) Areduc = rand(3, 4, 5, 6) for region in { - 1, 2, 3, 4, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4), + 1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4), (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)} @test_approx_eq sum(Areduc, region) safe_sum(Areduc, region) From 01dfa4aa60b7b10703f2214e4c8856a527b94036 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 4 Jan 2014 14:26:59 -0600 Subject: [PATCH 06/12] some tuning of the vector kernels --- base/reducedim.jl | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index c5edad11636a4..920c6c53b026a 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -173,7 +173,6 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) # - ker1!: a kernel that accumulates a term column to an accumulating column # - fname! = symbol("$(fname)!") fa! = symbol("$(fname)_a!") fb! = symbol("$(fname)_b!") @@ -192,7 +191,7 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) $(fb!)(true, dst, 0, a, 0, prod(siz[dim+1:nd]), siz[dim], prod(siz[1:dim-1])) end else - $(ker0!)(dst, 0, a, 0, length(a)) + $(ker0!)(dst, 1, a, 1, length(a)) end dst end @@ -261,22 +260,22 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) global $(fb!) function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int) if isinit - $(ker0!)(dst, od, a, oa, n1) + $(ker0!)(dst, od+1, a, oa+1, n1) else - $(ker1!)(dst, od, a, oa, n1) + $(ker1!)(dst, od+1, a, oa+1, n1) end end function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int) if isinit - $(ker0!)(dst, od, a, oa, n2) + $(ker0!)(dst, od+1, a, oa+1, n2) else - $(ker1!)(dst, od, a, oa, n2) + $(ker1!)(dst, od+1, a, oa+1, n2) end oa += n2 for j = 2:n1 - $(ker1!)(dst, od, a, oa, n2) + $(ker1!)(dst, od+1, a, oa+1, n2) oa += n2 end end @@ -312,15 +311,19 @@ end function vcopy!(dst::Array, od::Int, a::Array, oa::Int, n::Int) for i = 1:n - @inbounds dst[od+i] = a[oa+i] + @inbounds dst[od] = a[oa] + od += 1 + oa += 1 end end -vcopy!{T}(dst::Array{T}, od::Int, a::Array{T}, oa::Int, n::Int) = copy!(dst, od+1, a, oa+1, n) +vcopy!{T}(dst::Array{T}, od::Int, a::Array{T}, oa::Int, n::Int) = copy!(dst, od, a, oa, n) function vadd!(dst::Array, od::Int, a::Array, oa::Int, n::Int) for i = 1:n - @inbounds dst[od+i] += a[oa+i] + @inbounds dst[od] += a[oa] + od += 1 + oa += 1 end end From c3771fe90d6f2e13f87f18ccd1249f2dbf3cc0c8 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 4 Jan 2014 14:32:28 -0600 Subject: [PATCH 07/12] allow specify the result type of sum. --- base/reducedim.jl | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 920c6c53b026a..7ba59a8d62dd3 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -328,23 +328,12 @@ function vadd!(dst::Array, od::Int, a::Array, oa::Int, n::Int) end @code_reducedim sum (+) sum_seq vcopy! vadd! -function sum{T<:Number}(a::Array{T}, region) - dst = Array(T, reduced_dims(a, region)) - if !isempty(dst) - if isempty(a) - fill!(dst, zero(T)) - else - sum!(dst, a, region) - end - end - return dst -end -function sum(a::Array{Bool}, region) - dst = Array(Int, reduced_dims(a, region)) +function sum{R}(rt::Type{R}, a::Array, region) + dst = Array(R, reduced_dims(a, region)) if !isempty(dst) if isempty(a) - fill!(dst, 0) + fill!(dst, zero(R)) else sum!(dst, a, region) end @@ -352,3 +341,6 @@ function sum(a::Array{Bool}, region) return dst end +sum{T}(a::Array{T}, region) = sum(T, a, region) +sum(a::Array{Bool}, region) = sum(Int, a, region) + From 483e13f040f5c8c1ef67ccdb4d60c80df9c02335 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 4 Jan 2014 14:41:21 -0600 Subject: [PATCH 08/12] allow minimum and maximum over a subvector --- base/abstractarray.jl | 54 ++++++++++++------------------------------- 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index c55b0ef651c30..2dadc1ceb1364 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1580,45 +1580,19 @@ function prod{T}(A::AbstractArray{T}) v end -function minimum{T<:Real}(A::AbstractArray{T}) - if isempty(A); error("argument must not be empty"); end - v = A[1] - for i=2:length(A) - @inbounds x = A[i] - if x < v - v = x - end - end - v -end -function maximum{T<:Real}(A::AbstractArray{T}) - if isempty(A); error("argument must not be empty"); end - v = A[1] - for i=2:length(A) - @inbounds x = A[i] - if x > v - v = x - end - end - v -end - -# specialized versions for floating-point, which deal with NaNs - -function minimum{T<:FloatingPoint}(A::AbstractArray{T}) - if isempty(A); error("argument must not be empty"); end - n = length(A) +function minimum{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) + if first > last; error("argument range must not be empty"); end # locate the first non NaN number - v = A[1] - i = 2 - while v != v && i <= n + v = A[first] + i = first + 1 + while v != v && i <= last @inbounds v = A[i] i += 1 end - while i <= n + while i <= last @inbounds x = A[i] if x < v v = x @@ -1628,19 +1602,18 @@ function minimum{T<:FloatingPoint}(A::AbstractArray{T}) v end -function maximum{T<:FloatingPoint}(A::AbstractArray{T}) - if isempty(A); error("argument must not be empty"); end - n = length(A) +function maximum{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) + if first > last; error("argument range must not be empty"); end # locate the first non NaN number - v = A[1] - i = 2 - while v != v && i <= n + v = A[first] + i = first + 1 + while v != v && i <= last @inbounds v = A[i] i += 1 end - while i <= n + while i <= last @inbounds x = A[i] if x > v v = x @@ -1650,6 +1623,9 @@ function maximum{T<:FloatingPoint}(A::AbstractArray{T}) v end +minimum{T<:Real}(A::AbstractArray{T}) = minimum(A, 1, length(A)) +maximum{T<:Real}(A::AbstractArray{T}) = maximum(A, 1, length(A)) + # extrema function extrema{T<:Real}(A::AbstractArray{T}) From 54ba4700569a24d0d454824c030b96f92b49daaf Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 4 Jan 2014 14:48:54 -0600 Subject: [PATCH 09/12] apply the new reduction framework to maximum & minimum --- base/reducedim.jl | 24 ++++++++++++++++++++++++ test/reducedim.jl | 6 +++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 7ba59a8d62dd3..fe969608bf936 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -344,3 +344,27 @@ end sum{T}(a::Array{T}, region) = sum(T, a, region) sum(a::Array{Bool}, region) = sum(Int, a, region) + +# maximum & minimum + +function vmax!(dst::Array, od::Int, a::Array, oa::Int, n::Int) + for i = 1:n + @inbounds dst[od] = scalarmax(dst[od], a[oa]) + od += 1 + oa += 1 + end +end + +function vmin!(dst::Array, od::Int, a::Array, oa::Int, n::Int) + for i = 1:n + @inbounds dst[od] = scalarmin(dst[od], a[oa]) + od += 1 + oa += 1 + end +end + +@code_reducedim maximum scalarmax maximum vcopy! vmax! +@code_reducedim minimum scalarmin minimum vcopy! vmin! + + + diff --git a/test/reducedim.jl b/test/reducedim.jl index 662098305ed6f..eb1b506f4863f 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -32,7 +32,9 @@ # main tests -safe_sum{T}(A::Array{T}, region) = reducedim(+,A,region,zero(T)) +safe_sum{T}(A::Array{T}, region) = reducedim(+, A, region, zero(T)) +safe_maximum{T}(A::Array{T}, region) = reducedim(Base.scalarmax, A, region, typemin(T)) +safe_minimum{T}(A::Array{T}, region) = reducedim(Base.scalarmin, A, region, typemax(T)) Areduc = rand(3, 4, 5, 6) for region in { @@ -40,5 +42,7 @@ for region in { (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)} @test_approx_eq sum(Areduc, region) safe_sum(Areduc, region) + @test_approx_eq maximum(Areduc, region) safe_maximum(Areduc, region) + @test_approx_eq minimum(Areduc, region) safe_minimum(Areduc, region) end From 3b1a8ead33aba5f1c2fabad8009c631e8126043c Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 4 Jan 2014 15:01:36 -0600 Subject: [PATCH 10/12] apply new reduction framework to prod --- base/abstractarray.jl | 15 +++++++++------ base/reducedim.jl | 26 ++++++++++++++++++++++++++ test/reducedim.jl | 2 ++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 2dadc1ceb1364..2ce2121296ac4 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1569,16 +1569,19 @@ function cumsum_kbn{T<:FloatingPoint}(A::AbstractArray{T}, axis::Integer) return B + C end -function prod{T}(A::AbstractArray{T}) - if isempty(A) + +function prod_rgn{T}(A::AbstractArray{T}, first::Int, last::Int) + if first > last return one(T) end - v = A[1] - for i=2:length(A) - @inbounds v *= A[i] + i = first + v = A[i] + while i < last + @inbounds v *= A[i+=1] end - v + return v end +prod{T}(A::AbstractArray{T}) = prod_rgn(A, 1, length(A)) function minimum{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) diff --git a/base/reducedim.jl b/base/reducedim.jl index fe969608bf936..b1fa2a14f3db2 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -344,6 +344,32 @@ end sum{T}(a::Array{T}, region) = sum(T, a, region) sum(a::Array{Bool}, region) = sum(Int, a, region) +# prod + +function vmultiply!(dst::Array, od::Int, a::Array, oa::Int, n::Int) + for i = 1:n + @inbounds dst[od] *= a[oa] + od += 1 + oa += 1 + end +end + +@code_reducedim prod (*) prod_rgn vcopy! vmultiply! + +function prod{R}(rt::Type{R}, a::Array, region) + dst = Array(R, reduced_dims(a, region)) + if !isempty(dst) + if isempty(a) + fill!(dst, one(R)) + else + prod!(dst, a, region) + end + end + return dst +end + +prod{T}(a::Array{T}, region) = prod(T, a, region) + # maximum & minimum diff --git a/test/reducedim.jl b/test/reducedim.jl index eb1b506f4863f..b66d227d10ab6 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -33,6 +33,7 @@ # main tests safe_sum{T}(A::Array{T}, region) = reducedim(+, A, region, zero(T)) +safe_prod{T}(A::Array{T}, region) = reducedim(*, A, region, one(T)) safe_maximum{T}(A::Array{T}, region) = reducedim(Base.scalarmax, A, region, typemin(T)) safe_minimum{T}(A::Array{T}, region) = reducedim(Base.scalarmin, A, region, typemax(T)) @@ -42,6 +43,7 @@ for region in { (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)} @test_approx_eq sum(Areduc, region) safe_sum(Areduc, region) + @test_approx_eq prod(Areduc, region) safe_prod(Areduc, region) @test_approx_eq maximum(Areduc, region) safe_maximum(Areduc, region) @test_approx_eq minimum(Areduc, region) safe_minimum(Areduc, region) end From 9ecc3213ece7ecedec5ff8062f2e686e64a6f93f Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 4 Jan 2014 15:10:33 -0600 Subject: [PATCH 11/12] updates to the maximum & minimum codes --- base/abstractarray.jl | 8 ++++---- base/reducedim.jl | 20 +++++++++++++++++--- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 2ce2121296ac4..eb1456b2042b9 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1584,7 +1584,7 @@ end prod{T}(A::AbstractArray{T}) = prod_rgn(A, 1, length(A)) -function minimum{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) +function minimum_rgn{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) if first > last; error("argument range must not be empty"); end # locate the first non NaN number @@ -1605,7 +1605,7 @@ function minimum{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) v end -function maximum{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) +function maximum_rgn{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) if first > last; error("argument range must not be empty"); end # locate the first non NaN number @@ -1626,8 +1626,8 @@ function maximum{T<:Real}(A::AbstractArray{T}, first::Int, last::Int) v end -minimum{T<:Real}(A::AbstractArray{T}) = minimum(A, 1, length(A)) -maximum{T<:Real}(A::AbstractArray{T}) = maximum(A, 1, length(A)) +minimum{T<:Real}(A::AbstractArray{T}) = minimum_rgn(A, 1, length(A)) +maximum{T<:Real}(A::AbstractArray{T}) = maximum_rgn(A, 1, length(A)) # extrema diff --git a/base/reducedim.jl b/base/reducedim.jl index b1fa2a14f3db2..879c45ecc7863 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -14,7 +14,7 @@ reduced_dims{N}(siz::NTuple{N,Int}, d::Int, rd::Int) = d == 1 ? tuple(rd, siz[d+ reduced_dims{N}(siz::NTuple{N,Int}, d::Int) = reduced_dims(siz, d, 1) -reduced_dims0{N}(siz::NTuple{N,Int}, d::Int) = reduced_dims(siz, d, (siz[d] == 0 ? 0 : 1)) +reduced_dims0{N}(siz::NTuple{N,Int}, d::Int) = 1 <= d <= N ? reduced_dims(siz, d, (siz[d] == 0 ? 0 : 1)) : siz function reduced_dims{N}(siz::NTuple{N,Int}, region) rsiz = [siz...] @@ -389,8 +389,22 @@ function vmin!(dst::Array, od::Int, a::Array, oa::Int, n::Int) end end -@code_reducedim maximum scalarmax maximum vcopy! vmax! -@code_reducedim minimum scalarmin minimum vcopy! vmin! +@code_reducedim maximum scalarmax maximum_rgn vcopy! vmax! +@code_reducedim minimum scalarmin minimum_rgn vcopy! vmin! +function maximum{T}(a::Array{T}, region) + dst = Array(T, reduced_dims0(a, region)) + if !isempty(dst) + maximum!(dst, a, region) + end + return dst +end +function minimum{T}(a::Array{T}, region) + dst = Array(T, reduced_dims0(a, region)) + if !isempty(dst) + minimum!(dst, a, region) + end + return dst +end From da2d02c0da44a06f958bc2f73e912ea59175eb71 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 4 Jan 2014 15:32:58 -0600 Subject: [PATCH 12/12] more flexible code generation functions Now the code-gen allows generating reduction functions that may contain more than one input arrays. --- base/reducedim.jl | 79 ++++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 879c45ecc7863..20f424146d56c 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -163,10 +163,13 @@ function rcompress_dims{N}(siz::NTuple{N,Int}, region) return (isrd[1], sdims) end -function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) +function generate_reducedim_funcs(fname, params, args, sizexpr, comb, sker, ker0!, ker1!) # Parameters: # - # - fname: the interface function name (e.g. sum, maximum) + # - fname: the interface function name (e.g. sum, maximum) + # - params: a list of input parameters in function signatures + # - args: a list of argument symbols + # - sizexpr: the expression that calculates the input size # - comb: the combination operation (e.g. +) # - sker: a kernel function that reduces a vector (or a range of it) to a scalar # - ker0!: a kernel that initializes an accumulator array using the first column of terms @@ -179,32 +182,32 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) quote global $(fname!) - function $(fname!)(dst::Array, a::Array, dim::Integer) - nd = ndims(a) - siz = size(a) + function $(fname!)(dst::Array, $(params...), dim::Integer) + siz = $(sizexpr) + nd = length(siz) if 1 <= dim <= nd if dim == 1 - $(fa!)(true, dst, 0, a, 0, prod(siz[2:nd]), siz[1]) + $(fa!)(true, dst, 0, $(args...), 0, prod(siz[2:nd]), siz[1]) elseif dim == nd - $(fb!)(true, dst, 0, a, 0, siz[nd], prod(siz[1:nd-1])) + $(fb!)(true, dst, 0, $(args...), 0, siz[nd], prod(siz[1:nd-1])) else - $(fb!)(true, dst, 0, a, 0, prod(siz[dim+1:nd]), siz[dim], prod(siz[1:dim-1])) + $(fb!)(true, dst, 0, $(args...), 0, prod(siz[dim+1:nd]), siz[dim], prod(siz[1:dim-1])) end else - $(ker0!)(dst, 1, a, 1, length(a)) + $(ker0!)(dst, 1, $(args...), 1, prod(siz)) end dst end - function $(fname!)(dst::Array, a::Array, region) + function $(fname!)(dst::Array, $(params...), region) if length(region) == 1 - $(fname!)(dst, a, region[1]) + $(fname!)(dst, $(args...), region[1]) else - isrd1, secs = rcompress_dims(size(a), region) + isrd1, secs = rcompress_dims($(sizexpr), region) if isrd1 - $(fa!)(true, dst, 0, a, 0, secs[end:-1:1]...) + $(fa!)(true, dst, 0, $(args...), 0, secs[end:-1:1]...) else - $(fb!)(true, dst, 0, a, 0, secs[end:-1:1]...) + $(fb!)(true, dst, 0, $(args...), 0, secs[end:-1:1]...) end end dst @@ -212,44 +215,44 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) # $(fa!) global $(fa!) - function $(fa!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int) + function $(fa!)(isinit::Bool, dst::Array, od::Int, $(params...), oa::Int, n1::Int) if isinit - dst[od+1] = $(sker)(a, oa+1, oa+n1) + dst[od+1] = $(sker)($(args...), oa+1, oa+n1) else - dst[od+1] = $(comb)(dst[od+1], $(sker)(a, oa+1, oa+n1)) + dst[od+1] = $(comb)(dst[od+1], $(sker)($(args...), oa+1, oa+n1)) end end - function $(fa!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int) + function $(fa!)(isinit::Bool, dst::Array, od::Int, $(params...), oa::Int, n1::Int, n2::Int) if isinit for j = 1:n1 alast = oa + n2 - dst[od+j] = $(sker)(a, oa+1, alast) + dst[od+j] = $(sker)($(args...), oa+1, alast) oa = alast end else for j = 1:n1 alast = oa + n2 - dst[od+j] = $(comb)(dst[od+j], $(sker)(a, oa+1, alast)) + dst[od+j] = $(comb)(dst[od+j], $(sker)($(args...), oa+1, alast)) oa = alast end end end - function $(fa!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int, n3::Int, ns::Int...) + function $(fa!)(isinit::Bool, dst::Array, od::Int, $(params...), oa::Int, n1::Int, n2::Int, n3::Int, ns::Int...) as::Int = *(n2, n3, ns...) if length(ns) & 1 == 0 - $(fa!)(isinit, dst, od, a, oa, n2, n3, ns...) + $(fa!)(isinit, dst, od, $(args...), oa, n2, n3, ns...) oa += as for j = 2:n1 - $(fa!)(false, dst, od, a, oa, n2, n3, ns...) + $(fa!)(false, dst, od, $(args...), oa, n2, n3, ns...) oa += as end else ds::Int = *(n3, ns[2:2:end]...) for j = 1:n1 - $(fa!)(isinit, dst, od, a, oa, n2, n3, ns...) + $(fa!)(isinit, dst, od, $(args...), oa, n2, n3, ns...) od += ds oa += as end @@ -258,43 +261,42 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) # $(fb!) global $(fb!) - function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int) + function $(fb!)(isinit::Bool, dst::Array, od::Int, $(params...), oa::Int, n1::Int) if isinit - $(ker0!)(dst, od+1, a, oa+1, n1) + $(ker0!)(dst, od+1, $(args...), oa+1, n1) else - $(ker1!)(dst, od+1, a, oa+1, n1) + $(ker1!)(dst, od+1, $(args...), oa+1, n1) end end - function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int) + function $(fb!)(isinit::Bool, dst::Array, od::Int, $(params...), oa::Int, n1::Int, n2::Int) if isinit - $(ker0!)(dst, od+1, a, oa+1, n2) + $(ker0!)(dst, od+1, $(args...), oa+1, n2) else - $(ker1!)(dst, od+1, a, oa+1, n2) + $(ker1!)(dst, od+1, $(args...), oa+1, n2) end oa += n2 - for j = 2:n1 - $(ker1!)(dst, od+1, a, oa+1, n2) + $(ker1!)(dst, od+1, $(args...), oa+1, n2) oa += n2 end end - function $(fb!)(isinit::Bool, dst::Array, od::Int, a::Array, oa::Int, n1::Int, n2::Int, n3::Int, ns::Int...) + function $(fb!)(isinit::Bool, dst::Array, od::Int, $(params...), oa::Int, n1::Int, n2::Int, n3::Int, ns::Int...) as = *(n2, n3, ns...) if length(ns) & 1 == 0 ds::Int = *(n3, ns[2:2:end]...) for j = 1:n1 - $(fb!)(isinit, dst, od, a, oa, n2, n3, ns...) + $(fb!)(isinit, dst, od, $(args...), oa, n2, n3, ns...) od += ds oa += as end else - $(fb!)(isinit, dst, od, a, oa, n2, n3, ns...) + $(fb!)(isinit, dst, od, $(args...), oa, n2, n3, ns...) oa += as for j = 2:n1 - $(fb!)(false, dst, od, a, oa, n2, n3, ns...) + $(fb!)(false, dst, od, $(args...), oa, n2, n3, ns...) oa += as end end @@ -302,6 +304,11 @@ function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) end end +function generate_reducedim_funcs(fname, comb, sker, ker0!, ker1!) + # specialized method to generate functions with single input arguments + generate_reducedim_funcs(fname, [:(a::Array)], [:a], :(size(a)), comb, sker, ker0!, ker1!) +end + macro code_reducedim(fname, comb, sker, ker0, ker1) esc(generate_reducedim_funcs(fname, comb, sker, ker0, ker1)) end