From 3daa183c909211e132399bdfe1417b51437d5b3b Mon Sep 17 00:00:00 2001
From: Jan Weidner <jw3126@gmail.com>
Date: Wed, 23 Nov 2016 21:21:32 +0100
Subject: [PATCH] Add accumulate, accumulate! (#18931)

---
 NEWS.md                  |   4 +
 base/abstractarray.jl    |   2 +-
 base/arraymath.jl        |  46 ---------
 base/deprecated.jl       |   4 +
 base/exports.jl          |   4 +-
 base/multidimensional.jl | 197 +++++++++++++++++++++++++++++----------
 doc/stdlib/arrays.rst    |  26 ++++++
 test/arrayops.jl         |  58 ++++++++++++
 test/reduce.jl           |   7 --
 9 files changed, 245 insertions(+), 103 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 76b008ffa5eaa..c857562810e93 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -60,6 +60,8 @@ Library improvements
     you can now do e.g. `[A I]` and it will concatenate an appropriately sized
     identity matrix ([#19305]).
 
+  * New `accumulate` and `accumulate!` functions, which generalize `cumsum` and `cumprod`. Also known as a [scan](https://en.wikipedia.org/wiki/Prefix_sum) operation ([#18931]).
+
 Compiler/Runtime improvements
 -----------------------------
 
@@ -76,6 +78,8 @@ Deprecated or removed
 
   * `Dates.recur` has been deprecated in favor of `filter` ([#19288])
 
+  * `cummin` and `cummax` have been deprecated in favor of `accumulate`.
+
 Julia v0.5.0 Release Notes
 ==========================
 
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 206e99f257a1e..ffc678b813c43 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -1415,7 +1415,7 @@ function typed_hvcat{T}(::Type{T}, rows::Tuple{Vararg{Int}}, as...)
     T[rs...;]
 end
 
-## Reductions and scans ##
+## Reductions and accumulates ##
 
 function isequal(A::AbstractArray, B::AbstractArray)
     if A === B return true end
diff --git a/base/arraymath.jl b/base/arraymath.jl
index 22a6c6db6f17d..8cb00d37dbe9b 100644
--- a/base/arraymath.jl
+++ b/base/arraymath.jl
@@ -488,49 +488,3 @@ ctranspose{T<:Real}(A::AbstractVecOrMat{T}) = transpose(A)
 
 transpose(x::AbstractVector) = [ transpose(v) for i=of_indices(x, OneTo(1)), v in x ]
 ctranspose{T}(x::AbstractVector{T}) = T[ ctranspose(v) for i=of_indices(x, OneTo(1)), v in x ]
-
-# see discussion in #18364 ... we try not to widen type of the resulting array
-# from cumsum or cumprod, but in some cases (+, Bool) we may not have a choice.
-rcum_promote_type{T<:Number}(op, ::Type{T}) = promote_op(op, T)
-rcum_promote_type{T}(op, ::Type{T}) = T
-
-# handle sums of Vector{Bool} and similar.   it would be nice to handle
-# any AbstractArray here, but it's not clear how that would be possible
-rcum_promote_type{T,N}(op, ::Type{Array{T,N}}) = Array{rcum_promote_type(op,T), N}
-
-for (f, f!, fp, op) = ((:cumsum, :cumsum!, :cumsum_pairwise!, :+),
-                       (:cumprod, :cumprod!, :cumprod_pairwise!, :*) )
-    # in-place cumsum of c = s+v[range(i1,n)], using pairwise summation
-    @eval function ($fp){T}(v::AbstractVector, c::AbstractVector{T}, s, i1, n)
-        local s_::T # for sum(v[range(i1,n)]), i.e. sum without s
-        if n < 128
-            @inbounds s_ = v[i1]
-            @inbounds c[i1] = ($op)(s, s_)
-            for i = i1+1:i1+n-1
-                @inbounds s_ = $(op)(s_, v[i])
-                @inbounds c[i] = $(op)(s, s_)
-            end
-        else
-            n2 = n >> 1
-            s_ = ($fp)(v, c, s, i1, n2)
-            s_ = $(op)(s_, ($fp)(v, c, ($op)(s, s_), i1+n2, n-n2))
-        end
-        return s_
-    end
-
-    @eval function ($f!)(result::AbstractVector, v::AbstractVector)
-        li = linearindices(v)
-        li != linearindices(result) && throw(DimensionMismatch("input and output array sizes and indices must match"))
-        n = length(li)
-        if n == 0; return result; end
-        i1 = first(li)
-        @inbounds result[i1] = v1 = v[i1]
-        n == 1 && return result
-        ($fp)(v, result, v1, i1+1, n-1)
-        return result
-    end
-
-    @eval function ($f){T}(v::AbstractVector{T})
-        return ($f!)(similar(v, rcum_promote_type($op, T)), v)
-    end
-end
diff --git a/base/deprecated.jl b/base/deprecated.jl
index fd98b9e0eaf9b..c1bc89a844e40 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -145,6 +145,10 @@ end
 @deprecate chol(A::Number, ::Type{Val{:L}})         ctranspose(chol(A))
 @deprecate chol(A::AbstractMatrix, ::Type{Val{:L}}) ctranspose(chol(A))
 
+@deprecate cummin(A, dim=1) accumulate(min, A, dim=1)
+@deprecate cummax(A, dim=1) accumulate(max, A, dim=1)
+
+
 # Number updates
 
 # rem1 is inconsistent for x==0: The result should both have the same
diff --git a/base/exports.jl b/base/exports.jl
index 56a0af9a33353..e2b2e061e03e5 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -497,12 +497,12 @@ export
     colon,
     conj!,
     copy!,
-    cummax,
-    cummin,
     cumprod,
     cumprod!,
     cumsum,
     cumsum!,
+    accumulate,
+    accumulate!,
     cumsum_kbn,
     eachindex,
     extrema,
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index 6532e04df5101..abf38cd25fd79 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -481,49 +481,62 @@ end
     end
 end
 
-for (f, fmod, op) = ((:cummin, :_cummin!, :min), (:cummax, :_cummax!, :max))
-    @eval function ($f)(v::AbstractVector)
-        n = length(v)
-        cur_val = v[1]
-        res = similar(v, n)
-        res[1] = cur_val
-        for i in 2:n
-            cur_val = ($op)(v[i], cur_val)
-            res[i] = cur_val
-        end
-        return res
-    end
 
-    @eval function ($f)(A::AbstractArray, axis::Integer)
-        axis > 0 || throw(ArgumentError("axis must be a positive integer"))
-        res = similar(A)
-        axis > ndims(A) && return copy!(res, A)
-        inds = indices(A)
-        if isempty(inds[axis])
-            return res
+# see discussion in #18364 ... we try not to widen type of the resulting array
+# from cumsum or cumprod, but in some cases (+, Bool) we may not have a choice.
+rcum_promote_type{T,S<:Number}(op, ::Type{T}, ::Type{S}) = promote_op(op, T, S)
+rcum_promote_type{T<:Number}(op, ::Type{T}) = rcum_promote_type(op, T,T)
+rcum_promote_type{T}(op, ::Type{T}) = T
+
+# handle sums of Vector{Bool} and similar.   it would be nice to handle
+# any AbstractArray here, but it's not clear how that would be possible
+rcum_promote_type{T,N}(op, ::Type{Array{T,N}}) = Array{rcum_promote_type(op,T), N}
+
+# accumulate_pairwise slightly slower then accumulate, but more numerically
+# stable in certain situtations (e.g. sums).
+# it does double the number of operations compared to accumulate,
+# though for cheap operations like + this does not have much impact (20%)
+function _accumulate_pairwise!{T, Op}(op::Op, c::AbstractVector{T}, v::AbstractVector, s, i1, n)::T
+    @inbounds if n < 128
+        s_ = v[i1]
+        c[i1] = op(s, s_)
+        for i = i1+1:i1+n-1
+            s_ = op(s_, v[i])
+            c[i] = op(s, s_)
         end
-        R1 = CartesianRange(inds[1:axis-1])
-        R2 = CartesianRange(inds[axis+1:end])
-        ($fmod)(res, A, R1, R2, axis)
+    else
+        n2 = n >> 1
+        s_ = _accumulate_pairwise!(op, c, v, s, i1, n2)
+        s_ = op(s_, _accumulate_pairwise!(op, c, v, op(s, s_), i1+n2, n-n2))
     end
+    return s_
+end
 
-    @eval @noinline function ($fmod)(res, A::AbstractArray, R1::CartesianRange, R2::CartesianRange, axis::Integer)
-        inds = indices(A, axis)
-        i1 = first(inds)
-        for I2 in R2
-            for I1 in R1
-                res[I1, i1, I2] = A[I1, i1, I2]
-            end
-            for i = i1+1:last(inds)
-                for I1 in R1
-                    res[I1, i, I2] = ($op)(A[I1, i, I2], res[I1, i-1, I2])
-                end
-            end
-        end
-        res
-    end
+function accumulate_pairwise!{Op}(op::Op, result::AbstractVector, v::AbstractVector)
+    li = linearindices(v)
+    li != linearindices(result) && throw(DimensionMismatch("input and output array sizes and indices must match"))
+    n = length(li)
+    n == 0 && return result
+    i1 = first(li)
+    @inbounds result[i1] = v1 = v[i1]
+    n == 1 && return result
+    _accumulate_pairwise!(op, result, v, v1, i1+1, n-1)
+    return result
+end
+
+function accumulate_pairwise{T}(op, v::AbstractVector{T})
+    out = similar(v, rcum_promote_type(op, T))
+    return accumulate_pairwise!(op, out, v)
+end
 
-    @eval ($f)(A::AbstractArray) = ($f)(A, 1)
+function cumsum!(out, v::AbstractVector, axis::Integer=1)
+    # for types prone to numerical stability issues, we want
+    # accumulate_pairwise.
+    axis == 1 ? accumulate_pairwise!(+, out, v) : copy!(out,v)
+end
+
+function cumsum!{T <: Integer}(out, v::AbstractVector{T}, axis::Integer=1)
+    axis == 1 ? accumulate!(+, out, v) : copy!(out,v)
 end
 
 """
@@ -550,8 +563,12 @@ julia> cumsum(a,2)
  4  9  15
 ```
 """
-cumsum{T}(A::AbstractArray{T}, axis::Integer=1) =  cumsum!(similar(A, rcum_promote_type(+, T)), A, axis)
-cumsum!(B, A::AbstractArray) = cumsum!(B, A, 1)
+function cumsum{T}(A::AbstractArray{T}, axis::Integer=1)
+    out = similar(A, rcum_promote_type(+, T))
+    cumsum!(out, A, axis)
+end
+cumsum!(B, A, axis::Integer=1) = accumulate!(+, B, A, axis)
+
 """
     cumprod(A, dim=1)
 
@@ -576,13 +593,99 @@ julia> cumprod(a,2)
  4  20  120
 ```
 """
-cumprod(A::AbstractArray, axis::Integer=1) = cumprod!(similar(A), A, axis)
-cumprod!(B, A) = cumprod!(B, A, 1)
+cumprod(A::AbstractArray, axis::Integer=1) = accumulate(*, A, axis)
+cumprod!(B, A, axis::Integer=1) = accumulate!(*, B, A, axis)
+
+"""
+    accumulate(op, A, dim=1)
 
-cumsum!(B, A, axis::Integer) = cumop!(+, B, A, axis)
-cumprod!(B, A, axis::Integer) = cumop!(*, B, A, axis)
+Cumulative operation `op` along a dimension `dim` (defaults to 1). See also
+[`accumulate!`](:func:`accumulate!`) to use a preallocated output array, both for performance and
+to control the precision of the output (e.g. to avoid overflow). For common operations
+there are specialized variants of `accumulate`, see:
+[`cumsum`](:func:`cumsum`), [`cumprod`](:func:`cumprod`)
+
+```jldoctest
+julia> accumulate(+, [1,2,3])
+3-element Array{Int64,1}:
+ 1
+ 3
+ 6
+
+julia> accumulate(*, [1,2,3])
+3-element Array{Int64,1}:
+ 1
+ 2
+ 6
+```
+"""
+function accumulate(op, A, axis::Integer=1)
+    out = similar(A, rcum_promote_type(op, eltype(A)))
+    accumulate!(op, out, A, axis)
+end
 
-function cumop!(op, B, A, axis::Integer)
+
+"""
+    accumulate(op, v0, A)
+
+Like `accumulate`, but using a starting element `v0`. The first entry of the result will be
+`op(v0, first(A))`. For example:
+
+```jldoctest
+julia> accumulate(+, 100, [1,2,3])
+3-element Array{Int64,1}:
+ 101
+ 103
+ 106
+
+ julia> accumulate(min, 0, [1,2,-1])
+ 3-element Array{Int64,1}:
+   0
+   0
+  -1
+```
+"""
+function accumulate(op, v0, A, axis::Integer=1)
+    T = rcum_promote_type(op, typeof(v0), eltype(A))
+    out = similar(A, T)
+    accumulate!(op, out, v0, A, 1)
+end
+
+function accumulate!{Op}(op::Op, B, A::AbstractVector, axis::Integer=1)
+    isempty(A) && return B
+    v1 = first(A)
+    _accumulate1!(op, B, v1, A, axis)
+end
+
+function accumulate!(op, B, v0, A::AbstractVector, axis::Integer=1)
+    isempty(A) && return B
+    v1 = op(v0, first(A))
+    _accumulate1!(op, B, v1, A, axis)
+end
+
+
+function _accumulate1!(op, B, v1, A::AbstractVector, axis::Integer=1)
+    axis > 0 || throw(ArgumentError("axis must be a positive integer"))
+    inds = linearindices(A)
+    inds == linearindices(B) || throw(DimensionMismatch("linearindices of A and B don't match"))
+    axis > 1 && return copy!(B, A)
+    i1 = inds[1]
+    cur_val = v1
+    B[i1] = cur_val
+    @inbounds for i in inds[2:end]
+        cur_val = op(cur_val, A[i])
+        B[i] = cur_val
+    end
+    return B
+end
+
+"""
+    accumulate!(op, B, A, dim=1)
+
+Cumulative operation `op` on `A` along a dimension, storing the result in `B`. The dimension defaults to 1.
+See also [`accumulate`](:func:`accumulate`).
+"""
+function accumulate!(op, B, A, axis::Integer=1)
     axis > 0 || throw(ArgumentError("axis must be a positive integer"))
     inds_t = indices(A)
     indices(B) == inds_t || throw(DimensionMismatch("shape of B must match A"))
@@ -603,12 +706,12 @@ function cumop!(op, B, A, axis::Integer)
     else
         R1 = CartesianRange(indices(A)[1:axis-1])   # not type-stable
         R2 = CartesianRange(indices(A)[axis+1:end])
-        _cumop!(op, B, A, R1, inds_t[axis], R2) # use function barrier
+        _accumulate!(op, B, A, R1, inds_t[axis], R2) # use function barrier
     end
     return B
 end
 
-@noinline function _cumop!(op, B, A, R1, ind, R2)
+@noinline function _accumulate!(op, B, A, R1, ind, R2)
     # Copy the initial element in each 1d vector along dimension `axis`
     i = first(ind)
     @inbounds for J in R2, I in R1
diff --git a/doc/stdlib/arrays.rst b/doc/stdlib/arrays.rst
index dad5555044bfa..a7d1559e1d8e6 100644
--- a/doc/stdlib/arrays.rst
+++ b/doc/stdlib/arrays.rst
@@ -1499,6 +1499,32 @@ Indexing, Assignment, and Concatenation
 Array functions
 ---------------
 
+.. function:: accumulate(op, A, dim=1)
+
+   .. Docstring generated from Julia source
+
+   Cumulative operation ``op`` along a dimension ``dim`` (defaults to 1). See also :func:`accumulate!` to use a preallocated output array, both for performance and to control the precision of the output (e.g. to avoid overflow). For common operations there are specialized variants of accumulate, see: :func:`cumsum`\ , :func:`cumprod`
+
+   .. doctest::
+
+       julia> accumulate(+, [1,2,3])
+       3-element Array{Int64,1}:
+        1
+        3
+        6
+
+       julia> accumulate(*, [1,2,3])
+       3-element Array{Int64,1}:
+        1
+        2
+        6
+
+.. function:: accumulate!(op, B, A, dim=1)
+
+   .. Docstring generated from Julia source
+
+   Cumulative operation ``op`` on ``A`` along a dimension, storing the result in ``B``\ . The dimension defaults to 1. See also :func:`accumulate`\ .
+
 .. function:: cumprod(A, dim=1)
 
    .. Docstring generated from Julia source
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 340173edc9aca..e81eaf44c8796 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -1868,3 +1868,61 @@ end
         @test size(a) == size(b)
     end
 end
+
+isdefined(Main, :TestHelpers) || eval(Main, :(include("TestHelpers.jl")))
+using TestHelpers.OAs
+
+@testset "accumulate, accumulate!" begin
+
+    @test accumulate(+, [1,2,3]) == [1, 3, 6]
+    @test accumulate(min, [1 2; 3 4], 1) == [1 2; 1 2]
+    @test accumulate(max, [1 2; 3 0], 2) == [1 2; 3 3]
+    @test accumulate(+, Bool[]) == Int[]
+    @test accumulate(*, Bool[]) == Bool[]
+    @test accumulate(+, Float64[]) == Float64[]
+
+    @test accumulate(min, [1, 2, 5, -1, 3, -2]) == [1, 1, 1, -1, -1, -2]
+    @test accumulate(max, [1, 2, 5, -1, 3, -2]) == [1, 2, 5, 5, 5, 5]
+
+    @test accumulate(max, [1 0; 0 1], 1) == [1 0; 1 1]
+    @test accumulate(max, [1 0; 0 1], 2) == [1 1; 0 1]
+    @test accumulate(min, [1 0; 0 1], 1) == [1 0; 0 0]
+    @test accumulate(min, [1 0; 0 1], 2) == [1 0; 0 0]
+
+    @test isa(accumulate(+,     Int[]) , Vector{Int})
+    @test isa(accumulate(+, 1., Int[]) , Vector{Float64})
+    @test accumulate(+, 1, [1,2]) == [2, 4]
+    arr = randn(4)
+    @test accumulate(*, 1, arr) ≈ accumulate(*, arr)
+
+    N = 5
+    for arr in [rand(Float64, N), rand(Bool, N), rand(-2:2, N)]
+        for (op, cumop) in [(+, cumsum), (*, cumprod)]
+            @inferred accumulate(op, arr)
+            accumulate_arr = accumulate(op, arr)
+            @test accumulate_arr ≈ cumop(arr)
+            @test accumulate_arr[end] ≈ reduce(op, arr)
+            @test accumulate_arr[1] ≈ arr[1]
+            @test accumulate(op, arr, 10) ≈ arr
+
+            if eltype(arr) in [Int, Float64] # eltype of out easy
+                out = similar(arr)
+                @test accumulate!(op, out, arr) ≈ accumulate_arr
+                @test out ≈ accumulate_arr
+            end
+        end
+    end
+
+    # exotic indexing
+    arr = randn(4)
+    oarr = OffsetArray(arr, (-3,))
+    @test accumulate(+, oarr).parent == accumulate(+, arr)
+
+    @inferred accumulate(+, randn(3))
+    @inferred accumulate(+, 1, randn(3))
+
+    # asymmetric operation
+    op(x,y) = 2x+y
+    @test accumulate(op, [10,20, 30]) == [10, op(10, 20), op(op(10, 20), 30)] == [10, 40, 110]
+    @test accumulate(op, [10 20 30], 2) == [10 op(10, 20) op(op(10, 20), 30)] == [10 40 110]
+end
diff --git a/test/reduce.jl b/test/reduce.jl
index f1a8f3d41c9ef..207e57684a95a 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -268,13 +268,6 @@ let es = sum_kbn(z), es2 = sum_kbn(z[1:10^5])
     @test (es2 - cs[10^5]) < es2 * 1e-13
 end
 
-@test isequal(cummin([1, 2, 5, -1, 3, -2]), [1, 1, 1, -1, -1, -2])
-@test isequal(cummax([1, 2, 5, -1, 3, -2]), [1, 2, 5, 5, 5, 5])
-
-@test isequal(cummax([1 0; 0 1], 1), [1 0; 1 1])
-@test isequal(cummax([1 0; 0 1], 2), [1 1; 0 1])
-@test isequal(cummin([1 0; 0 1], 1), [1 0; 0 0])
-@test isequal(cummin([1 0; 0 1], 2), [1 0; 0 0])
 
 @test sum(collect(map(UInt8,0:255))) == 32640
 @test sum(collect(map(UInt8,254:255))) == 509