Implement wsum!/wsum: weighted sums without constructing a WeightVec

JuliaStats · May 25, 2014 · 1ab08cb · 1ab08cb
1 parent 7202bbd
commit 1ab08cb
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 17 deletions.
diff --git a/src/StatsBase.jl b/src/StatsBase.jl
@@ -13,6 +13,8 @@ module StatsBase
     harmmean,    # harmonic mean
     trimmean,    # trimmed mean
     wmean,       # weighted mean
+    wsum,        # weighted sum with vector as second argument
+    wsum!,       # in-place weighted sum across dimensions
 
     # scalar_stats 
     skewness,   # (standardized) skewness

diff --git a/src/means.jl b/src/means.jl
@@ -44,10 +44,12 @@ end
 
 # Weighted means
 
-sum(v::BitArray, w::WeightVec) = dot(vec(v), values(w))
-sum(v::SparseMatrixCSC, w::WeightVec) = dot(vec(v), values(w))
-sum(v::AbstractArray, w::WeightVec) = dot(vec(v), values(w))
-mean(v::AbstractArray, w::WeightVec) = sum(v, w) / sum(w)
+# 1D weighted sum/mean
+wsum(v::AbstractArray, w::AbstractVector) = dot(vec(v), w)
+Base.sum(v::BitArray, w::WeightVec) = wsum(v, values(w))
+Base.sum(v::SparseMatrixCSC, w::WeightVec) = wsum(v, values(w))
+Base.sum(v::AbstractArray, w::WeightVec) = wsum(v, values(w))
+Base.mean(v::AbstractArray, w::WeightVec) = sum(v, w) / sum(w)
 
 function wmean{T<:Number}(v::AbstractArray{T}, w::AbstractArray)
     Base.depwarn("wmean is deprecated, use mean(v, weights(w)) instead.", :wmean)
@@ -56,17 +58,16 @@ end
 
 # General Cartesian-based weighted sum across dimensions
 import Base.Cartesian: @ngenerate, @nloops, @nref
-@ngenerate N typeof(r) function Base.sum!{T,N,S,W<:Real}(r::AbstractArray{T,N}, v::AbstractArray{S,N},
-                                                         w::WeightVec{W}, dim::Int)
-    vals = values(w)
-    1 <= dim <= N || error("dim = $dim not in range (1,$N)")
+@ngenerate N typeof(r) function wsum!{T,N,S,W<:Real}(r::AbstractArray{T,N}, v::AbstractArray{S,N},
+                                                     w::AbstractVector{W}, dim::Int)
+    1 <= dim <= N || error("dim = $dim not in range [1,$N]")
     for i = 1:N
-        (i == dim && size(r, i) == 1 && size(v, i) == length(vals)) || size(r, i) == size(v, i) || error(DimensionMismatch(""))
+        (i == dim && size(r, i) == 1 && size(v, i) == length(w)) || size(r, i) == size(v, i) || error(DimensionMismatch(""))
     end
     fill!(r, 0)
     weight = zero(W)
     @nloops N i v d->(if d == dim
-                           weight = vals[i_d]
+                           weight = w[i_d]
                            j_d = 1
                        else
                            j_d = i_d
@@ -78,29 +79,34 @@ end
 # dimensions of compatible arrays. `vec` and `reshape` are only
 # guaranteed not to make a copy for Arrays, so only supports Arrays if
 # these calls may be necessary.
-function Base.sum!{W<:Real}(r::Union(Array, AbstractVector), v::Union(Array, AbstractMatrix), w::WeightVec{W}, dim::Int)
+function wsum!{W<:Real}(r::Union(Array, AbstractVector), v::Union(Array, AbstractMatrix), w::AbstractVector{W}, dim::Int)
     if dim == 1
         m = size(v, 1)
         n = div(length(v), m)
         (length(r) == n && length(w) == m) || throw(DimensionMismatch(""))
-        At_mul_B!(vec(r), isa(v, AbstractMatrix) ? v : reshape(v, m, n), values(w))
+        At_mul_B!(vec(r), isa(v, AbstractMatrix) ? v : reshape(v, m, n), w)
     elseif dim == ndims(v)
         n = size(v, ndims(v))
         m = div(length(v), n)
         (length(r) == m && length(w) == n) || throw(DimensionMismatch(""))
-        A_mul_B!(vec(r), isa(v, AbstractMatrix) ? v : reshape(v, m, n), values(w))
+        A_mul_B!(vec(r), isa(v, AbstractMatrix) ? v : reshape(v, m, n), w)
     else
-        invoke(sum!, (AbstractArray, AbstractArray, typeof(w), Int), r, v, w, dim)
+        invoke(wsum!, (AbstractArray, AbstractArray, typeof(w), Int), r, v, w, dim)
     end
     r
 end
 
+Base.sum!{W<:Real}(r::AbstractArray, v::AbstractArray, w::WeightVec{W}, dim::Int) =
+    wsum!(r, v, values(w), dim)
+
+wsum{T<:Number,W<:Real}(v::AbstractArray{T}, w::AbstractVector{W}, dim::Int) =
+    wsum!(Array(typeof(zero(T)*zero(W) + zero(T)*zero(W)), Base.reduced_dims(size(v), dim)), v, w, dim)
+
+Base.sum{T<:Number,W<:Real}(v::AbstractArray{T}, w::WeightVec{W}, dim::Int) = wsum(v, values(w), dim)
+
 Base.mean!(r::AbstractArray, v::AbstractArray, w::WeightVec, dim::Int) =
     scale!(Base.sum!(r, v, w, dim), inv(sum(w)))
 
-Base.sum{T<:Number,W<:Real}(v::AbstractArray{T}, w::WeightVec{W}, dim::Int) =
-    sum!(Array(typeof(zero(T)*zero(W) + zero(T)*zero(W)), Base.reduced_dims(size(v), dim)), v, w, dim)
-
 Base.mean{T<:Number,W<:Real}(v::AbstractArray{T}, w::WeightVec{W}, dim::Int) =
     mean!(Array(typeof((zero(T)*zero(W) + zero(T)*zero(W)) / one(W)), Base.reduced_dims(size(v), dim)), v, w, dim)
 
diff --git a/test/means.jl b/test/means.jl
@@ -45,6 +45,9 @@ a = [1. 2. 3.; 4. 5. 6.]
 @test_approx_eq sum(a, weights([0.0, 1.0]), 1) [4.0, 5.0, 6.0]
 
 @test size(mean(a, weights(ones(3)), 2)) == (2, 1)
+@test_approx_eq wsum!(zeros(1, 2), a, [1.0, 1.0, 1.0], 2) [6.0 15.0]
+@test_approx_eq wsum(a, [1.0, 1.0, 1.0], 2) [6.0 15.0]
+@test_approx_eq sum!(zeros(1, 2), a, weights([1.0, 1.0, 1.0]), 2) [6.0 15.0]
 @test_approx_eq sum(a, weights([1.0, 1.0, 1.0]), 2) [6.0 15.0]
 @test_approx_eq mean(a, weights([1.0, 1.0, 1.0]), 2) [2.0 5.0]
 @test_approx_eq sum(a, weights([1.0, 0.0, 0.0]), 2) [1.0 4.0]