Merge pull request #13465 from JuliaLang/anj/cov

Make cov and cor similar to mean and var by removing keyword arguments.
JuliaLang · Oct 8, 2015 · 38299b7 · 38299b7
2 parents 90bce78 + c661ef6
commit 38299b7
Show file tree

Hide file tree

Showing 7 changed files with 165 additions and 144 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -26,6 +26,8 @@ Library improvements
   * The functions `remotecall`, `remotecall_fetch`, and `remotecall_wait` now have the
     function argument as the first argument to allow for do-block syntax ([#13338]).
 
+  * `cov` and `cor` don't use keyword arguments anymore and are therefore now type stable ([#13465]).
+
 Deprecated or removed
 ---------------------
 

diff --git a/base/deprecated.jl b/base/deprecated.jl
@@ -841,4 +841,14 @@ for f in (:remotecall, :remotecall_fetch, :remotecall_wait)
         @deprecate ($f)(w::Worker, f::Function, args...)          ($f)(f, w::Worker, args...)
         @deprecate ($f)(id::Integer, f::Function, args...)        ($f)(f, id::Integer, args...)
     end
-end
+end
+
+@deprecate cov(x::AbstractVector; corrected=true, mean=nothing) covm(x, mean, corrected)
+@deprecate cov(X::AbstractMatrix; vardim=1, corrected=true, mean=nothing) covm(X, mean, vardim, corrected)
+@deprecate cov(x::AbstractVector, y::AbstractVector; corrected=true, mean=nothing) covm(x, mean[1], y, mean[2], corrected)
+@deprecate cov(X::AbstractVecOrMat, Y::AbstractVecOrMat; vardim=1, corrected=true, mean=nothing) covm(X, mean[1], Y, mean[2], vardim, corrected)
+
+@deprecate cor(x::AbstractVector; mean=nothing) corm(x, mean)
+@deprecate cor(X::AbstractMatrix; vardim=1, mean=nothing) corm(X, mean, vardim)
+@deprecate cor(x::AbstractVector, y::AbstractVector; mean=nothing) corm(x, mean[1], y, mean[2])
+@deprecate cor(X::AbstractVecOrMat, Y::AbstractVecOrMat; vardim=1, mean=nothing) corm(X, mean[1], Y, mean[2], vardim)
diff --git a/base/docs/helpdb.jl b/base/docs/helpdb.jl
@@ -1140,15 +1140,6 @@ Get the precision of a floating point number, as defined by the effective number
 """
 precision
 
-doc"""
-    cor(v1[, v2][, vardim=1, mean=nothing])
-
-Compute the Pearson correlation between the vector(s) in `v1` and `v2`.
-
-Users can use the keyword argument `vardim` to specify the variable dimension, and `mean` to supply pre-computed mean values.
-"""
-cor
-
 doc"""
     partitions(n)
 
@@ -9003,23 +8994,6 @@ The process was stopped by a terminal interrupt (CTRL+C).
 """
 InterruptException
 
-doc"""
-    cov(v1[, v2][, vardim=1, corrected=true, mean=nothing])
-
-Compute the Pearson covariance between the vector(s) in `v1` and `v2`. Here, `v1` and `v2` can be either vectors or matrices.
-
-This function accepts three keyword arguments:
-
-- `vardim`: the dimension of variables. When `vardim = 1`, variables are considered in columns while observations in rows; when `vardim = 2`, variables are in rows while observations in columns. By default, it is set to `1`.
-- `corrected`: whether to apply Bessel's correction (divide by `n-1` instead of `n`). By default, it is set to `true`.
-- `mean`: allow users to supply mean values that are known. By default, it is set to `nothing`, which indicates that the mean(s) are unknown, and the function will compute the mean. Users can use `mean=0` to indicate that the input data are centered, and hence there's no need to subtract the mean.
-
-The size of the result depends on the size of `v1` and `v2`. When both `v1` and `v2` are vectors, it returns the covariance between them as a scalar. When either one is a matrix, it returns a covariance matrix of size `(n1, n2)`, where `n1` and `n2` are the numbers of slices in `v1` and `v2`, which depend on the setting of `vardim`.
-
-Note: `v2` can be omitted, which indicates `v2 = v1`.
-"""
-cov
-
 doc"""
     den(x)
 

diff --git a/base/statistics.jl b/base/statistics.jl
@@ -261,66 +261,67 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
 
 # covzm (with centered data)
 
-covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
-
-covzm(x::AbstractMatrix; vardim::Int=1, corrected::Bool=true) =
+covzm(x::AbstractVector, corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
+covzm(x::AbstractMatrix, vardim::Int=1, corrected::Bool=true) =
     scale!(unscaled_covzm(x, vardim), inv(size(x,vardim) - Int(corrected)))
-
-covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
+covzm(x::AbstractVector, y::AbstractVector, corrected::Bool=true) =
     unscaled_covzm(x, y) / (length(x) - Int(corrected))
-
-covzm(x::AbstractVecOrMat, y::AbstractVecOrMat; vardim::Int=1, corrected::Bool=true) =
+covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1, corrected::Bool=true) =
     scale!(unscaled_covzm(x, y, vardim), inv(_getnobs(x, y, vardim) - Int(corrected)))
 
 # covm (with provided mean)
 
-covm(x::AbstractVector, xmean; corrected::Bool=true) =
-    covzm(x .- xmean; corrected=corrected)
+covm(x::AbstractVector, xmean, corrected::Bool=true) =
+    covzm(x .- xmean, corrected)
+covm(x::AbstractMatrix, xmean, vardim::Int=1, corrected::Bool=true) =
+    covzm(x .- xmean, vardim, corrected)
+covm(x::AbstractVector, xmean, y::AbstractVector, ymean, corrected::Bool=true) =
+    covzm(x .- xmean, y .- ymean, corrected)
+covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1, corrected::Bool=true) =
+    covzm(x .- xmean, y .- ymean, vardim, corrected)
 
-covm(x::AbstractMatrix, xmean; vardim::Int=1, corrected::Bool=true) =
-    covzm(x .- xmean; vardim=vardim, corrected=corrected)
+# cov (API)
+doc"""
+    cov(x[, corrected=true])
 
-covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
-    covzm(x .- xmean, y .- ymean; corrected=corrected)
+Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum is scaled with `n-1` whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`.
+"""
+cov(x::AbstractVector, corrected::Bool) = covm(x, Base.mean(x), corrected)
+# This ugly hack is necessary to make the method below considered more specific than the deprecated method. When the old keyword version has been completely deprecated, these two methods can be merged
+cov{T<:AbstractVector}(x::T) = covm(x, Base.mean(x), true)
 
-covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean; vardim::Int=1, corrected::Bool=true) =
-    covzm(x .- xmean, y .- ymean; vardim=vardim, corrected=corrected)
+doc"""
+    cov(X[, vardim=1, corrected=true])
 
-# cov (API)
+Compute the covariance matrix of the matrix `X` along the dimension `vardim`. If `corrected` is `true` (the default) then the sum is scaled with `n-1` whereas the sum is scaled with `n` if `corrected` is `false` where `n = size(X, vardim)`.
+"""
+cov(X::AbstractMatrix, vardim::Int, corrected::Bool=true) =
+    covm(X, _vmean(X, vardim), vardim, corrected)
+# This ugly hack is necessary to make the method below considered more specific than the deprecated method. When the old keyword version has been completely deprecated, these two methods can be merged
+cov{T<:AbstractMatrix}(X::T) = cov(X, 1, true)
 
-function cov(x::AbstractVector; corrected::Bool=true, mean=nothing)
-    mean == 0 ? covzm(x; corrected=corrected) :
-    mean === nothing ? covm(x, Base.mean(x); corrected=corrected) :
-    isa(mean, Number) ? covm(x, mean; corrected=corrected) :
-    throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
-end
+doc"""
+    cov(x, y[, corrected=true])
 
-function cov(x::AbstractMatrix; vardim::Int=1, corrected::Bool=true, mean=nothing)
-    mean == 0 ? covzm(x; vardim=vardim, corrected=corrected) :
-    mean === nothing ? covm(x, _vmean(x, vardim); vardim=vardim, corrected=corrected) :
-    isa(mean, AbstractArray) ? covm(x, mean; vardim=vardim, corrected=corrected) :
-    throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
-end
+Compute the covariance between the vectors `x` and `y`. If `corrected` is `true` (the default) then the sum is scaled with `n-1` whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x) = length(y)`.
+"""
+cov(x::AbstractVector, y::AbstractVector, corrected::Bool) =
+    covm(x, Base.mean(x), y, Base.mean(y), corrected)
+# This ugly hack is necessary to make the method below considered more specific than the deprecated method. When the old keyword version has been completely deprecated, these two methods can be merged
+cov{T<:AbstractVector,S<:AbstractVector}(x::T, y::S) =
+    covm(x, Base.mean(x), y, Base.mean(y), true)
 
-function cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true, mean=nothing)
-    mean == 0 ? covzm(x, y; corrected=corrected) :
-    mean === nothing ? covm(x, Base.mean(x), y, Base.mean(y); corrected=corrected) :
-    isa(mean, (Number,Number)) ? covm(x, mean[1], y, mean[2]; corrected=corrected) :
-    throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
-end
+doc"""
+    cov(X, Y[, vardim=1, corrected=true])
 
-function cov(x::AbstractVecOrMat, y::AbstractVecOrMat; vardim::Int=1, corrected::Bool=true, mean=nothing)
-    if mean == 0
-        covzm(x, y; vardim=vardim, corrected=corrected)
-    elseif mean === nothing
-        covm(x, _vmean(x, vardim), y, _vmean(y, vardim); vardim=vardim, corrected=corrected)
-    elseif isa(mean, (Any,Any))
-        covm(x, mean[1], y, mean[2]; vardim=vardim, corrected=corrected)
-    else
-        throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
-    end
-end
+Compute the covariance between the vectors or matrices `X` and `Y` along the dimension `vardim`. If `corrected` is `true` (the default) then the sum is scaled with `n-1` whereas the sum is scaled with `n` if `corrected` is `false` where `n = size(X, vardim) = size(Y, vardim)`.
 
+"""
+cov(X::AbstractVecOrMat, Y::AbstractVecOrMat, vardim::Int, corrected::Bool=true) =
+    covm(X, _vmean(X, vardim), Y, _vmean(Y, vardim), vardim, corrected)
+# Two-argument form with the defaults `vardim = 1`, `corrected = true`. The type parameters are a hack to make this method more specific than the deprecated keyword method; once that deprecation is removed it can be merged with the method above. The means must be taken along dimension 1 explicitly: `vardim` is not an argument of this method.
+cov{T<:AbstractVecOrMat,S<:AbstractVecOrMat}(X::T, Y::S) =
+    covm(X, _vmean(X, 1), Y, _vmean(Y, 1), 1, true)
 
 ##### correlation #####
 
@@ -340,7 +341,6 @@ function cov2cor!{T}(C::AbstractMatrix{T}, xsd::AbstractArray)
     end
     return C
 end
-
 function cov2cor!(C::AbstractMatrix, xsd::Number, ysd::AbstractArray)
     nx, ny = size(C)
     length(ysd) == ny || throw(DimensionMismatch("inconsistent dimensions"))
@@ -351,7 +351,6 @@ function cov2cor!(C::AbstractMatrix, xsd::Number, ysd::AbstractArray)
     end
     return C
 end
-
 function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::Number)
     nx, ny = size(C)
     length(xsd) == nx || throw(DimensionMismatch("inconsistent dimensions"))
@@ -362,7 +361,6 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::Number)
     end
     return C
 end
-
 function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
     nx, ny = size(C)
     (length(xsd) == nx && length(ysd) == ny) ||
@@ -378,10 +376,10 @@ end
 # corzm (non-exported, with centered data)
 
 corzm{T}(x::AbstractVector{T}) = one(real(T))
-
-corzm(x::AbstractMatrix; vardim::Int=1) =
-    (c = unscaled_covzm(x, vardim); cov2cor!(c, sqrt!(diag(c))))
-
+function corzm(x::AbstractMatrix, vardim::Int=1)
+    c = unscaled_covzm(x, vardim)
+    return cov2cor!(c, sqrt!(diag(c)))
+end
 function corzm(x::AbstractVector, y::AbstractVector)
     n = length(x)
     length(y) == n || throw(DimensionMismatch("inconsistent lengths"))
@@ -401,57 +399,58 @@ function corzm(x::AbstractVector, y::AbstractVector)
     end
     return xy / (sqrt(xx) * sqrt(yy))
 end
-
-corzm(x::AbstractVector, y::AbstractMatrix; vardim::Int=1) =
+corzm(x::AbstractVector, y::AbstractMatrix, vardim::Int=1) =
     cov2cor!(unscaled_covzm(x, y, vardim), sqrt(sumabs2(x)), sqrt!(sumabs2(y, vardim)))
-
-corzm(x::AbstractMatrix, y::AbstractVector; vardim::Int=1) =
+corzm(x::AbstractMatrix, y::AbstractVector, vardim::Int=1) =
     cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sumabs2(x, vardim)), sqrt(sumabs2(y)))
-
-corzm(x::AbstractMatrix, y::AbstractMatrix; vardim::Int=1) =
+corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
     cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sumabs2(x, vardim)), sqrt!(sumabs2(y, vardim)))
 
 # corm
 
 corm{T}(x::AbstractVector{T}, xmean) = one(real(T))
+corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
+corm(x::AbstractVector, xmean, y::AbstractVector, ymean) = corzm(x .- xmean, y .- ymean)
+corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
+    corzm(x .- xmean, y .- ymean, vardim)
 
-corm(x::AbstractMatrix, xmean; vardim::Int=1) = corzm(x .- xmean; vardim=vardim)
+# cor
+doc"""
+    cor(x)
 
-corm(x::AbstractVector, xmean, y::AbstractVector, ymean) = corzm(x .- xmean, y .- ymean)
+Return the number one.
+"""
+cor{T<:AbstractVector}(x::T) = one(real(eltype(x)))
+# The type parameter on the method above is an ugly hack that is necessary to make it considered more specific than the deprecated method. When the old keyword version has been completely deprecated, the type parameter can be dropped
 
-corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean; vardim::Int=1) =
-    corzm(x .- xmean, y .- ymean; vardim=vardim)
+doc"""
+    cor(X[, vardim=1])
 
-# cor
+Compute the Pearson correlation matrix of the matrix `X` along the dimension `vardim`.
+"""
+cor(X::AbstractMatrix, vardim::Int) = corm(X, _vmean(X, vardim), vardim)
+# One-argument form with the default `vardim = 1`. The type parameter is a hack to make this method more specific than the deprecated keyword method; once that deprecation is removed it can be merged with the method above. The mean must be taken along dimension 1 explicitly: `vardim` is not an argument of this method.
+cor{T<:AbstractMatrix}(X::T) = corm(X, _vmean(X, 1), 1)
 
-cor{T}(x::AbstractVector{T}; mean=nothing) = one(real(T))
+doc"""
+    cor(x, y)
 
-function cor(x::AbstractMatrix; vardim::Int=1, mean=nothing)
-    mean == 0 ? corzm(x; vardim=vardim) :
-    mean === nothing ? corm(x, _vmean(x, vardim); vardim=vardim) :
-    isa(mean, AbstractArray) ? corm(x, mean; vardim=vardim) :
-    throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
-end
+Compute the Pearson correlation between the vectors `x` and `y`.
+"""
+cor{T<:AbstractVector,S<:AbstractVector}(x::T, y::S) = corm(x, Base.mean(x), y, Base.mean(y))
+# The type parameters on the method above are an ugly hack that is necessary to make it considered more specific than the deprecated method. When the old keyword version has been completely deprecated, the type parameters can be dropped
 
-function cor(x::AbstractVector, y::AbstractVector; mean=nothing)
-    mean == 0 ? corzm(x, y) :
-    mean === nothing ? corm(x, Base.mean(x), y, Base.mean(y)) :
-    isa(mean, (Number,Number)) ? corm(x, mean[1], y, mean[2]) :
-    throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
-end
+doc"""
+    cor(X, Y[, vardim=1])
 
-function cor(x::AbstractVecOrMat, y::AbstractVecOrMat; vardim::Int=1, mean=nothing)
-    if mean == 0
-        corzm(x, y; vardim=vardim)
-    elseif mean === nothing
-        corm(x, _vmean(x, vardim), y, _vmean(y, vardim); vardim=vardim)
-    elseif isa(mean, (Any,Any))
-        corm(x, mean[1], y, mean[2]; vardim=vardim)
-    else
-        throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
-    end
-end
+Compute the Pearson correlation between the vectors or matrices `X` and `Y` along the dimension `vardim`.
 
+"""
+cor(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int) =
+    corm(x, _vmean(x, vardim), y, _vmean(y, vardim), vardim)
+# Two-argument form with the default `vardim = 1`. The type parameters are a hack to make this method more specific than the deprecated keyword method (same pattern as the two-argument `cov`); once that deprecation is removed it can be merged with the method above. The means must be taken along dimension 1 explicitly: `vardim` is not an argument of this method.
+cor{T<:AbstractVecOrMat,S<:AbstractVecOrMat}(x::T, y::S) =
+    corm(x, _vmean(x, 1), y, _vmean(y, 1), 1)
 
 ##### median & quantiles #####
 

diff --git a/doc/stdlib/math.rst b/doc/stdlib/math.rst
@@ -1691,29 +1691,53 @@ Statistics
 
    Like ``quantile``\ , but overwrites the input vector.
 
-.. function:: cov(v1[, v2][, vardim=1, corrected=true, mean=nothing])
+.. function:: cov(x[, corrected=true])
 
    .. Docstring generated from Julia source
 
-   Compute the Pearson covariance between the vector(s) in ``v1`` and ``v2``\ . Here, ``v1`` and ``v2`` can be either vectors or matrices.
+   Compute the variance of the vector ``x``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1`` whereas the sum is scaled with ``n`` if ``corrected`` is ``false`` where ``n = length(x)``\ .
 
-   This function accepts three keyword arguments:
+.. function:: cov(X[, vardim=1, corrected=true])
 
-   * ``vardim``\ : the dimension of variables. When ``vardim = 1``\ , variables are considered in columns while observations in rows; when ``vardim = 2``\ , variables are in rows while observations in columns. By default, it is set to ``1``\ .
-   * ``corrected``\ : whether to apply Bessel's correction (divide by ``n-1`` instead of ``n``\ ). By default, it is set to ``true``\ .
-   * ``mean``\ : allow users to supply mean values that are known. By default, it is set to ``nothing``\ , which indicates that the mean(s) are unknown, and the function will compute the mean. Users can use ``mean=0`` to indicate that the input data are centered, and hence there's no need to subtract the mean.
+   .. Docstring generated from Julia source
+
+   Compute the covariance matrix of the matrix ``X`` along the dimension ``vardim``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1`` whereas the sum is scaled with ``n`` if ``corrected`` is ``false`` where ``n = size(X, vardim)``\ .
+
+.. function:: cov(x, y[, corrected=true])
+
+   .. Docstring generated from Julia source
 
-   The size of the result depends on the size of ``v1`` and ``v2``\ . When both ``v1`` and ``v2`` are vectors, it returns the covariance between them as a scalar. When either one is a matrix, it returns a covariance matrix of size ``(n1, n2)``\ , where ``n1`` and ``n2`` are the numbers of slices in ``v1`` and ``v2``\ , which depend on the setting of ``vardim``\ .
+   Compute the covariance between the vectors ``x`` and ``y``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1`` whereas the sum is scaled with ``n`` if ``corrected`` is ``false`` where ``n = length(x) = length(y)``\ .
 
-   Note: ``v2`` can be omitted, which indicates ``v2 = v1``\ .
+.. function:: cov(X, Y[, vardim=1, corrected=true])
 
-.. function:: cor(v1[, v2][, vardim=1, mean=nothing])
+   .. Docstring generated from Julia source
+
+   Compute the covariance between the vectors or matrices ``X`` and ``Y`` along the dimension ``vardim``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1`` whereas the sum is scaled with ``n`` if ``corrected`` is ``false`` where ``n = size(X, vardim) = size(Y, vardim)``\ .
+
+.. function:: cor(x)
 
    .. Docstring generated from Julia source
 
-   Compute the Pearson correlation between the vector(s) in ``v1`` and ``v2``\ .
+   Return the number one.
+
+.. function:: cor(X[, vardim=1])
+
+   .. Docstring generated from Julia source
+
+   Compute the Pearson correlation matrix of the matrix ``X`` along the dimension ``vardim``\ .
+
+.. function:: cor(x, y)
+
+   .. Docstring generated from Julia source
+
+   Compute the Pearson correlation between the vectors ``x`` and ``y``\ .
+
+.. function:: cor(X, Y[, vardim=1])
+
+   .. Docstring generated from Julia source
 
-   Users can use the keyword argument ``vardim`` to specify the variable dimension, and ``mean`` to supply pre-computed mean values.
+   Compute the Pearson correlation between the vectors or matrices ``X`` and ``Y`` along the dimension ``vardim``\ .
 
 Signal Processing
 -----------------

diff --git a/test/docs.jl b/test/docs.jl
@@ -415,7 +415,7 @@ f12593_2() = 1
 @test (Docs.@repl @r_str) !== nothing
 
 # Simple tests for apropos:
-@test contains(sprint(apropos, "pearson"), "cov")
+@test contains(sprint(apropos, "pearson"), "cor")
 @test contains(sprint(apropos, r"ind(exes|ices)"), "eachindex")
 @test contains(sprint(apropos, "print"), "Profile.print")