Skip to content

Commit

Permalink
Merge pull request #13465 from JuliaLang/anj/cov
Browse files Browse the repository at this point in the history
Make cov and cor similar to mean and var by removing keyword arguments.
  • Loading branch information
andreasnoack committed Oct 8, 2015
2 parents 90bce78 + c661ef6 commit 38299b7
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 144 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Library improvements
* The functions `remotecall`, `remotecall_fetch`, and `remotecall_wait` now have the
function argument as the first argument to allow for do-block syntax ([#13338]).

* `cov` and `cor` don't use keyword arguments anymore and are therefore now type stable ([#13465]).

Deprecated or removed
---------------------

Expand Down
12 changes: 11 additions & 1 deletion base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -841,4 +841,14 @@ for f in (:remotecall, :remotecall_fetch, :remotecall_wait)
@deprecate ($f)(w::Worker, f::Function, args...) ($f)(f, w::Worker, args...)
@deprecate ($f)(id::Integer, f::Function, args...) ($f)(f, id::Integer, args...)
end
end
end

@deprecate cov(x::AbstractVector; corrected=true, mean=nothing) covm(x, mean, corrected)
@deprecate cov(X::AbstractMatrix; vardim=1, corrected=true, mean=nothing) covm(X, mean, vardim, corrected)
@deprecate cov(x::AbstractVector, y::AbstractVector; corrected=true, mean=nothing) covm(x, mean[1], y, mean[2], corrected)
@deprecate cov(X::AbstractVecOrMat, Y::AbstractVecOrMat; vardim=1, corrected=true, mean=nothing) covm(X, mean[1], Y, mean[2], vardim, corrected)

@deprecate cor(x::AbstractVector; mean=nothing) corm(x, mean)
@deprecate cor(X::AbstractMatrix; vardim=1, mean=nothing) corm(X, mean, vardim)
@deprecate cor(x::AbstractVector, y::AbstractVector; mean=nothing) corm(x, mean[1], y, mean[2])
@deprecate cor(X::AbstractVecOrMat, Y::AbstractVecOrMat; vardim=1, mean=nothing) corm(X, mean[1], Y, mean[2], vardim)
26 changes: 0 additions & 26 deletions base/docs/helpdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1140,15 +1140,6 @@ Get the precision of a floating point number, as defined by the effective number
"""
precision

doc"""
cor(v1[, v2][, vardim=1, mean=nothing])
Compute the Pearson correlation between the vector(s) in `v1` and `v2`.
Users can use the keyword argument `vardim` to specify the variable dimension, and `mean` to supply pre-computed mean values.
"""
cor

doc"""
partitions(n)
Expand Down Expand Up @@ -9003,23 +8994,6 @@ The process was stopped by a terminal interrupt (CTRL+C).
"""
InterruptException

doc"""
cov(v1[, v2][, vardim=1, corrected=true, mean=nothing])
Compute the Pearson covariance between the vector(s) in `v1` and `v2`. Here, `v1` and `v2` can be either vectors or matrices.
This function accepts three keyword arguments:
- `vardim`: the dimension of variables. When `vardim = 1`, variables are considered in columns while observations in rows; when `vardim = 2`, variables are in rows while observations in columns. By default, it is set to `1`.
- `corrected`: whether to apply Bessel's correction (divide by `n-1` instead of `n`). By default, it is set to `true`.
- `mean`: allow users to supply mean values that are known. By default, it is set to `nothing`, which indicates that the mean(s) are unknown, and the function will compute the mean. Users can use `mean=0` to indicate that the input data are centered, and hence there's no need to subtract the mean.
The size of the result depends on the size of `v1` and `v2`. When both `v1` and `v2` are vectors, it returns the covariance between them as a scalar. When either one is a matrix, it returns a covariance matrix of size `(n1, n2)`, where `n1` and `n2` are the numbers of slices in `v1` and `v2`, which depend on the setting of `vardim`.
Note: `v2` can be omitted, which indicates `v2 = v1`.
"""
cov

doc"""
den(x)
Expand Down
173 changes: 86 additions & 87 deletions base/statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -261,66 +261,67 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =

# covzm (with centered data)

covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))

covzm(x::AbstractMatrix; vardim::Int=1, corrected::Bool=true) =
covzm(x::AbstractVector, corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
covzm(x::AbstractMatrix, vardim::Int=1, corrected::Bool=true) =
scale!(unscaled_covzm(x, vardim), inv(size(x,vardim) - Int(corrected)))

covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
covzm(x::AbstractVector, y::AbstractVector, corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))

covzm(x::AbstractVecOrMat, y::AbstractVecOrMat; vardim::Int=1, corrected::Bool=true) =
covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1, corrected::Bool=true) =
scale!(unscaled_covzm(x, y, vardim), inv(_getnobs(x, y, vardim) - Int(corrected)))

# covm (with provided mean)

covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(x .- xmean; corrected=corrected)
# Covariance with a caller-supplied mean. Every method subtracts the given
# mean(s) from the data by broadcasting (`.-`) and forwards the centered data,
# together with the positional `vardim`/`corrected` arguments, to the matching
# `covzm` method.

# Single vector `x` with mean `xmean`.
covm(x::AbstractVector, xmean, corrected::Bool=true) =
covzm(x .- xmean, corrected)
# Single matrix `x`; `vardim` is passed through unchanged to `covzm`.
covm(x::AbstractMatrix, xmean, vardim::Int=1, corrected::Bool=true) =
covzm(x .- xmean, vardim, corrected)
# Two vectors, each centered with its own mean.
covm(x::AbstractVector, xmean, y::AbstractVector, ymean, corrected::Bool=true) =
covzm(x .- xmean, y .- ymean, corrected)
# Two vectors or matrices, each centered with its own mean.
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1, corrected::Bool=true) =
covzm(x .- xmean, y .- ymean, vardim, corrected)

covm(x::AbstractMatrix, xmean; vardim::Int=1, corrected::Bool=true) =
covzm(x .- xmean; vardim=vardim, corrected=corrected)
# cov (API)
doc"""
cov(x[, corrected=true])
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(x .- xmean, y .- ymean; corrected=corrected)
Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false`, where `n = length(x)`.
"""
cov(x::AbstractVector, corrected::Bool) = covm(x, Base.mean(x), corrected)
# This ugly hack is necessary to make the method below considered more specific than the deprecated method. When the old keyword version has been completely deprecated, these two methods can be merged
cov{T<:AbstractVector}(x::T) = covm(x, Base.mean(x), true)

covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean; vardim::Int=1, corrected::Bool=true) =
covzm(x .- xmean, y .- ymean; vardim=vardim, corrected=corrected)
doc"""
cov(X[, vardim=1, corrected=true])
# cov (API)
Compute the covariance matrix of the matrix `X` along the dimension `vardim`. If `corrected` is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false`, where `n = size(X, vardim)`.
"""
cov(X::AbstractMatrix, vardim::Int, corrected::Bool=true) =
covm(X, _vmean(X, vardim), vardim, corrected)
# This ugly hack is necessary to make the method below considered more specific than the deprecated method. When the old keyword version has been completely deprecated, these two methods can be merged
cov{T<:AbstractMatrix}(X::T) = cov(X, 1, true)

function cov(x::AbstractVector; corrected::Bool=true, mean=nothing)
mean == 0 ? covzm(x; corrected=corrected) :
mean === nothing ? covm(x, Base.mean(x); corrected=corrected) :
isa(mean, Number) ? covm(x, mean; corrected=corrected) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
doc"""
cov(x, y[, corrected=true])
function cov(x::AbstractMatrix; vardim::Int=1, corrected::Bool=true, mean=nothing)
mean == 0 ? covzm(x; vardim=vardim, corrected=corrected) :
mean === nothing ? covm(x, _vmean(x, vardim); vardim=vardim, corrected=corrected) :
isa(mean, AbstractArray) ? covm(x, mean; vardim=vardim, corrected=corrected) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
Compute the covariance between the vectors `x` and `y`. If `corrected` is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false`, where `n = length(x) = length(y)`.
"""
cov(x::AbstractVector, y::AbstractVector, corrected::Bool) =
covm(x, Base.mean(x), y, Base.mean(y), corrected)
# This ugly hack is necessary to make the method below considered more specific than the deprecated method. When the old keyword version has been completely deprecated, these two methods can be merged
cov{T<:AbstractVector,S<:AbstractVector}(x::T, y::S) =
covm(x, Base.mean(x), y, Base.mean(y), true)

function cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true, mean=nothing)
mean == 0 ? covzm(x, y; corrected=corrected) :
mean === nothing ? covm(x, Base.mean(x), y, Base.mean(y); corrected=corrected) :
isa(mean, (Number,Number)) ? covm(x, mean[1], y, mean[2]; corrected=corrected) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
doc"""
cov(X, Y[, vardim=1, corrected=true])
function cov(x::AbstractVecOrMat, y::AbstractVecOrMat; vardim::Int=1, corrected::Bool=true, mean=nothing)
if mean == 0
covzm(x, y; vardim=vardim, corrected=corrected)
elseif mean === nothing
covm(x, _vmean(x, vardim), y, _vmean(y, vardim); vardim=vardim, corrected=corrected)
elseif isa(mean, (Any,Any))
covm(x, mean[1], y, mean[2]; vardim=vardim, corrected=corrected)
else
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
end
Compute the covariance between the vectors or matrices `X` and `Y` along the dimension `vardim`. If `corrected` is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false`, where `n = size(X, vardim) = size(Y, vardim)`.
"""
cov(X::AbstractVecOrMat, Y::AbstractVecOrMat, vardim::Int, corrected::Bool=true) =
covm(X, _vmean(X, vardim), Y, _vmean(Y, vardim), vardim, corrected)
# This ugly hack (dispatching through the type parameters `T` and `S`) is necessary to make
# the method below considered more specific than the deprecated keyword method. When the old
# keyword version has been completely removed, this method can be merged with the
# `vardim`-taking method above by giving `vardim` a default value.
#
# Two-argument form: uses the defaults `vardim = 1` and `corrected = true`. The means are
# taken along dimension 1 explicitly, because no `vardim` variable exists in this method.
cov{T<:AbstractVecOrMat,S<:AbstractVecOrMat}(X::T, Y::S) =
    covm(X, _vmean(X, 1), Y, _vmean(Y, 1), 1, true)

##### correlation #####

Expand All @@ -340,7 +341,6 @@ function cov2cor!{T}(C::AbstractMatrix{T}, xsd::AbstractArray)
end
return C
end

function cov2cor!(C::AbstractMatrix, xsd::Number, ysd::AbstractArray)
nx, ny = size(C)
length(ysd) == ny || throw(DimensionMismatch("inconsistent dimensions"))
Expand All @@ -351,7 +351,6 @@ function cov2cor!(C::AbstractMatrix, xsd::Number, ysd::AbstractArray)
end
return C
end

function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::Number)
nx, ny = size(C)
length(xsd) == nx || throw(DimensionMismatch("inconsistent dimensions"))
Expand All @@ -362,7 +361,6 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::Number)
end
return C
end

function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
nx, ny = size(C)
(length(xsd) == nx && length(ysd) == ny) ||
Expand All @@ -378,10 +376,10 @@ end
# corzm (non-exported, with centered data)

corzm{T}(x::AbstractVector{T}) = one(real(T))

corzm(x::AbstractMatrix; vardim::Int=1) =
(c = unscaled_covzm(x, vardim); cov2cor!(c, sqrt!(diag(c))))

# Correlation matrix for data that is assumed to be already centered: build the
# unscaled cross-product matrix along `vardim`, then let `cov2cor!` rescale it
# in place using `sqrt!` applied to its diagonal.
function corzm(x::AbstractMatrix, vardim::Int=1)
    xcov = unscaled_covzm(x, vardim)
    xsd = sqrt!(diag(xcov))
    return cov2cor!(xcov, xsd)
end
function corzm(x::AbstractVector, y::AbstractVector)
n = length(x)
length(y) == n || throw(DimensionMismatch("inconsistent lengths"))
Expand All @@ -401,57 +399,58 @@ function corzm(x::AbstractVector, y::AbstractVector)
end
return xy / (sqrt(xx) * sqrt(yy))
end

corzm(x::AbstractVector, y::AbstractMatrix; vardim::Int=1) =
corzm(x::AbstractVector, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt(sumabs2(x)), sqrt!(sumabs2(y, vardim)))

corzm(x::AbstractMatrix, y::AbstractVector; vardim::Int=1) =
corzm(x::AbstractMatrix, y::AbstractVector, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sumabs2(x, vardim)), sqrt(sumabs2(y)))

corzm(x::AbstractMatrix, y::AbstractMatrix; vardim::Int=1) =
corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sumabs2(x, vardim)), sqrt!(sumabs2(y, vardim)))

# corm

# Correlation with a caller-supplied mean. Apart from the single-vector case,
# each method subtracts the given mean(s) by broadcasting (`.-`) and forwards
# the centered data to the matching `corzm` method.

# Single vector: the result is always `one(real(T))`; neither the data nor
# `xmean` is inspected.
corm{T}(x::AbstractVector{T}, xmean) = one(real(T))
# Single matrix; `vardim` is passed through unchanged to `corzm`.
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
# Two vectors, each centered with its own mean.
corm(x::AbstractVector, xmean, y::AbstractVector, ymean) = corzm(x .- xmean, y .- ymean)
# Two vectors or matrices, each centered with its own mean.
corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
corzm(x .- xmean, y .- ymean, vardim)

corm(x::AbstractMatrix, xmean; vardim::Int=1) = corzm(x .- xmean; vardim=vardim)
# cor
doc"""
cor(x)
corm(x::AbstractVector, xmean, y::AbstractVector, ymean) = corzm(x .- xmean, y .- ymean)
Return the number one.
"""
cor{T<:AbstractVector}(x::T) = one(real(eltype(x)))
# This ugly hack (the type parameter on the method above) is necessary to make that method considered more specific than the deprecated keyword method. When the old keyword version has been completely removed, the type parameter can be dropped.

corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean; vardim::Int=1) =
corzm(x .- xmean, y .- ymean; vardim=vardim)
doc"""
cor(X[, vardim=1])
# cor
Compute the Pearson correlation matrix of the matrix `X` along the dimension `vardim`.
"""
cor(X::AbstractMatrix, vardim::Int) = corm(X, _vmean(X, vardim), vardim)
# This ugly hack (dispatching through the type parameter `T`) is necessary to make the method
# below considered more specific than the deprecated keyword method. When the old keyword
# version has been completely removed, this method can be merged with the `vardim`-taking
# method above by giving `vardim` a default value.
#
# One-argument form: uses the default `vardim = 1`. The mean is taken along dimension 1
# explicitly, because no `vardim` variable exists in this method.
cor{T<:AbstractMatrix}(X::T) = corm(X, _vmean(X, 1), 1)

cor{T}(x::AbstractVector{T}; mean=nothing) = one(real(T))
doc"""
cor(x, y)
function cor(x::AbstractMatrix; vardim::Int=1, mean=nothing)
mean == 0 ? corzm(x; vardim=vardim) :
mean === nothing ? corm(x, _vmean(x, vardim); vardim=vardim) :
isa(mean, AbstractArray) ? corm(x, mean; vardim=vardim) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
Compute the Pearson correlation between the vectors `x` and `y`.
"""
cor{T<:AbstractVector,S<:AbstractVector}(x::T, y::S) = corm(x, Base.mean(x), y, Base.mean(y))
# This ugly hack (the type parameters on the method above) is necessary to make that method considered more specific than the deprecated keyword method. When the old keyword version has been completely removed, the type parameters can be dropped.

function cor(x::AbstractVector, y::AbstractVector; mean=nothing)
mean == 0 ? corzm(x, y) :
mean === nothing ? corm(x, Base.mean(x), y, Base.mean(y)) :
isa(mean, (Number,Number)) ? corm(x, mean[1], y, mean[2]) :
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
doc"""
cor(X, Y[, vardim=1])
function cor(x::AbstractVecOrMat, y::AbstractVecOrMat; vardim::Int=1, mean=nothing)
if mean == 0
corzm(x, y; vardim=vardim)
elseif mean === nothing
corm(x, _vmean(x, vardim), y, _vmean(y, vardim); vardim=vardim)
elseif isa(mean, (Any,Any))
corm(x, mean[1], y, mean[2]; vardim=vardim)
else
throw(ArgumentError("invalid value of mean, $(mean)::$(typeof(mean))"))
end
end
Compute the Pearson correlation between the vectors or matrices `X` and `Y` along the dimension `vardim`.
"""
cor(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int) =
corm(x, _vmean(x, vardim), y, _vmean(y, vardim), vardim)
# This ugly hack (dispatching through the type parameters `T` and `S`) is necessary to make
# the method below considered more specific than the deprecated keyword method. When the old
# keyword version has been completely removed, this method can be merged with the
# `vardim`-taking method above by giving `vardim` a default value.
#
# Two-argument form: uses the default `vardim = 1`. The means are taken along dimension 1
# explicitly, because no `vardim` variable exists in this method.
cor{T<:AbstractVecOrMat,S<:AbstractVecOrMat}(x::T, y::S) =
    corm(x, _vmean(x, 1), y, _vmean(y, 1), 1)

##### median & quantiles #####

Expand Down
46 changes: 35 additions & 11 deletions doc/stdlib/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1691,29 +1691,53 @@ Statistics
Like ``quantile``\ , but overwrites the input vector.

.. function:: cov(v1[, v2][, vardim=1, corrected=true, mean=nothing])
.. function:: cov(x[, corrected=true])

.. Docstring generated from Julia source
Compute the Pearson covariance between the vector(s) in ``v1`` and ``v2``\ . Here, ``v1`` and ``v2`` can be either vectors or matrices.
Compute the variance of the vector ``x``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1``\ , whereas the sum is scaled with ``n`` if ``corrected`` is ``false``\ , where ``n = length(x)``\ .

This function accepts three keyword arguments:
.. function:: cov(X[, vardim=1, corrected=true])

* ``vardim``\ : the dimension of variables. When ``vardim = 1``\ , variables are considered in columns while observations in rows; when ``vardim = 2``\ , variables are in rows while observations in columns. By default, it is set to ``1``\ .
* ``corrected``\ : whether to apply Bessel's correction (divide by ``n-1`` instead of ``n``\ ). By default, it is set to ``true``\ .
* ``mean``\ : allow users to supply mean values that are known. By default, it is set to ``nothing``\ , which indicates that the mean(s) are unknown, and the function will compute the mean. Users can use ``mean=0`` to indicate that the input data are centered, and hence there's no need to subtract the mean.
.. Docstring generated from Julia source
Compute the covariance matrix of the matrix ``X`` along the dimension ``vardim``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1``\ , whereas the sum is scaled with ``n`` if ``corrected`` is ``false``\ , where ``n = size(X, vardim)``\ .

.. function:: cov(x, y[, corrected=true])

.. Docstring generated from Julia source
The size of the result depends on the size of ``v1`` and ``v2``\ . When both ``v1`` and ``v2`` are vectors, it returns the covariance between them as a scalar. When either one is a matrix, it returns a covariance matrix of size ``(n1, n2)``\ , where ``n1`` and ``n2`` are the numbers of slices in ``v1`` and ``v2``\ , which depend on the setting of ``vardim``\ .
Compute the covariance between the vectors ``x`` and ``y``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1``\ , whereas the sum is scaled with ``n`` if ``corrected`` is ``false``\ , where ``n = length(x) = length(y)``\ .

Note: ``v2`` can be omitted, which indicates ``v2 = v1``\ .
.. function:: cov(X, Y[, vardim=1, corrected=true])

.. function:: cor(v1[, v2][, vardim=1, mean=nothing])
.. Docstring generated from Julia source
Compute the covariance between the vectors or matrices ``X`` and ``Y`` along the dimension ``vardim``\ . If ``corrected`` is ``true`` (the default) then the sum is scaled with ``n-1``\ , whereas the sum is scaled with ``n`` if ``corrected`` is ``false``\ , where ``n = size(X, vardim) = size(Y, vardim)``\ .

.. function:: cor(x)

.. Docstring generated from Julia source
Compute the Pearson correlation between the vector(s) in ``v1`` and ``v2``\ .
Return the number one.

.. function:: cor(X[, vardim=1])

.. Docstring generated from Julia source
Compute the Pearson correlation matrix of the matrix ``X`` along the dimension ``vardim``\ .

.. function:: cor(x, y)

.. Docstring generated from Julia source
Compute the Pearson correlation between the vectors ``x`` and ``y``\ .

.. function:: cor(X, Y[, vardim=1])

.. Docstring generated from Julia source
Users can use the keyword argument ``vardim`` to specify the variable dimension, and ``mean`` to supply pre-computed mean values.
Compute the Pearson correlation between the vectors or matrices ``X`` and ``Y`` along the dimension ``vardim``\ .

Signal Processing
-----------------
Expand Down
2 changes: 1 addition & 1 deletion test/docs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ f12593_2() = 1
@test (Docs.@repl @r_str) !== nothing

# Simple tests for apropos:
@test contains(sprint(apropos, "pearson"), "cov")
@test contains(sprint(apropos, "pearson"), "cor")
@test contains(sprint(apropos, r"ind(exes|ices)"), "eachindex")
@test contains(sprint(apropos, "print"), "Profile.print")

Expand Down
Loading

0 comments on commit 38299b7

Please sign in to comment.