Skip to content

Commit

Permalink
Let var, std & cor take a CovarianceEstimator (#815)
Browse files Browse the repository at this point in the history
* Closes #734

* Undo drive-by tweak

* Apparently needed to avoid duplicating docstrings

* Bump version

* Use private _cov2cor! for now
  • Loading branch information
tpgillam committed Jul 15, 2022
1 parent db4e40e commit 77590ce
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "StatsBase"
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
authors = ["JuliaStats"]
version = "0.33.18"
version = "0.33.19"

[deps]
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
Expand Down
2 changes: 2 additions & 0 deletions docs/src/cov.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ cov
cov(::CovarianceEstimator, ::AbstractVector)
cov(::CovarianceEstimator, ::AbstractVector, ::AbstractVector)
cov(::CovarianceEstimator, ::AbstractMatrix)
var(::CovarianceEstimator, ::AbstractVector)
std(::CovarianceEstimator, ::AbstractVector)
cor
mean_and_cov
cov2cor
Expand Down
65 changes: 64 additions & 1 deletion src/cov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ function scattermat end


"""
cov(X, w::AbstractWeights, vardim=1; mean=nothing, corrected=false)
cov(X, w::AbstractWeights, vardim=1; mean=nothing, corrected=false)
Compute the weighted covariance matrix. Similar to `var` and `std` the biased covariance
matrix (`corrected=false`) is computed by multiplying `scattermat(X, w)` by
Expand Down Expand Up @@ -154,6 +154,15 @@ function cor2cov!(C::AbstractMatrix, s::AbstractArray)
return C
end

"""
_cov2cor!(C)
Compute the correlation matrix from the covariance matrix `C`, in-place.
The leading diagonal is used to determine the standard deviations by which to normalise.
"""
_cov2cor!(C::AbstractMatrix) = cov2cor!(C, sqrt.(diag(C)))

"""
CovarianceEstimator
Expand Down Expand Up @@ -198,6 +207,60 @@ cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1) =
cov(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; mean=nothing, dims::Int=1) =
error("cov is not defined for $(typeof(ce)), $(typeof(X)) and $(typeof(w))")

"""
var(ce::CovarianceEstimator, x::AbstractVector; mean=nothing)
Compute the variance of the vector `x` using the estimator `ce`.
"""
var(ce::CovarianceEstimator, x::AbstractVector; kwargs...) = cov(ce, x; kwargs...)

"""
std(ce::CovarianceEstimator, x::AbstractVector; mean=nothing)
Compute the standard deviation of the vector `x` using the estimator `ce`.
"""
std(ce::CovarianceEstimator, x::AbstractVector; kwargs...) = sqrt(var(ce, x; kwargs...))

"""
cor(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector)
Compute the correlation of the vectors `x` and `y` using estimator `ce`.
"""
function cor(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector)
# Here we allow `ce` to see both `x` and `y` simultaneously, and allow it to compute
# a full covariance matrix, from which we will extract the correlation.
#
# Whilst in some cases it might be equivalent (and possibly more efficient) to use:
# cov(ce, x, y) / (std(ce, x) * std(ce, y)),
# this need not apply in general.
return cor(ce, hcat(x, y))[1, 2]
end

"""
cor(
ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights];
mean=nothing, dims::Int=1
)
Compute the correlation matrix of the matrix `X` along dimension `dims`
using estimator `ce`. A weighting vector `w` can be specified.
The keyword argument `mean` can be:
* `nothing` (default) in which case the mean is estimated and subtracted
from the data `X`,
* a precalculated mean in which case it is subtracted from the data `X`.
Assuming `size(X)` is `(N,M)`, `mean` can either be:
* when `dims=1`, an `AbstractMatrix` of size `(1,M)`,
* when `dims=2`, an `AbstractVector` of length `N` or an `AbstractMatrix`
of size `(N,1)`.
"""
function cor(ce::CovarianceEstimator, X::AbstractMatrix; kwargs...)
return _cov2cor!(cov(ce, X; kwargs...))
end
function cor(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; kwargs...)
return _cov2cor!(cov(ce, X, w; kwargs...))
end

"""
SimpleCovariance(;corrected::Bool=false)
Expand Down
11 changes: 11 additions & 0 deletions test/cov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -288,12 +288,23 @@ end

x = rand(8)
y = rand(8)
X = hcat(x, y)

for corrected (false, true)
@test_throws MethodError SimpleCovariance(corrected)
scc = SimpleCovariance(corrected=corrected)
@test cov(scc, x) cov(x; corrected=corrected)
@test cov(scc, x, y) cov(x, y; corrected=corrected)
@test cov(scc, X) cov(X; corrected=corrected)

@test var(scc, x) var(x; corrected=corrected)
@test std(scc, x) std(x; corrected=corrected)

# NB That we should get the same correlation regardless of `corrected`, since it
# only affects the overall scale of the covariance. This cancels out when turning
# it into a correlation matrix.
@test cor(scc, x, y) cor(x, y)
@test cor(scc, X) cor(X)
end
end
end # @testset "StatsBase.Covariance"

2 comments on commit 77590ce

@rofinn
Copy link
Member

@rofinn rofinn commented on 77590ce Jul 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if I have permission to do this, but @JuliaRegistrator register

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/64317

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.33.19 -m "<description of version>" 77590cee52eb16c960710789c9ef323dd551d580
git push origin v0.33.19

Please sign in to comment.