Skip to content

Commit

Permalink
Merge 934332b into 2913b7a
Browse files Browse the repository at this point in the history
  • Loading branch information
ararslan committed Dec 23, 2018
2 parents 2913b7a + 934332b commit 83f4826
Show file tree
Hide file tree
Showing 9 changed files with 919 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ makedocs(
"sampling.md",
"empirical.md",
"signalcorr.md",
"multivariate.md",
"misc.md",
"statmodels.md"]
)
Expand Down
16 changes: 16 additions & 0 deletions docs/src/multivariate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Multivariate Summary Statistics

This package provides a few methods for summarizing multivariate data.

## Partial Correlation

```@docs
partialcor
```

## Generalizations of Variance

```@docs
genvar
totalvar
```
7 changes: 7 additions & 0 deletions src/StatsBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ module StatsBase
mad, # median absolute deviation
iqr, # interquatile range

genvar, # generalized variance
totalvar, # total variation

entropy, # the entropy of a probability vector
renyientropy, # the Rényi (generalised) entropy of a probability vector
crossentropy, # cross entropy between two probability vectors
Expand Down Expand Up @@ -113,6 +116,9 @@ module StatsBase
corspearman, # spearman's rank correlation
corkendall, # kendall's rank correlation

## partialcor
partialcor, # partial correlation

## signalcorr
autocov!, autocov, # auto covariance
autocor!, autocor, # auto correlation
Expand Down Expand Up @@ -210,6 +216,7 @@ module StatsBase
include("toeplitzsolvers.jl")
include("rankcorr.jl")
include("signalcorr.jl")
include("partialcor.jl")
include("empirical.jl")
include("hist.jl")
include("misc.jl")
Expand Down
62 changes: 62 additions & 0 deletions src/partialcor.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Partial correlation

"""
partialcor(x, y, Z)
Compute the partial correlation of the vectors `x` and `y` given `Z`, which can be
a vector or matrix.
"""
function partialcor(x::AbstractVector, y::AbstractVector, Z::AbstractVecOrMat)
length(x) == length(y) == size(Z, 1) ||
throw(DimensionMismatch("Inputs must have the same number of observations"))
length(x) > 0 || throw(ArgumentError("Inputs must be non-empty"))
return Statistics.clampcor(_partialcor(x, mean(x), y, mean(y), Z))
end

function _partialcor(x::AbstractVector, μx, y::AbstractVector, μy, Z::AbstractMatrix)
p = size(Z, 2)
p == 1 && return _partialcor(x, μx, y, μy, vec(Z))
z₀ = view(Z, :, 1)
Zmz₀ = view(Z, :, 2:p)
μz₀ = mean(z₀)
rxz = _partialcor(x, μx, z₀, μz₀, Zmz₀)
rzy = _partialcor(z₀, μz₀, y, μy, Zmz₀)
rxy = _partialcor(x, μx, y, μy, Zmz₀)::typeof(rxz)
return (rxy - rxz * rzy) / (sqrt(1 - rxz^2) * sqrt(1 - rzy^2))
end

function _partialcor(x::AbstractVector, μx, y::AbstractVector, μy, z::AbstractVector)
μz = mean(z)

# Initialize all of the accumulators to 0 of the appropriate types
Σxx = abs2(zero(eltype(x)) - zero(μx))
Σyy = abs2(zero(eltype(y)) - zero(μy))
Σzz = abs2(zero(eltype(z)) - zero(μz))
Σxy = zero(Σxx * Σyy)
Σxz = zero(Σxx * Σzz)
Σzy = zero(Σzz * Σyy)

# We only want to make one pass over all of the arrays
@inbounds begin
@simd for i in eachindex(x, y, z)
xi = x[i] - μx
yi = y[i] - μy
zi = z[i] - μz

Σxx += abs2(xi)
Σyy += abs2(yi)
Σzz += abs2(zi)

Σxy += xi * yi
Σxz += xi * zi
Σzy += zi * yi
end
end

# Individual pairwise correlations
rxy = Σxy / sqrt(Σxx * Σyy)
rxz = Σxz / sqrt(Σxx * Σzz)
rzy = Σzy / sqrt(Σzz * Σyy)

return (rxy - rxz * rzy) / (sqrt(1 - rxz^2) * sqrt(1 - rzy^2))
end
27 changes: 27 additions & 0 deletions src/scalarstats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,33 @@ minus the 25th percentile.
"""
iqr(v::AbstractArray{<:Real}) = (q = quantile(v, [.25, .75]); q[2] - q[1])

# Generalized variance
"""
genvar(X)
Compute the generalized sample variance of `X`. If `X` is a vector, one-column matrix,
or other one-dimensional iterable, this is equivalent to the sample variance.
Otherwise if `X` is a matrix, this is equivalent to the determinant of the covariance
matrix of `X`.
!!! note
The generalized sample variance will be 0 if the columns of the matrix of deviations
are linearly dependent.
"""
genvar(X::AbstractMatrix) = size(X, 2) == 1 ? var(vec(X)) : det(cov(X))
genvar(itr) = var(itr)

# Total variation
"""
totalvar(X)
Compute the total sample variance of `X`. If `X` is a vector, one-column matrix,
or other one-dimensional iterable, this is equivalent to the sample variance.
Otherwise if `X` is a matrix, this is equivalent to the sum of the diagonal elements
of the covariance matrix of `X`.
"""
totalvar(X::AbstractMatrix) = sum(var(X, dims=1))
totalvar(itr) = var(itr)

#############################
#
Expand Down
Loading

0 comments on commit 83f4826

Please sign in to comment.