Skip to content

Commit

Permalink
move statistics.jl and related methods from Base to StatsBase
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrikekre committed May 20, 2018
1 parent bb5138e commit 8fe4df1
Show file tree
Hide file tree
Showing 13 changed files with 1,649 additions and 80 deletions.
17 changes: 16 additions & 1 deletion src/StatsBase.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__precompile__()

module StatsBase
import Base: length, isempty, eltype, values, sum, mean, mean!, show, quantile
import Base: length, isempty, eltype, values, sum, mean, mean!, show
import Base.Cartesian: @nloops, @nref, @nextract
using Base: @irrational, @propagate_inbounds
import DataStructures: heapify!, heappop!, percolate_down!
Expand Down Expand Up @@ -193,11 +193,26 @@ module StatsBase
export midpoints
end

const BASESTATS_IN_STATSBASE = false # VERSION >= v"1.2.3"

# `myscale!(A, b)` multiplies the array `A` by the scalar `b` in place, forwarding to
# whichever in-place multiply the running Julia provides: `rmul!` from
# 0.7.0-DEV.3665 onwards, `scale!` on older versions.
if VERSION >= v"0.7.0-DEV.3665"
    function myscale!(A::AbstractArray, b::Number)
        return rmul!(A, b)
    end
else
    function myscale!(A::AbstractArray, b::Number)
        return scale!(A, b)
    end
end

# Choose where the basic statistics functions come from. `@static` resolves the
# branch when the macro is expanded, so only the selected branch is ever evaluated.
@static if BASESTATS_IN_STATSBASE
# StatsBase owns the definitions: export the names and load them from statistics.jl.
export cor, cov, median!, median, middle,
quantile!, quantile, std, stdm, var, varm, linreg
include("statistics.jl")
# `Compatvarm` is the single name the rest of the package uses for the
# `dims`-keyword form of `varm`; here it is the package's own `varm`.
const Compatvarm = varm
else
# The definitions still live in Base: import the names so that the methods this
# package defines for them extend the Base functions.
import Base: quantile, median, var, varm!, std, stdm,
cov, covm, cor, corm, sqrt!, unscaled_covzm, cov2cor!
# `varm` is imported from Compat rather than Base, and `Compatvarm` points at it.
import Compat: varm
const Compatvarm = Compat.varm
end

# source files

include("common.jl")
Expand Down
32 changes: 16 additions & 16 deletions src/cov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ end
## scatter matrix


scattermat_zm(x::DenseMatrix, vardim::Int) = Base.unscaled_covzm(x, vardim)
scattermat_zm(x::DenseMatrix, vardim::Int) = unscaled_covzm(x, vardim)


scattermat_zm(x::DenseMatrix, wv::AbstractWeights, vardim::Int) =
_symmetrize!(Base.unscaled_covzm(x, _scalevars(x, values(wv), vardim), vardim))
_symmetrize!(unscaled_covzm(x, _scalevars(x, values(wv), vardim), vardim))

"""
scattermat(X, [wv::AbstractWeights]; mean=nothing, vardim=1)
Expand Down Expand Up @@ -88,18 +88,18 @@ scattermat(x::DenseMatrix, wv::AbstractWeights, vardim::Int=1) =
scattermatm(x, Base.mean(x, wv, vardim), wv, vardim)

## weighted cov
Base.covm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1;
covm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1;
corrected::DepBool=nothing) =
myscale!(scattermatm(x, mean, w, vardim), varcorrection(w, depcheck(:covm, corrected)))


Base.cov(x::DenseMatrix, w::AbstractWeights, vardim::Int=1; corrected::DepBool=nothing) =
Base.covm(x, Base.mean(x, w, vardim), w, vardim; corrected=depcheck(:cov, corrected))
cov(x::DenseMatrix, w::AbstractWeights, vardim::Int=1; corrected::DepBool=nothing) =
covm(x, Base.mean(x, w, vardim), w, vardim; corrected=depcheck(:cov, corrected))

function Base.corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1)
c = Base.covm(x, mean, w, vardim; corrected=false)
s = Base.stdm(x, w, mean, vardim; corrected=false)
Base.cov2cor!(c, s)
function corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1)
c = covm(x, mean, w, vardim; corrected=false)
s = stdm(x, w, mean, vardim; corrected=false)
cov2cor!(c, s)
end

"""
Expand All @@ -108,33 +108,33 @@ end
Compute the Pearson correlation matrix of `X` along the dimension
`vardim` with a weighting `w`.
"""
Base.cor(x::DenseMatrix, w::AbstractWeights, vardim::Int=1) =
Base.corm(x, Base.mean(x, w, vardim), w, vardim)
cor(x::DenseMatrix, w::AbstractWeights, vardim::Int=1) =
corm(x, Base.mean(x, w, vardim), w, vardim)

if VERSION >= v"0.7.0-DEV.755"
function mean_and_cov(x::DenseMatrix, vardim::Int=1; corrected::Bool=true)
m = Compat.mean(x, dims = vardim)
return m, Base.covm(x, m, vardim, corrected=corrected)
return m, covm(x, m, vardim, corrected=corrected)
end
else
function mean_and_cov(x::DenseMatrix, vardim::Int=1; corrected::Bool=true)
m = mean(x, vardim)
return m, Base.covm(x, m, vardim, corrected)
return m, covm(x, m, vardim, corrected)
end
end
function mean_and_cov(x::DenseMatrix, wv::AbstractWeights, vardim::Int=1;
corrected::DepBool=nothing)
m = mean(x, wv, vardim)
return m, Base.cov(x, wv, vardim; corrected=depcheck(:mean_and_cov, corrected))
return m, cov(x, wv, vardim; corrected=depcheck(:mean_and_cov, corrected))
end

"""
cov2cor(C, s)
Compute the correlation matrix from the covariance matrix `C` and a vector of standard
deviations `s`. Use `Base.cov2cor!` for an in-place version.
deviations `s`. Use `StatsBase.cov2cor!` for an in-place version.
"""
cov2cor(C::AbstractMatrix, s::AbstractArray) = Base.cov2cor!(copy(C), s)
cov2cor(C::AbstractMatrix, s::AbstractArray) = cov2cor!(copy(C), s)

"""
cor2cov(C, s)
Expand Down
1 change: 0 additions & 1 deletion src/deprecates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import Base.@deprecate
import Base.depwarn
import Base.@deprecate_binding

import Base.varm, Base.stdm
@deprecate varm(v::RealArray, m::Real, wv::AbstractWeights) varm(v, wv, m)
# Deprecated argument order (mean before weights) for the dimension-wise `varm`.
# The replacement must use this signature's own argument names (`A`, `M`); it forwards
# to the current order: data, weights, mean, dim.
@deprecate varm(A::RealArray, M::RealArray, wv::AbstractWeights, dim::Int) varm(A, wv, M, dim)
@deprecate stdm(v::RealArray, m::Real, wv::AbstractWeights) stdm(v, wv, m)
Expand Down
32 changes: 16 additions & 16 deletions src/moments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ the population variance is computed by replacing
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
* `Weights`: `ArgumentError` (bias correction not supported)
"""
Base.varm(v::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) =
varm(v::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) =
_moment2(v, w, m; corrected=depcheck(:varm, corrected))

"""
Expand All @@ -38,7 +38,7 @@ replacing ``\\frac{1}{\\sum{w}}`` with a factor dependent on the type of weights
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
* `Weights`: `ArgumentError` (bias correction not supported)
"""
function Base.var(v::RealArray, w::AbstractWeights; mean=nothing,
function var(v::RealArray, w::AbstractWeights; mean=nothing,
corrected::DepBool=nothing)
corrected = depcheck(:var, corrected)

Expand All @@ -51,7 +51,7 @@ end

## var along dim

function Base.varm!(R::AbstractArray, A::RealArray, w::AbstractWeights, M::RealArray,
function varm!(R::AbstractArray, A::RealArray, w::AbstractWeights, M::RealArray,
dim::Int; corrected::DepBool=nothing)
corrected = depcheck(:varm!, corrected)
myscale!(_wsum_centralize!(R, abs2, A, values(w), M, dim, true),
Expand All @@ -63,10 +63,10 @@ function var!(R::AbstractArray, A::RealArray, w::AbstractWeights, dim::Int;
corrected = depcheck(:var!, corrected)

if mean == 0
Base.varm!(R, A, w, Base.reducedim_initarray(A, dim, 0, eltype(R)), dim;
varm!(R, A, w, Base.reducedim_initarray(A, dim, 0, eltype(R)), dim;
corrected=corrected)
elseif mean == nothing
Base.varm!(R, A, w, Base.mean(A, w, dim), dim; corrected=corrected)
varm!(R, A, w, Base.mean(A, w, dim), dim; corrected=corrected)
else
# check size of mean
for i = 1:ndims(A)
Expand All @@ -78,18 +78,18 @@ function var!(R::AbstractArray, A::RealArray, w::AbstractWeights, dim::Int;
dM == dA || throw(DimensionMismatch("Incorrect size of mean."))
end
end
Base.varm!(R, A, w, mean, dim; corrected=corrected)
varm!(R, A, w, mean, dim; corrected=corrected)
end
end

function Base.varm(A::RealArray, w::AbstractWeights, M::RealArray, dim::Int;
function varm(A::RealArray, w::AbstractWeights, M::RealArray, dim::Int;
corrected::DepBool=nothing)
corrected = depcheck(:varm, corrected)
Base.varm!(similar(A, Float64, Base.reduced_indices(Compat.axes(A), dim)), A, w, M,
varm!(similar(A, Float64, Base.reduced_indices(Compat.axes(A), dim)), A, w, M,
dim; corrected=corrected)
end

function Base.var(A::RealArray, w::AbstractWeights, dim::Int; mean=nothing,
function var(A::RealArray, w::AbstractWeights, dim::Int; mean=nothing,
corrected::DepBool=nothing)
corrected = depcheck(:var, corrected)
var!(similar(A, Float64, Base.reduced_indices(Compat.axes(A), dim)), A, w, dim;
Expand All @@ -114,7 +114,7 @@ dependent on the type of weights used:
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
* `Weights`: `ArgumentError` (bias correction not supported)
"""
Base.stdm(v::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) =
stdm(v::RealArray, w::AbstractWeights, m::Real; corrected::DepBool=nothing) =
sqrt(varm(v, w, m, corrected=depcheck(:stdm, corrected)))

"""
Expand All @@ -135,17 +135,17 @@ weights used:
* `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)`
* `Weights`: `ArgumentError` (bias correction not supported)
"""
Base.std(v::RealArray, w::AbstractWeights; mean=nothing, corrected::DepBool=nothing) =
std(v::RealArray, w::AbstractWeights; mean=nothing, corrected::DepBool=nothing) =
sqrt.(var(v, w; mean=mean, corrected=depcheck(:std, corrected)))

Base.stdm(v::RealArray, m::RealArray, dim::Int; corrected::DepBool=nothing) =
Base.sqrt!(Compat.varm(v, m, dims=dim, corrected=depcheck(:stdm, corrected)))
stdm(v::RealArray, m::RealArray, dim::Int; corrected::DepBool=nothing) =
sqrt!(Compatvarm(v, m, dims=dim, corrected=depcheck(:stdm, corrected)))

Base.stdm(v::RealArray, w::AbstractWeights, m::RealArray, dim::Int;
stdm(v::RealArray, w::AbstractWeights, m::RealArray, dim::Int;
corrected::DepBool=nothing) =
sqrt.(varm(v, w, m, dim; corrected=depcheck(:stdm, corrected)))

Base.std(v::RealArray, w::AbstractWeights, dim::Int; mean=nothing,
std(v::RealArray, w::AbstractWeights, dim::Int; mean=nothing,
corrected::DepBool=nothing) =
sqrt.(var(v, w, dim; mean=mean, corrected=depcheck(:std, corrected)))

Expand Down Expand Up @@ -193,7 +193,7 @@ end

function mean_and_var(A::RealArray, dim::Int; corrected::Bool=true)
m = Compat.mean(A, dims = dim)
v = Compat.varm(A, m, dims = dim, corrected=corrected)
v = Compatvarm(A, m, dims = dim, corrected=corrected)
m, v
end
function mean_and_std(A::RealArray, dim::Int; corrected::Bool=true)
Expand Down
Loading

0 comments on commit 8fe4df1

Please sign in to comment.