# Keno/julia forked from JuliaLang/julia

### Subversion checkout URL

You can clone with
or
.
Fetching contributors…

Cannot retrieve contributors at this time

380 lines (324 sloc) 10.563 kB
 mean(v::AbstractArray) = sum(v)/numel(v) mean(v::AbstractArray, dim::Int) = sum(v,dim)/size(v,dim) weighted_mean(v::AbstractArray, w::AbstractArray) = sum(v.*w)/sum(w) function median(v::AbstractArray) n = numel(v) if isodd(n) return select(v, div(n+1, 2)) else vs = sort(v) return (vs[div(n, 2)] + vs[div(n, 2) + 1]) / 2 end end ## variance with known mean function var(v::AbstractVector, m::Number, corrected::Bool) n = length(v) if n == 0 || (n == 1 && corrected) return NaN end x = v - m return dot(x, x) / (n - (corrected ? 1 : 0)) end var(v::AbstractVector, m::Number) = var(v, m, true) var(v::AbstractArray, m::Number, corrected::Bool) = var(reshape(v, numel(v)), m, corrected) var(v::AbstractArray, m::Number) = var(v, m, true) function var(v::Ranges, m::Number, corrected::Bool) f = first(v) - m s = step(v) l = length(v) if l == 0 || (l == 1 && corrected) return NaN end if corrected return f^2 * l / (l - 1) + f * s * l + s^2 * l * (2 * l - 1) / 6 else return f^2 + f * s * (l - 1) + s^2 * (l - 1) * (2 * l - 1) / 6 end end var(v::Ranges, m::Number) = var(v, m, true) ## variance function var(v::Ranges, corrected::Bool) s = step(v) l = length(v) if l == 0 || (l == 1 && corrected) return NaN end return abs2(s) * (l + 1) * (corrected ? l : (l - 1)) / 12 end var(v::AbstractVector, corrected::Bool) = var(v, mean(v), corrected) var(v::AbstractArray, corrected::Bool) = var(reshape(v, numel(v)), corrected) var(v::AbstractArray) = var(v, true) ## standard deviation with known mean std(v::AbstractArray, m::Number, corrected::Bool) = sqrt(var(v, m, corrected)) std(v::AbstractArray, m::Number) = std(v, m, true) ## standard deviation std(v::AbstractArray, corrected::Bool) = std(v, mean(v), corrected) std(v::AbstractArray) = std(v, true) std(v::Ranges, corrected::Bool) = sqrt(var(v, corrected)) std(v::Ranges) = std(v, true) ## median absolute deviation with known center mad(v::AbstractArray, center::Number) = median(abs(v - center)) ## median absolute deviation mad(v::AbstractArray) = mad(v, median(v)) ## hist ## function hist(v::StridedVector, nbins::Integer) h = zeros(Int, nbins) if nbins == 0 return h end lo, hi = min(v), max(v) if lo == hi lo -= div(nbins,2) hi += div(nbins,2) + int(isodd(nbins)) end binsz = (hi - lo) / nbins for x in v if isfinite(x) i = iround((x - lo) / binsz + 0.5) h[i > nbins ? nbins : i] += 1 end end h end hist(x) = hist(x, 10) function hist(A::StridedMatrix, nbins::Integer) m, n = size(A) h = Array(Int, nbins, n) for j=1:n h[:,j] = hist(sub(A, 1:m, j), nbins) end h end function histc(v::StridedVector, edg) n = length(edg) h = zeros(Int, n) if n == 0 return h end first = edg[1] last = edg[n] for x in v if !isless(last, x) && !isless(x, first) i = search_sorted(edg, x) while isless(x, edg[i]) i -= 1 end h[i] += 1 end end h end function histc(A::StridedMatrix, edg) m, n = size(A) h = Array(Int, length(edg), n) for j=1:n h[:,j] = histc(sub(A, 1:m, j), edg) end h end ## order (aka, rank), resolving ties using the mean rank function tiedrank(v::AbstractArray) n = length(v) place = order(v) ord = Array(Float64, n) i = 1 while i <= n j = i while j + 1 <= n && v[place[i]] == v[place[j + 1]] j += 1 end if j > i m = sum(i:j) / (j - i + 1) for k = i:j ord[place[k]] = m end else ord[place[i]] = i end i = j + 1 end return ord end ## pearson covariance functions ## # pearson covariance between two vectors, with known means function _jl_cov_pearson1(x::AbstractArray, y::AbstractArray, mx::Number, my::Number, corrected::Bool) n = numel(x) if n == 0 || (n == 1 && corrected) return NaN end x0 = x - mx y0 = y - my return (x0'*y0)[1] / (n - (corrected ? 1 : 0)) end # pearson covariance between two vectors function cov_pearson(x::AbstractVector, y::AbstractVector, corrected::Bool) if numel(x) != numel(y) error("cov_pearson: incompatible dimensions") end mx = mean(x) my = mean(y) _jl_cov_pearson1(x, y, mx, my, corrected) end cov_pearson(x::AbstractVector, y::AbstractVector) = cov_pearson(x, y, true) # pearson covariance over all pairs of columns of a matrix function _jl_cov_pearson(x::AbstractMatrix, mxs::AbstractMatrix, corrected::Bool) n = size(x, 1) if n == 0 || (n == 1 && corrected) return NaN end x0 = x - repmat(mxs, n, 1) return (x0'*x0) / (n - (corrected ? 1 : 0)) end cov_pearson(x::AbstractMatrix, corrected::Bool) = _jl_cov_pearson(x, mean(x, 1), corrected) cov_pearson(x::AbstractMatrix) = cov_pearson(x, true) # pearson covariance over all pairs of columns of two matrices function _jl_cov_pearson(x::AbstractMatrix, y::AbstractMatrix, mxs::AbstractMatrix, mys::AbstractMatrix, corrected::Bool) n = size(x, 1) if n == 0 || (n == 1 && corrected) return NaN end x0 = x - repmat(mxs, n, 1) y0 = y - repmat(mys, n, 1) return (x0'*y0) / (n - (corrected ? 1 : 0)) end function cov_pearson(x::AbstractMatrix, y::AbstractMatrix, corrected::Bool) if size(x) != size(y) error("cov_pearson: incompatible dimensions") end if is(x, y) return cov_pearson(x, corrected) end n = size(x, 1) mxs = mean(x, 1) mys = mean(y, 1) return _jl_cov_pearson(x, y, mxs, mys, corrected) end cov_pearson(x::AbstractMatrix, y::AbstractMatrix) = cov_pearson(x, y, true) ## spearman covariance functions ## # spearman covariance between two vectors function cov_spearman(x::AbstractVector, y::AbstractVector, corrected::Bool) cov_pearson(tiedrank(x), tiedrank(y), corrected) end cov_spearman(x::AbstractVector, y::AbstractVector) = cov_spearman(x, y, true) # spearman covariance over all pairs of columns of a matrix function cov_spearman(x::AbstractMatrix, corrected::Bool) cov_pearson(apply(hcat, amap(tiedrank, x, 2)), corrected) end cov_spearman(x::AbstractMatrix) = cov_spearman(x, true) # spearman covariance over all pairs of columns of two matrices function cov_spearman(x::AbstractMatrix, y::AbstractMatrix, corrected::Bool) if is(x, y) return cov_spearman(x, corrected) end cov_pearson( apply(hcat, amap(tiedrank, x, 2)), apply(hcat, amap(tiedrank, y, 2)), corrected) end cov_spearman(x::AbstractMatrix, y::AbstractMatrix) = cov_spearman(x, y, true) const cov = cov_pearson ## pearson correlation functions ## # pearson correlation between two vectors function cor_pearson(x::AbstractVector, y::AbstractVector, corrected::Bool) if numel(x) != numel(y) error("cor_pearson: incompatible dimensions") end mx = mean(x) my = mean(y) sx = std(x, mx, corrected) sy = std(y, my, corrected) return _jl_cov_pearson1(x, y, mx, my, corrected) / (sx * sy) end cor_pearson(x::AbstractVector, y::AbstractVector) = cor_pearson(x, y, true) # pearson correlation over all pairs of columns of a matrix function cor_pearson{T}(x::AbstractMatrix{T}, corrected::Bool) (n,m) = size(x) mxs = mean(x, 1) sxs = similar(mxs) for i = 1:m sxs[i] = std(sub(x, (1:n, i)), mxs[i], corrected) end R = _jl_cov_pearson(x, mxs, corrected) ./ (sxs' * sxs) R[1:m+1:end] = one(T) # fix diagonal for numerical errors return R end cor_pearson(x::AbstractMatrix) = cor_pearson(x, true) # pearson correlation over all pairs of columns of two matrices function cor_pearson(x::AbstractMatrix, y::AbstractMatrix, corrected::Bool) if size(x) != size(y) error("cor_pearson: incompatible dimensions") end if is(x, y) return cor_pearson(x, corrected) end (n,m) = size(x) mxs = mean(x, 1) mys = mean(y, 1) sxs = similar(mxs) sys = similar(mys) for i = 1:m sxs[i] = std(sub(x, (1:n, i)), mxs[i], corrected) sys[i] = std(sub(y, (1:n, i)), mys[i], corrected) end return _jl_cov_pearson(x, y, mxs, mys, corrected) ./ (sxs' * sys) end cor_pearson(x::AbstractMatrix, y::AbstractMatrix) = cor_pearson(x, y, true) ## spearman correlation functions ## # spearman correlation between two vectors function cor_spearman(x::AbstractVector, y::AbstractVector, corrected::Bool) cor_pearson(tiedrank(x), tiedrank(y), corrected) end cor_spearman(x::AbstractVector, y::AbstractVector) = cor_spearman(x, y, true) # spearman correlation over all pairs of columns of a matrix function cor_spearman(x::AbstractMatrix, corrected::Bool) cor_pearson(apply(hcat, amap(tiedrank, x, 2)), corrected) end cor_spearman(x::AbstractMatrix) = cor_spearman(x, true) # spearman correlation over all pairs of columns of two matrices function cor_spearman(x::AbstractMatrix, y::AbstractMatrix, corrected::Bool) if is(x, y) return cor_spearman(x, corrected) end cor_pearson( apply(hcat, amap(tiedrank, x, 2)), apply(hcat, amap(tiedrank, y, 2)), corrected) end cor_spearman(x::AbstractMatrix, y::AbstractMatrix) = cor_spearman(x, y, true) const cor = cor_pearson ## quantiles ## # for now, use the R/S definition of quantile; may want variants later # see ?quantile in R -- this is type 7 function quantile(x, qs) # make sure the quantiles are in [0,1] bqs = _bound_quantiles(qs) lx = length(x) lqs = length(bqs) if lx > 0 && lqs > 0 index = 1 + (lx-1) * bqs lo = ifloor(index) hi = iceil(index) sortedX = sort(x) i = index .> lo ret = float(sortedX[lo]) i = [1:length(i)][i] h = (index - lo)[i] ret[i] = (1-h) .* ret[i] + h .* sortedX[hi[i]] else ret = zeros(lqs) * NaN end ret end quantile(x, q::Number) = quantile(x, [q])[1] quartile(x) = quantile(x, [.25, .5, .75]) quintile(x) = quantile(x, [.2:.2:.8]) decile(x) = quantile(x, [.1:.1:.9]) function _bound_quantiles(qs) epsilon = 100 * eps() if (any(qs .< -epsilon) || any(qs .> 1 + epsilon)) error("quantiles out of [0,1] range!") end [min(1, max(0, q)) for q = qs] end
Something went wrong with that request. Please try again.