Skip to content

Commit

Permalink
Spearman Test
Browse files Browse the repository at this point in the history
  • Loading branch information
diegozea committed May 21, 2016
1 parent 351b8d0 commit 5a9c7e5
Show file tree
Hide file tree
Showing 4 changed files with 219 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/HypothesisTests.jl
Expand Up @@ -37,7 +37,7 @@ check_same_length(x::Vector, y::Vector) = if length(x) != length(y)
end

# Basic function for finding a p-value given a distribution and tail
pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) =
pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) =
if tail == :both
min(2 * min(cdf(dist, x), ccdf(dist, x)), 1.0)
elseif tail == :left
Expand All @@ -48,7 +48,7 @@ pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) =
throw(ArgumentError("tail=$(tail) is invalid"))
end

pvalue(dist::DiscreteUnivariateDistribution, x::Number; tail=:both) =
pvalue(dist::DiscreteUnivariateDistribution, x::Number; tail=:both) =
if tail == :both
min(2 * min(ccdf(dist, x-1), cdf(dist, x)), 1.0)
elseif tail == :left
Expand Down Expand Up @@ -138,4 +138,5 @@ include("t.jl")
include("wilcoxon.jl")
include("power_divergence.jl")
include("anderson_darling.jl")
include("spearman.jl")
end
173 changes: 173 additions & 0 deletions src/spearman.jl
@@ -0,0 +1,173 @@
# spearman.jl
# Spearman's rank correlation test in Julia
#
# Copyright (C) 2016 Diego Javier Zea
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

export CorrelationTest, SpearmanCorrelationTest

abstract CorrelationTest <: HypothesisTest

"Sum squared difference of ranks (midranks for ties)"
spearman_S(xrank, yrank) = sumabs2(xrank .- yrank)

immutable SpearmanCorrelationTest <: CorrelationTest
# Tied ranking for x and y
xrank::Vector{Float64}
yrank::Vector{Float64}
# Adjustment for ties
xtiesadj::Float64
ytiesadj::Float64
# Sum squared difference of ranks
S::Float64
# Number of points
n::Int
# Spearman's ρ
ρ::Float64

function SpearmanCorrelationTest(x, y)

n = length(x)
(n != length(y)) && throw(ErrorException("x and y must have the same length"))

xrank, xtiesadj = HypothesisTests.tiedrank_adj(x)
yrank, ytiesadj = HypothesisTests.tiedrank_adj(y)

S = spearman_S(xrank, yrank)

ρ = corspearman(x, y)

new(xrank, yrank, xtiesadj, ytiesadj, S, n, ρ)
end

end

testname(::SpearmanCorrelationTest) = "Spearman's rank correlation test"

# parameter of interest: name, value under h0, point estimate
population_param_of_interest(x::SpearmanCorrelationTest) = ("Spearman's ρ", 0.0, x.ρ)

function show_params(io::IO, x::SpearmanCorrelationTest, ident)
println(io, ident, "Number of points: ", x.n)
println(io, ident, "Spearman's ρ: ", x.ρ)
println(io, ident, "S (Sum squared difference of ranks): ", x.S)
println(io, ident, "adjustment for ties in x: ", x.xtiesadj)
println(io, ident, "adjustment for ties in y: ", x.ytiesadj)
end

function P_from_null_S_values(S_null, x::SpearmanCorrelationTest, tail)
S_null_mean = mean(S_null)
# S is approximately normally distributed
# S and ρ are inversely proportional
S_null[:] = S_null .- S_null_mean # center
S_centered = x.S - S_null_mean # center
if tail == :both
modS = abs(S_centered)
mean(S_null .<= -modS) + mean(S_null .>= modS)
elseif tail == :right
mean(S_null .<= S_centered)
elseif tail == :left
mean(S_null .>= S_centered)
else
throw(ArgumentError("tail=$(tail) is invalid"))
end
end

function spearman_P_exact(x::SpearmanCorrelationTest, tail)
S_null = Float64[ spearman_S(perm, x.yrank) for perm in permutations(x.xrank) ]
P_from_null_S_values(S_null, x, tail)
end

function spearman_P_sampling(x::SpearmanCorrelationTest, tail)
# 360000 samples gives an se(P)=0.0005 for P < 0.1
X = copy(x.xrank)
S_null = Float64[ spearman_S(shuffle!(X), x.yrank) for sample in 1:360000 ]
P_from_null_S_values(S_null, x, tail)
end

# Use estimated mean and std for the S null distribution as in:
#
# Press WH, Teukolsky SA, Vetterling WT, Flannery BP.
# Numerical recipes in C.
# Cambridge: Cambridge university press; 1996.
# Use estimated mean and std for the S null distribution as in:
#
# Press WH, Teukolsky SA, Vetterling WT, Flannery BP.
# Numerical recipes in C.
# Cambridge: Cambridge university press; 1996.
function spearman_P_estimated(x::SpearmanCorrelationTest, tail)
a = (x.n^3 - x.n)
# Numerical Recipes (14.6.6)
S_null_mean = (a/6.) - (x.xtiesadj/12.) - (x.ytiesadj/12.)
# Numerical Recipes (14.6.7)
S_null_std = sqrt((((x.n-1)*(x.n^2)*((x.n+1)^2))/36) * (1-(x.xtiesadj/a)) * (1-(x.ytiesadj/a)))
zscore = (x.S - S_null_mean)/S_null_std
# S is approximately normally distributed
# S and ρ are inversely proportional
if tail == :both
cdf(Normal(), -abs(zscore)) + ccdf(Normal(), abs(zscore))
elseif tail == :right
cdf(Normal(), zscore)
elseif tail == :left
ccdf(Normal(), zscore)
else
throw(ArgumentError("tail=$(tail) is invalid"))
end
end

# Using T test for n > 10 as in:
#
# McDonald JH.
# Handbook of biological statistics.
# Baltimore, MD: Sparky House Publishing; 2009 Aug.
function spearman_P_ttest(x::SpearmanCorrelationTest, tail)
ρ2 = x.ρ^2
df = x.n-2
t = sqrt((df*ρ2)/(1-ρ2))
if tail == :both
cdf(TDist(df), -t) + ccdf(Normal(), t)
elseif tail == :right
ccdf(TDist(df), t)
elseif tail == :left
cdf(TDist(df), t)
else
throw(ArgumentError("tail=$(tail) is invalid"))
end
end

function pvalue(x::SpearmanCorrelationTest; tail=:both, method=:estimated)
if x.n <= 10
# Exact P value using permutations
return( spearman_P_exact(x, tail) )
end
if method == :sampling
return( spearman_P_sampling(x, tail) )
elseif method == :exact
return( spearman_P_exact(x, tail) )
elseif method == :estimated
return( spearman_P_estimated(x, tail) )
elseif method == :ttest
return( spearman_P_ttest(x, tail) )
else
throw(ArgumentError("method=$(method) is invalid"))
end
end

1 change: 1 addition & 0 deletions test/runtests.jl
Expand Up @@ -9,4 +9,5 @@ include("t.jl")
include("wilcoxon.jl")
include("power_divergence.jl")
include("anderson_darling.jl")
include("spearman.jl")
include("common.jl")
42 changes: 42 additions & 0 deletions test/spearman.jl
@@ -0,0 +1,42 @@
using HypothesisTests, Base.Test

# Test Exact P value: n <= 10
let x = [ 44.4, 45.9, 41.9, 53.3, 44.7, 44.1 ],
y = [ 2.6, 3.1, 2.5, 5.0, 3.6, 4.0 ]

corr = HypothesisTests.SpearmanCorrelationTest(x, y)

# R values
@test_approx_eq corr.ρ 0.6
@test_approx_eq_eps HypothesisTests.pvalue(corr) 0.2417 0.0001
@test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right) 0.1208 0.0001
@test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:left) 0.9125 0.0001
end

show(IOBuffer(),
HypothesisTests.SpearmanCorrelationTest([ 44.4, 45.9, 41.9, 53.3, 44.7, 44.1 ],
[ 2.6, 3.1, 2.5, 5.0, 3.6, 4.0 ])
)

let x = collect(1:11),
y = [6,5,4,3,2,1,7,11,10,9,8]
# https://stat.ethz.ch/pipermail/r-devel/2009-February/052112.html
# correct P value 0.03044548

corr = HypothesisTests.SpearmanCorrelationTest(x, y)

@test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:exact) 0.03044548 1e-8
@test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:sampling) 0.030 1e-3
@test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:estimated) 0.030 1e-3
@test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:ttest) 0.03 1e-2
end

let x = collect(1:10),
y = [5,4,3,2,1,6,10,9,8,7]

# R's pspearman: 0.05443067 is the exact value
corr = HypothesisTests.SpearmanCorrelationTest(x, y)
@test_approx_eq_eps HypothesisTests.pvalue(corr) 0.05443067 1e-8
end


0 comments on commit 5a9c7e5

Please sign in to comment.