JuliaStats · diegozea · May 21, 2016 · May 24, 2016 · May 24, 2016 · May 24, 2016
diff --git a/src/spearman.jl b/src/spearman.jl
@@ -27,7 +27,7 @@ export CorrelationTest, SpearmanCorrelationTest
 abstract CorrelationTest <: HypothesisTest
 
 "Sum squared difference of ranks (midranks for ties)"
-spearman_S(xrank,yrank) = sumabs2(xrank .- yrank)
+spearman_S(xrank, yrank) = sumabs2(xrank .- yrank)
 
 immutable SpearmanCorrelationTest <: CorrelationTest
     # Tied ranking for x and y
@@ -43,37 +43,37 @@ immutable SpearmanCorrelationTest <: CorrelationTest
     # Spearman's ρ
     ρ::Float64
 
-    function SpearmanCorrelationTest(x,y)
+    function SpearmanCorrelationTest(x, y)
 
         n = length(x)
         (n != length(y)) && throw(ErrorException("x and y must have the same length"))
 
-        xrank,xtiesadj = HypothesisTests.tiedrank_adj(x)
-        yrank,ytiesadj = HypothesisTests.tiedrank_adj(y)
+        xrank, xtiesadj = HypothesisTests.tiedrank_adj(x)
+        yrank, ytiesadj = HypothesisTests.tiedrank_adj(y)
 
-        S = spearman_S(xrank,yrank)
+        S = spearman_S(xrank, yrank)
 
-        ρ = corspearman(x,y)
+        ρ = corspearman(x, y)
 
-        new(xrank,yrank,xtiesadj,ytiesadj,S,n,ρ)
+        new(xrank, yrank, xtiesadj, ytiesadj, S, n, ρ)
     end
 
 end
 
 testname(::SpearmanCorrelationTest) = "Spearman's rank correlation test"
 
-# parameter of interest: name, value under h0, point estimate
-population_param_of_interest(x::SpearmanCorrelationTest) = ("Spearman's ρ",0.0,x.ρ)
+# parameter of interest: name, value under h0, point estimate
+population_param_of_interest(x::SpearmanCorrelationTest) = ("Spearman's ρ", 0.0, x.ρ)
 
-function show_params(io::IO,x::SpearmanCorrelationTest,ident)
-    println(io,ident,"Number of points:                    ",x.n)
-    println(io,ident,"Spearman's ρ:                        ",x.ρ)
-    println(io,ident,"S (Sum squared difference of ranks): ",x.S)
-    println(io,ident,"adjustment for ties in x:            ",x.xtiesadj)
-    println(io,ident,"adjustment for ties in y:            ",x.ytiesadj)
+function show_params(io::IO, x::SpearmanCorrelationTest, ident)
+    println(io, ident, "Number of points:                    ", x.n)
+    println(io, ident, "Spearman's ρ:                        ", x.ρ)
+    println(io, ident, "S (Sum squared difference of ranks): ", x.S)
+    println(io, ident, "adjustment for ties in x:            ", x.xtiesadj)
+    println(io, ident, "adjustment for ties in y:            ", x.ytiesadj)
 end
 
-function P_from_null_S_values(S_null,x::SpearmanCorrelationTest,tail)
+function P_from_null_S_values(S_null, x::SpearmanCorrelationTest, tail)
     S_null_mean = mean(S_null)
     # S is approximately normally distributed
     # S and ρ are inversely proportional
@@ -91,24 +91,24 @@ function P_from_null_S_values(S_null,x::SpearmanCorrelationTest,tail)
     end
 end
 
-function spearman_P_exact(x::SpearmanCorrelationTest,tail)
-    S_null = Float64[ spearman_S(perm,x.yrank) for perm in permutations(x.xrank) ]
-    P_from_null_S_values(S_null,x,tail)
+function spearman_P_exact(x::SpearmanCorrelationTest, tail) # P value from every permutation of the x ranks
+    S_null = Float64[ spearman_S(perm, x.yrank) for perm in permutations(x.xrank) ] # one S per ordering of x.xrank against the fixed x.yrank
+    P_from_null_S_values(S_null, x, tail) # pass the enumerated S values on together with the test and the tail
 end
 
-function spearman_P_sampling(x::SpearmanCorrelationTest,tail)
+function spearman_P_sampling(x::SpearmanCorrelationTest, tail) # P value from randomly shuffled x ranks
     # 360000 samples gives an se(P) < 0.0005 for P < 0.1
     X = copy(x.xrank)
-    S_null = Float64[ spearman_S(shuffle!(X),x.yrank) for sample in 1:360000 ]
-    P_from_null_S_values(S_null,x,tail)
+    S_null = Float64[ spearman_S(shuffle!(X), x.yrank) for sample in 1:360000 ] # shuffle! reorders the copy X in place; x.xrank is left as is
+    P_from_null_S_values(S_null, x, tail)
 end
 
 # Use estimated mean and std for the S null distribution as in:
 #
 # Press WH, Teukolsky SA, Vetterling WT, Flannery BP.
 # Numerical recipes in C.
 # Cambridge: Cambridge university press; 1996.
-function spearman_P_estimated(x::SpearmanCorrelationTest,tail)
+function spearman_P_estimated(x::SpearmanCorrelationTest, tail)
     N = float(x.n)
     a = (N^3 - N)
     # Numerical Recipes (14.6.6)
@@ -119,11 +119,11 @@ function spearman_P_estimated(x::SpearmanCorrelationTest,tail)
     # S is approximately normally distributed
     # S and ρ are inversely proportional
     if tail == :both
-        cdf(Normal(),-abs(zscore)) + ccdf(Normal(),abs(zscore))
+        cdf(Normal(), -abs(zscore)) + ccdf(Normal(), abs(zscore))
     elseif tail == :right
-        cdf(Normal(),zscore)
+        cdf(Normal(), zscore)
     elseif tail == :left
-        ccdf(Normal(),zscore)
+        ccdf(Normal(), zscore)
     else
         throw(ArgumentError("tail=$(tail) is invalid"))
     end
@@ -134,16 +134,16 @@ end
 # McDonald JH.
 # Handbook of biological statistics.
 # Baltimore, MD: Sparky House Publishing; 2009 Aug.
-function spearman_P_ttest(x::SpearmanCorrelationTest,tail)
+function spearman_P_ttest(x::SpearmanCorrelationTest, tail)
     ρ2 = x.ρ^2
     df = x.n-2
-    t = sqrt((df*ρ2)/(1-ρ2))
+    t = sign(x.ρ)*sqrt((df*ρ2)/(1-ρ2)) # signed t: keeps the direction of ρ so the one-sided tails are correct
     if tail == :both
-        cdf(TDist(df),-t) + ccdf(Normal(),t)
+        cdf(TDist(df), -abs(t)) + ccdf(TDist(df), abs(t)) # both tails from TDist(df), not a TDist/Normal mix
     elseif tail == :right
-        ccdf(TDist(df),t)
+        ccdf(TDist(df), t) # P(T >= t): small when ρ is strongly positive
     elseif tail == :left
-        cdf(TDist(df),t)
+        cdf(TDist(df), t) # P(T <= t): small when ρ is strongly negative
     else
         throw(ArgumentError("tail=$(tail) is invalid"))
     end
@@ -152,16 +152,16 @@ end
 function pvalue(x::SpearmanCorrelationTest; tail=:both, method=:estimated)
     if x.n <= 10
         # Exact P value using permutations
-        return(spearman_P_exact(x,tail))
+        return(spearman_P_exact(x, tail))
     end
     if method == :sampling
-        return(spearman_P_sampling(x,tail))
+        return(spearman_P_sampling(x, tail))
     elseif method == :exact
-        return(spearman_P_exact(x,tail))
+        return(spearman_P_exact(x, tail))
     elseif method == :estimated
-        return(spearman_P_estimated(x,tail))
+        return(spearman_P_estimated(x, tail))
     elseif method == :ttest
-        return(spearman_P_ttest(x,tail))
+        return(spearman_P_ttest(x, tail))
     else
         throw(ArgumentError("method=$(method) is invalid"))
     end

diff --git a/test/spearman.jl b/test/spearman.jl
@@ -1,51 +1,52 @@
 using HypothesisTests, Base.Test
 
 # Test Exact P value: n <= 10
-let x = [44.4,45.9,41.9,53.3,44.7,44.1],
-    y = [2.6,3.1,2.5,5.0,3.6,4.0]
+let x = [44.4, 45.9, 41.9, 53.3, 44.7, 44.1],
+    y = [2.6, 3.1, 2.5, 5.0, 3.6, 4.0]
 
-    corr = HypothesisTests.SpearmanCorrelationTest(x,y)
+    corr = HypothesisTests.SpearmanCorrelationTest(x, y)
 
     # R values
     @test_approx_eq corr.ρ 0.6
     @test_approx_eq_eps HypothesisTests.pvalue(corr) 0.2417 0.0001
-    @test_approx_eq_eps HypothesisTests.pvalue(corr,tail=:right) 0.1208 0.0001
-    @test_approx_eq_eps HypothesisTests.pvalue(corr,tail=:left) 0.9125 0.0001
+    @test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right) 0.1208 0.0001
+    @test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:left) 0.9125 0.0001
 end
 
 show(IOBuffer(),
-     HypothesisTests.SpearmanCorrelationTest([44.4,45.9,41.9,53.3,44.,44.1],
-                                             [2.6,3.1,2.5,5.0,3.6,4.0])
+     HypothesisTests.SpearmanCorrelationTest([44.4, 45.9, 41.9, 53.3, 44.7, 44.1],
+                                             [2.6, 3.1, 2.5, 5.0, 3.6, 4.0]) # same x/y data as the exact-P test at the top of this file
      )
 
 let x = collect(1:11),
-    y = [6,5,4,3,2,1,7,11,10,9,8]
+    y = [6, 5, 4, 3, 2, 1, 7, 11, 10, 9, 8]
     # https://stat.ethz.ch/pipermail/r-devel/2009-February/052112.html
     # correct P value 0.03044548
 
-    corr = HypothesisTests.SpearmanCorrelationTest(x,y)
+    corr = HypothesisTests.SpearmanCorrelationTest(x, y)
 
     srand(12345) # Seed for method=:sampling
 
-    @test_approx_eq_eps HypothesisTests.pvalue(corr,tail=:right,method=:exact)     0.03044548 1e-8
-    @test_approx_eq_eps HypothesisTests.pvalue(corr,tail=:right,method=:sampling)  0.030      1e-3
-    @test_approx_eq_eps HypothesisTests.pvalue(corr,tail=:right,method=:estimated) 0.030      1e-3
-    @test_approx_eq_eps HypothesisTests.pvalue(corr,tail=:right,method=:ttest)     0.03       1e-2
+    @test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:exact)     0.03044548 1e-8
+    @test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:sampling)  0.030      1e-3
+    @test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:estimated) 0.030      1e-3
+    @test_approx_eq_eps HypothesisTests.pvalue(corr, tail=:right, method=:ttest)     0.03       1e-2
 end
 
 let x = collect(1:10),
-    y = [5,4,3,2,1,6,10,9,8,7]
+    y = [5, 4, 3, 2, 1, 6, 10, 9, 8, 7]
 
     # R's pspearman: 0.05443067 is the exact value
-    corr = HypothesisTests.SpearmanCorrelationTest(x,y)
+    corr = HypothesisTests.SpearmanCorrelationTest(x, y)
     @test_approx_eq_eps HypothesisTests.pvalue(corr) 0.05443067 1e-8
 end
 
-# Using (N-1)N²(N+1)² overflows with N = 10153
+# Using (N-1)N²(N+1)² overflows with N = 10153 and sqrt((N-1)N²(N+1)²) throws an error
+# pvalue avoids the Int overflow using float(N) and sqrt(N-1)N(N+1) since N > 0
 srand(12345) # Seed for rand
 let x = rand(10153)
 
-    corr = SpearmanCorrelationTest(x,x)
+    corr = SpearmanCorrelationTest(x, x)
 
     @test_approx_eq corr.ρ 1.0
     @test_approx_eq pvalue(corr) 0.0
@@ -54,18 +55,18 @@ end
 
 # Test S value with ties
 
-function rho_with_ties(S,N,tx,ty) # S == D
+function rho_with_ties(S, N, tx, ty) # S == D
     # Equation (14.6.5) from Numerical Recipes for rho with ties
     a=(N^3)-N
     (1-((6/a)*(S+(tx/12)+(ty/12)))) / (sqrt(1-(tx/a))*sqrt(1-(ty/a)))
 end
 
-function diff_rho(x,y)
-    corr = SpearmanCorrelationTest(x,y)
-    corr.ρ - rho_with_ties(corr.S,corr.n,corr.xtiesadj,corr.ytiesadj)
+function diff_rho(x, y)
+    corr = SpearmanCorrelationTest(x, y)
+    corr.ρ - rho_with_ties(corr.S, corr.n, corr.xtiesadj, corr.ytiesadj)
 end
 
 srand(12345) # Seed for rand
 for i in 20:100
-    @test_approx_eq_eps diff_rho(rand(1:10,i),rand(1:10,i)) 0.0 1e-10
+    @test_approx_eq_eps diff_rho(rand(1:10, i), rand(1:10, i)) 0.0 1e-10
 end