From 8508cb167cab4f2e66addef336ccdec664d9c862 Mon Sep 17 00:00:00 2001
From: Marius Pille <mapille@uos.de>
Date: Mon, 17 Jul 2023 15:25:05 +0200
Subject: [PATCH 1/3] Add SpearmanCorrelationTest

---
 src/correlation.jl  | 75 +++++++++++++++++++++++++++++++++++++++------
 test/correlation.jl | 24 +++++++++++++++
 2 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/src/correlation.jl b/src/correlation.jl
index 40db405a..26cf7f56 100644
--- a/src/correlation.jl
+++ b/src/correlation.jl
@@ -2,8 +2,11 @@
 # TODO: Implement `CorrelationTest` for two arguments
 
 using Statistics: clampcor
+using StatsBase: corspearman
 
-export CorrelationTest
+export CorrelationTest, SpearmanCorrelationTest
+
+abstract type AbstractCorrelationTest <: HypothesisTest end
 
 """
     CorrelationTest(x, y)
@@ -17,6 +20,7 @@ Perform a t-test for the hypothesis that ``\\text{Cor}(x,y|Z=z) = 0``, i.e. the
 correlation of vectors `x` and `y` given the matrix `Z` is zero.
 
 Implements `pvalue` for the t-test.
+
 Implements `confint` using an approximate confidence interval based on Fisher's
 ``z``-transform.
 
@@ -29,7 +33,7 @@ See also `partialcor` from StatsBase.
 * [Section testing using Student's t-distribution from Pearson correlation coefficient on Wikipedia](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Testing_using_Student's_t-distribution)
 * [K.J. Levy and S.C. Narula (1978): Testing Hypotheses concerning Partial Correlations: Some Methods and Discussion. International Statistical Review 46(2).](https://www.jstor.org/stable/1402814)
 """
-struct CorrelationTest{T<:Real} <: HypothesisTest
+struct CorrelationTest{T<:Real} <: AbstractCorrelationTest
     r::T
     n::Int
     k::Int
@@ -60,15 +64,15 @@ struct CorrelationTest{T<:Real} <: HypothesisTest
 end
 
 testname(p::CorrelationTest) =
-    string("Test for nonzero ", p.k != 0 ? "partial " : "", "correlation")
+    string("Test for nonzero ", p.k != 0 ? "partial " : "", "Pearson correlation")
 
 function population_param_of_interest(p::CorrelationTest)
-    param = p.k != 0 ? "Partial correlation" : "Correlation"
+    param = p.k != 0 ? "Partial Pearson correlation" : "Pearson correlation"
     (param, zero(p.r), p.r)
 end
 
-StatsAPI.nobs(p::CorrelationTest) = p.n
-StatsAPI.dof(p::CorrelationTest) = p.n - 2 - p.k
+StatsAPI.nobs(p::AbstractCorrelationTest) = p.n
+StatsAPI.dof(p::AbstractCorrelationTest) = p.n - 2 - p.k
 
 function StatsAPI.confint(test::CorrelationTest{T}, level::Float64=0.95) where T
     dof(test) > 1 || return (-one(T), one(T))  # Otherwise we can get NaNs
@@ -80,12 +84,65 @@ function StatsAPI.confint(test::CorrelationTest{T}, level::Float64=0.95) where T
     return (elo, ehi)
 end
 
-default_tail(::CorrelationTest) = :both
-StatsAPI.pvalue(test::CorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail)
+default_tail(::AbstractCorrelationTest) = :both
+StatsAPI.pvalue(test::AbstractCorrelationTest; tail=:both) = pvalue(TDist(dof(test)), test.t, tail=tail)
 
-function show_params(io::IO, test::CorrelationTest, indent="")
+function show_params(io::IO, test::AbstractCorrelationTest, indent="")
     println(io, indent, "number of observations:          ", nobs(test))
     println(io, indent, "number of conditional variables: ", test.k)
     println(io, indent, "t-statistic:                     ", test.t)
     println(io, indent, "degrees of freedom:              ", dof(test))
 end
+
+"""
+    SpearmanCorrelationTest(x, y)
+
+Perform a t-test for the hypothesis that ``\\text{Cor}(x,y) = 0``, i.e. the rank-based Spearman correlation 
+of vectors `x` and `y` is zero.
+
+Implements `pvalue` for the t-test.
+
+Implements `confint` using an approximate confidence interval adjusting for the non-normality of the ranks based on [1]. This is still an approximation and which performs insufficient in the case of:
+
+* small sample sizes n < 25
+* a high true population Spearman correlation
+
+In these cases a bootstrap confidence interval can perform better [2].
+
+# External resources
+[1] D. G. Bonett and T. A. Wright, “Sample size requirements for estimating pearson, kendall and spearman correlations,” Psychometrika, vol. 65, no. 1, pp. 23–28, Mar. 2000, doi: 10.1007/BF02294183.
+
+[2] A. J. Bishara and J. B. Hittner, “Confidence intervals for correlations when data are not normal,” Behav Res, vol. 49, no. 1, pp. 294–309, Feb. 2017, doi: 10.3758/s13428-016-0702-8.
+
+"""
+struct SpearmanCorrelationTest{T<:Real} <: AbstractCorrelationTest
+    r::T
+    n::Int
+    k::Int
+    t::T
+
+    # Error checking is done in `corspearman`
+    function SpearmanCorrelationTest(x::AbstractVector, y::AbstractVector)
+        r = corspearman(x, y)
+        n = length(x)
+        t = r * sqrt((n - 2) / (1 - r^2))
+        return new{typeof(r)}(r, n, 0, t)
+    end
+end
+
+testname(p::SpearmanCorrelationTest) =  "Spearman correlation"
+
+function population_param_of_interest(p::SpearmanCorrelationTest)
+    param = "Spearman Correlation"
+    (param, zero(p.r), p.r)
+end
+
+function StatsAPI.confint(test::SpearmanCorrelationTest{T}, level::Float64=0.95) where T
+    dof(test) > 1 || return (-one(T), one(T))  # Otherwise we can get NaNs
+    q = quantile(Normal(), 1 - (1-level) / 2)
+    fisher = atanh(test.r)
+    bound = sqrt((1 + test.r^2 / 2) / (dof(test)-1)) * q # Estimates variance as in Bonnet et al. (2000)
+    elo = clampcor(tanh(fisher - bound))
+    ehi = clampcor(tanh(fisher + bound))
+    return (elo, ehi)
+end
\ No newline at end of file
diff --git a/test/correlation.jl b/test/correlation.jl
index a8c10c3e..5116b1cb 100644
--- a/test/correlation.jl
+++ b/test/correlation.jl
@@ -26,6 +26,30 @@ using DelimitedFiles
     @test pvalue(x) ≈ 0.2948405 atol=1e-6
 end
 
+@testset "SpearmanCorrelation" begin
+    # Columns are line number, calcium, iron
+    nutrient = readdlm(joinpath(@__DIR__, "data", "nutrient.txt"))[:, 1:3]
+    w = SpearmanCorrelationTest(nutrient[:,2], nutrient[:,3])
+    let out = sprint(show, w)
+        @test occursin("reject h_0", out) && !occursin("fail to", out)
+    end
+    # let ci = confint(w)
+    #     @test first(ci) ≈ 0.3327138 atol=1e-6
+    #     @test last(ci) ≈ 0.4546639 atol=1e-6
+    # end
+    @test nobs(w) == 737
+    @test dof(w) == 735
+    @test pvalue(w) < 1e-25
+
+    x = SpearmanCorrelationTest(nutrient[:,1], nutrient[:,2])
+    @test occursin("fail to reject", sprint(show, x))
+    # let ci = confint(x)
+    #     @test first(ci) ≈ -0.1105478 atol=1e-6
+    #     @test last(ci) ≈ 0.0336730 atol=1e-6
+    # end
+    @test pvalue(x) ≈ 0.09275 atol=1e-2 # value from R's cor.test(..., method="spearman") which does not use a t test algorithm AS 89 
+end
+
 @testset "Partial correlation" begin
     # Columns are information, similarities, arithmetic, picture completion
     wechsler = readdlm(joinpath(@__DIR__, "data", "wechsler.txt"))[:,2:end]

From 88699006436402bad342cd430789c3aabc64f34b Mon Sep 17 00:00:00 2001
From: Marius Pille <mapille@uos.de>
Date: Mon, 17 Jul 2023 15:35:25 +0200
Subject: [PATCH 2/3] Typo

---
 src/correlation.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/correlation.jl b/src/correlation.jl
index 26cf7f56..2ee2d801 100644
--- a/src/correlation.jl
+++ b/src/correlation.jl
@@ -133,7 +133,7 @@ end
 testname(p::SpearmanCorrelationTest) =  "Spearman correlation"
 
 function population_param_of_interest(p::SpearmanCorrelationTest)
-    param = "Spearman Correlation"
+    param = "Spearman correlation"
     (param, zero(p.r), p.r)
 end
 

From ed3313b7359ee3e9e1a3dfbb9734e754e1379549 Mon Sep 17 00:00:00 2001
From: Marius Pille <mapille@uos.de>
Date: Tue, 1 Aug 2023 13:50:27 +0200
Subject: [PATCH 3/3] incorporate review

---
 docs/src/multivariate.md |  1 +
 src/correlation.jl       | 37 ++++++++++++++++++++++++-------------
 test/correlation.jl      | 27 ++++++++++++++++++---------
 3 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md
index 33422902..4e519b12 100644
--- a/docs/src/multivariate.md
+++ b/docs/src/multivariate.md
@@ -21,4 +21,5 @@ BartlettTest
 
 ```@docs
 CorrelationTest
+SpearmanCorrelationTest
 ```
diff --git a/src/correlation.jl b/src/correlation.jl
index 2ee2d801..7a50f16e 100644
--- a/src/correlation.jl
+++ b/src/correlation.jl
@@ -11,8 +11,8 @@ abstract type AbstractCorrelationTest <: HypothesisTest end
 """
     CorrelationTest(x, y)
 
-Perform a t-test for the hypothesis that ``\\text{Cor}(x,y) = 0``, i.e. the correlation 
-of vectors `x` and `y` is zero.
+Perform a t-test for the hypothesis that ``\\text{Cor}(x,y) = 0``, i.e. the Pearson 
+correlation of vectors `x` and `y` is zero.
 
     CorrelationTest(x, y, Z)
 
@@ -97,23 +97,34 @@ end
 """
     SpearmanCorrelationTest(x, y)
 
-Perform a t-test for the hypothesis that ``\\text{Cor}(x,y) = 0``, i.e. the rank-based Spearman correlation 
-of vectors `x` and `y` is zero.
+Perform a t-test for the hypothesis that ``\\text{Cor}(x,y) = 0``, i.e. the Spearman rank
+correlation ρₛ of vectors `x` and `y` is zero.
 
 Implements `pvalue` for the t-test.
 
-Implements `confint` using an approximate confidence interval adjusting for the non-normality of the ranks based on [1]. This is still an approximation and which performs insufficient in the case of:
+Implements `confint` using an approximate confidence interval adjusting for the
+non-normality of the ranks based on [1]. This is still an approximation, which performs
+insufficiently in the case of:
 
-* small sample sizes n < 25
-* a high true population Spearman correlation
+* sample sizes below 25
+* a true population Spearman correlation |ρₛ| above 0.95
+* ordinal data
 
-In these cases a bootstrap confidence interval can perform better [2].
+In these cases a bootstrap confidence interval can perform better [2,3].
 
 # External resources
-[1] D. G. Bonett and T. A. Wright, “Sample size requirements for estimating pearson, kendall and spearman correlations,” Psychometrika, vol. 65, no. 1, pp. 23–28, Mar. 2000, doi: 10.1007/BF02294183.
-
-[2] A. J. Bishara and J. B. Hittner, “Confidence intervals for correlations when data are not normal,” Behav Res, vol. 49, no. 1, pp. 294–309, Feb. 2017, doi: 10.3758/s13428-016-0702-8.
-
+[1] D. G. Bonett and T. A. Wright, “Sample size requirements for estimating Pearson, 
+Kendall and Spearman correlations,” Psychometrika, vol. 65, no. 1, pp. 23–28, Mar. 2000,
+doi: 10.1007/BF02294183.
+
+[2] A. J. Bishara and J. B. Hittner, “Confidence intervals for correlations when data are
+not normal,” Behav Res, vol. 49, no. 1, pp. 294–309, Feb. 2017,
+doi: 10.3758/s13428-016-0702-8.
+
+[3] J. Ruscio, “Constructing Confidence Intervals for Spearman’s Rank Correlation with 
+Ordinal Data: A Simulation Study Comparing Analytic and Bootstrap Methods,” 
+J. Mod. App. Stat. Meth., vol. 7, no. 2, pp. 416–434, Nov. 2008,
+doi: 10.22237/jmasm/1225512360
 """
 struct SpearmanCorrelationTest{T<:Real} <: AbstractCorrelationTest
     r::T
@@ -130,7 +141,7 @@ struct SpearmanCorrelationTest{T<:Real} <: AbstractCorrelationTest
     end
 end
 
-testname(p::SpearmanCorrelationTest) =  "Spearman correlation"
+testname(p::SpearmanCorrelationTest) = "Spearman correlation"
 
 function population_param_of_interest(p::SpearmanCorrelationTest)
     param = "Spearman correlation"
diff --git a/test/correlation.jl b/test/correlation.jl
index 5116b1cb..4080247a 100644
--- a/test/correlation.jl
+++ b/test/correlation.jl
@@ -33,21 +33,30 @@ end
     let out = sprint(show, w)
         @test occursin("reject h_0", out) && !occursin("fail to", out)
     end
-    # let ci = confint(w)
-    #     @test first(ci) ≈ 0.3327138 atol=1e-6
-    #     @test last(ci) ≈ 0.4546639 atol=1e-6
-    # end
+    let ci = confint(w)
+        # Compare against values from R using library(spearmanCI)
+        @test first(ci) ≈ 0.3938044 atol=1e-3 
+        @test last(ci) ≈ 0.5178866 atol=1e-2
+        # Test for consistency against our results
+        @test first(ci) ≈ 0.3934359 atol=1e-6
+        @test last(ci) ≈ 0.5137945 atol=1e-6
+    end
     @test nobs(w) == 737
     @test dof(w) == 735
     @test pvalue(w) < 1e-25
 
     x = SpearmanCorrelationTest(nutrient[:,1], nutrient[:,2])
     @test occursin("fail to reject", sprint(show, x))
-    # let ci = confint(x)
-    #     @test first(ci) ≈ -0.1105478 atol=1e-6
-    #     @test last(ci) ≈ 0.0336730 atol=1e-6
-    # end
-    @test pvalue(x) ≈ 0.09275 atol=1e-2 # value from R's cor.test(..., method="spearman") which does not use a t test algorithm AS 89 
+    let ci = confint(x)
+        # Compare against values from R using library(spearmanCI)
+        @test first(ci) ≈ -0.1359917 atol=1e-2
+        @test last(ci) ≈ 0.01197175 atol=1e-2
+        # Test for consistency against our results
+        @test first(ci) ≈ -0.1333692 atol=1e-6
+        @test last(ci) ≈ 0.01065576 atol=1e-6
+    end
+    @test pvalue(x) ≈ 0.09274721 atol=1e-2 # value from R's cor.test(..., method="spearman") which does not use a t test algorithm AS 89 
+    @test pvalue(x) ≈ 0.09429494 atol=1e-6 # Test for consistency against our results
 end
 
 @testset "Partial correlation" begin