Skip to content

Commit

Permalink
Move method descriptions to docstrings (#106)
Browse files Browse the repository at this point in the history
  • Loading branch information
BenjaminBorn authored and ararslan committed Aug 26, 2017
1 parent 48558c8 commit 6cc7d93
Show file tree
Hide file tree
Showing 10 changed files with 535 additions and 46 deletions.
30 changes: 30 additions & 0 deletions src/HypothesisTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,36 @@ check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length
throw(DimensionMismatch("Vectors must be the same length"))
end

"""
    confint(test::HypothesisTest, alpha = 0.05; tail = :both)

Compute a confidence interval C with coverage 1-`alpha`.

If `tail` is `:both` (default), then a two-sided confidence interval is returned. If `tail`
is `:left` or `:right`, then a one-sided confidence interval is returned.

!!! note
    Most of the implemented confidence intervals are *strongly consistent*, that is, the
    confidence interval with coverage 1-`alpha` does not contain the test statistic under
    ``h_0`` if and only if the corresponding test rejects the null hypothesis
    ``h_0: θ = θ_0``:
    ```math
        C (x, 1 − α) = \\{θ : p_θ (x) > α\\},
    ```
    where ``p_θ`` is the [`pvalue`](@ref) of the corresponding test.
"""
function confint end

"""
    pvalue(test::HypothesisTest; tail = :both)

Compute the p-value for a given significance test.

If `tail` is `:both` (default), then the p-value for the two-sided test is returned. If
`tail` is `:left` or `:right`, then a one-sided test is performed.
"""
function pvalue end

# Basic function for finding a p-value given a distribution and tail
pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) =
if tail == :both
Expand Down
29 changes: 26 additions & 3 deletions src/anderson_darling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ immutable OneSampleADTest <: ADTest
::Float64 # Anderson-Darling test statistic
end

"""
    OneSampleADTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)

Perform a one-sample Anderson–Darling test of the null hypothesis that the data in vector
`x` come from the distribution `d` against the alternative hypothesis that the sample
is not drawn from `d`.

Implements: [`pvalue`](@ref)
"""
OneSampleADTest{T<:Real}(x::AbstractVector{T}, d::UnivariateDistribution) =
    OneSampleADTest(adstats(x, d)...)
Expand Down Expand Up @@ -62,17 +71,31 @@ function pvalue(x::OneSampleADTest)
end
end


## K-SAMPLE ANDERSON DARLING TEST
### k-Sample Anderson-Darling Tests, F. W. Scholz; M. A. Stephens, Journal of the American Statistical Association, Vol. 82, No. 399. (Sep., 1987), pp. 918-924.

# Result of a k-sample Anderson-Darling test; constructed by the outer
# constructor below, which computes these fields via a2_ksample.
immutable KSampleADTest <: ADTest
    k::Int       # number of samples
    n::Int       # number of observations
    σ::Float64   # variance A²k (NOTE(review): name σ suggests a standard deviation — confirm against a2_ksample)
    A²k::Float64 # Anderson-Darling test statistic
end

"""
    KSampleADTest(xs::AbstractVector{<:Real}...; modified = true)

Perform a ``k``-sample Anderson–Darling test of the null hypothesis that the data in the
``k`` vectors `xs` come from the same distribution against the alternative hypothesis that
the samples come from different distributions.

`modified` parameter enables a modified test calculation for samples whose observations
do not all coincide.

Implements: [`pvalue`](@ref)

# References

  * F. W. Scholz and M. A. Stephens, K-Sample Anderson-Darling Tests, Journal of the
    American Statistical Association, Vol. 82, No. 399. (Sep., 1987), pp. 918-924.
"""
KSampleADTest{T<:Real}(xs::AbstractVector{T}...; modified=true) =
    KSampleADTest(a2_ksample(xs, modified)...)
Expand Down
92 changes: 58 additions & 34 deletions src/binomial.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,26 @@ immutable BinomialTest <: HypothesisTest
BinomialTest(x::Real, n::Real, p::Real=0.5) = new(p, x, n)
end

"""
    BinomialTest(x::Integer, n::Integer, p::Real = 0.5)
    BinomialTest(x::AbstractVector{Bool}, p::Real = 0.5)

Perform a binomial test of the null hypothesis that the distribution from which `x`
successes were encountered in `n` draws (or alternatively from which the vector `x` was
drawn) has success probability `p` against the alternative hypothesis that the success
probability is not equal to `p`.

Confidence intervals computed by [`confint`](@ref) are Clopper-Pearson intervals by
default.

Implements: [`pvalue`](@ref), [`confint`](@ref)
"""
BinomialTest(x::AbstractVector{Bool}, p=0.5) =
    BinomialTest(sum(x), length(x), p)

"""
    testname(::HypothesisTest)

Returns the string value, e.g. "Binomial test" or "Sign Test".
"""
testname(::BinomialTest) = "Binomial test"
# parameter of interest: name, value under h0, point estimate
population_param_of_interest(x::BinomialTest) = ("Probability of success", x.p, x.x/x.n)
Expand All @@ -56,18 +67,38 @@ end
# Exact p-value: the null sampling distribution of the success count x is Binomial(n, p).
pvalue(x::BinomialTest; tail=:both) = pvalue(Binomial(x.n, x.p), x.x; tail=tail)

# Confidence interval

"""
```julia
function confint(x::HypothesisTest, alpha::Float64=0.05; tail=:both, method=:clopper_pearson)
```
Compute a confidence interval with coverage 1-alpha for binomial proportions using one of the following methods. Possible values for method are:
- Clopper-Pearson :clopper_pearson (default)
- Agresti-Coull :agresti_coull
- Jeffrey :jeffrey
- Wald :wald
- Wilson :wilson
confint(test::BinomialTest, alpha = 0.05; tail = :both, method = :clopper_pearson)
Compute a confidence interval with coverage 1-`alpha` for a binomial proportion using one
of the following methods. Possible values for `method` are:
- `:clopper_pearson` (default): Clopper-Pearson interval is based on the binomial
distribution. The empirical coverage is never less than the nominal coverage of
1-`alpha`; it is usually too conservative.
- `:wald`: Wald (or normal approximation) interval relies on the standard approximation of
the actual binomial distribution by a normal distribution. Coverage can be erratically
poor for success probabilities close to zero or one.
- `:wilson`: Wilson score interval relies on a normal approximation. In contrast to `:wald`,
the standard deviation is not approximated by an empirical estimate, resulting in good
empirical coverages even for small numbers of draws and extreme success probabilities.
- `:jeffrey`: Jeffreys interval is a Bayesian credible interval obtained by using a
non-informative Jeffreys prior. The interval is very similar to the Wilson interval.
- `:agresti_coull`: Agresti-Coull interval is a simplified version of the Wilson interval;
both are centered around the same value. The Agresti Coull interval has higher or equal
coverage.
- `:arcsine`: Confidence interval computed using the arcsine transformation to make
``var(p)`` independent of the probability ``p``.
# References
* Brown, L.D., Cai, T.T., and DasGupta, A. Interval estimation for a binomial proportion.
Statistical Science, 16(2):101–117, 2001.
# External links
* [Binomial confidence interval on Wikipedia](https://en.wikipedia.org/wiki/
Binomial_proportion_confidence_interval)
"""
function StatsBase.confint(x::BinomialTest, alpha::Float64=0.05; tail=:both, method=:clopper_pearson)
check_alpha(alpha)
Expand Down Expand Up @@ -152,9 +183,20 @@ immutable SignTest <: HypothesisTest
data
end

"""
    SignTest(x::AbstractVector{T<:Real}, median::Real = 0)
    SignTest(x::AbstractVector{T<:Real}, y::AbstractVector{T<:Real}, median::Real = 0)

Perform a sign test of the null hypothesis that the distribution from which `x`
(or `x - y` if `y` is provided) was drawn has median `median` against the alternative
hypothesis that the median is not equal to `median`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
"""
SignTest{T<:Real}(x::AbstractVector{T}, median::Real=0) =
    SignTest(median, sum(x .> median), sum(x .!= median), sort(x))
# Paired version: test the median of the differences x - y against 0.
# (Defined exactly once; the source carried an identical duplicate of this method.)
SignTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}) =
    SignTest(x - y, 0.0)

testname(::SignTest) = "Sign Test"
# parameter of interest: name, value under h0, point estimate
population_param_of_interest(x::SignTest) = ("Median", x.median, median(x.data))
Expand All @@ -169,26 +211,8 @@ function show_params(io::IO, x::SignTest, ident="")
println(io, ident, text2, x.x)
end

"""
    pvalue(x::SignTest; tail = :both)

Compute the p-value for the sign test: under the null hypothesis the number of
observations above the hypothesized median follows Binomial(n, 0.5).

If `tail` is `:both` (default), then the p-value for the two-sided test is returned.
If `tail` is `:left` or `:right`, then a one-sided test is performed.
"""
pvalue(x::SignTest; tail=:both) = pvalue(Binomial(x.n, 0.5), x.x; tail=tail)

"""
```julia
function confint(x::HypothesisTest, alpha::Float64=0.05; tail=:both)
```
Compute a confidence interval C with coverage 1-alpha.
If tail is :both (default), then a two-sided confidence interval is returned. If tail is :left or :right, then a one-sided confidence interval is returned
"""
function StatsBase.confint(x::SignTest, alpha::Float64=0.05; tail=:both)
check_alpha(alpha)

Expand Down
77 changes: 77 additions & 0 deletions src/fisher.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,33 @@

export FisherExactTest

"""
FisherExactTest(a::Integer, b::Integer, c::Integer, d::Integer)
Perform Fisher's exact test of the null hypothesis that the success probabilities ``a/c``
and ``b/d`` are equal, that is the odds ratio ``(a/c) / (b/d)`` is one, against the
alternative hypothesis that they are not equal.
The contingency table is structured as:
| - | X1 | X2 |
|:--:|:--:|:--:|
|*Y1*| a | b |
|*Y2*| c | d |
!!! note
The `show` function output contains the conditional maximum likelihood estimate of the
odds ratio rather than the sample odds ratio; it maximizes the likelihood given by
Fisher's non-central hypergeometric distribution.
Implements: [`pvalue`](@ref), [`confint`](@ref)
# References
* Fay, M.P., Supplementary material to "Confidence intervals that match Fisher’s exact or
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
immutable FisherExactTest <: HypothesisTest
# Format:
# X1 X2
Expand Down Expand Up @@ -59,6 +86,37 @@ function show_params(io::IO, x::FisherExactTest, ident="")
end

# DOC: for tail=:both there exist multiple ``method``s for computing a pvalue and the corresponding ci.
"""
pvalue(x::FisherExactTest; tail = :both, method = :central)
Compute the p-value for a given Fisher exact test.
The one-sided p-values are based on Fisher's non-central hypergeometric distribution
``f_ω(i)`` with odds ratio ``ω``:
```math
\\begin{align*}
p_ω^{(\\text{left})} &=\\sum_{i ≤ a} f_ω(i)\\\\
p_ω^{(\\text{right})} &=\\sum_{i ≥ a} f_ω(i)
\\end{align*}
```
For `tail = :both`, possible values for `method` are:
- `:central` (default): Central interval, i.e. the p-value is two times the minimum of the
one-sided p-values.
- `:minlike`: Minimum likelihood interval, i.e. the p-value is computed by summing all
tables with the same marginals that are equally or less probable:
```math
p_ω = \\sum_{f_ω(i)≤ f_ω(a)} f_ω(i)
```
# References
* Gibbons, J.D., Pratt, J.W., P-values: Interpretation and Methodology, American
Statistician, 29(1):20-25, 1975.
* Fay, M.P., Supplementary material to "Confidence intervals that match Fisher’s exact or
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
function pvalue(x::FisherExactTest; tail=:both, method=:central)
if tail == :both && method != :central
if method == :minlike
Expand Down Expand Up @@ -97,6 +155,25 @@ function pvalue_both_minlike(x::FisherExactTest, ω::Float64=1.0)
end

# confidence interval by inversion of p-value
"""
confint(x::FisherExactTest, alpha::Float64=0.05; tail=:both, method=:central)
Compute a confidence interval with coverage 1 - `alpha`. One-sided intervals are based on
Fisher's non-central hypergeometric distribution. For `tail = :both`, the only
`method` implemented yet is the central interval (`:central`).
!!! note
Since the p-value is not necessarily unimodal, the corresponding confidence region might
not be an interval.
# References
* Gibbons, J.D, Pratt, J.W. P-values: Interpretation and Methodology, American
Statistician, 29(1):20-25, 1975.
* Fay, M.P., Supplementary material to "Confidence intervals that match Fisher’s exact or
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
function StatsBase.confint(x::FisherExactTest, alpha::Float64=0.05; tail=:both, method=:central)
check_alpha(alpha)
dist(ω) = FisherNoncentralHypergeometric(x.a+x.b, x.c+x.d, x.a+x.c, ω)
Expand Down
32 changes: 32 additions & 0 deletions src/kolmogorov_smirnov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ immutable ExactOneSampleKSTest <: ExactKSTest
δn::Float64 # supremum of the negative CDF differences
end

"""
ExactOneSampleKSTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)
Perform a one-sample exact Kolmogorov–Smirnov test of the null hypothesis that the data in
vector `x` comes from the distribution `d` against the alternative hypothesis that the
sample is not drawn from `d`.
Implements: [`pvalue`](@ref)
"""
function ExactOneSampleKSTest{T<:Real}(x::AbstractVector{T}, d::UnivariateDistribution)
if length(x) > length(unique(x))
warn("This test is inaccurate with ties")
Expand Down Expand Up @@ -89,6 +98,15 @@ immutable ApproximateOneSampleKSTest <: ApproximateKSTest
δn::Float64 # supremum of the negative CDF differences
end

"""
ApproximateOneSampleKSTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)
Perform an asymptotic one-sample Kolmogorov–Smirnov test of the null hypothesis that the
data in vector `x` comes from the distribution `d` against the alternative hypothesis
that the sample is not drawn from `d`.
Implements: [`pvalue`](@ref)
"""
function ApproximateOneSampleKSTest{T<:Real}(x::AbstractVector{T}, d::UnivariateDistribution)
if length(x) > length(unique(x))
warn("This test is inaccurate with ties")
Expand Down Expand Up @@ -129,6 +147,20 @@ immutable ApproximateTwoSampleKSTest <: ApproximateKSTest
δn::Float64 # supremum of the negative CDF differences
end

"""
ApproximateTwoSampleKSTest(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})
Perform an asymptotic two-sample Kolmogorov–Smirnov-test of the null hypothesis that `x`
and `y` are drawn from the same distribution against the alternative hypothesis that they
come from different distributions.
Implements: [`pvalue`](@ref)
# External links
* [Approximation of one-sided test (Encyclopedia of Mathematics)
](https://www.encyclopediaofmath.org/index.php/Kolmogorov-Smirnov_test)
"""
function ApproximateTwoSampleKSTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S})
n_x, n_y = length(x), length(y)
if n_x+n_y > length(unique([x; y]))
Expand Down

0 comments on commit 6cc7d93

Please sign in to comment.