Skip to content

Commit

Permalink
Move method descriptions to docstrings (#106)
Browse files Browse the repository at this point in the history
  • Loading branch information
BenjaminBorn authored and ararslan committed Aug 26, 2017
1 parent 48558c8 commit 6cc7d93
Show file tree
Hide file tree
Showing 10 changed files with 535 additions and 46 deletions.
30 changes: 30 additions & 0 deletions src/HypothesisTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,36 @@ check_same_length(x::AbstractVector, y::AbstractVector) = if length(x) != length
throw(DimensionMismatch("Vectors must be the same length"))
end

"""
    confint(test::HypothesisTest, alpha = 0.05; tail = :both)

Compute a confidence interval C with coverage 1-`alpha`.

If `tail` is `:both` (default), then a two-sided confidence interval is returned. If `tail`
is `:left` or `:right`, then a one-sided confidence interval is returned.

!!! note
    Most of the implemented confidence intervals are *strongly consistent*, that is, the
    confidence interval with coverage 1-`alpha` does not contain the test statistic under
    ``h_0`` if and only if the corresponding test rejects the null hypothesis
    ``h_0: θ = θ_0``:
    ```math
        C (x, 1 − α) = \\{θ : p_θ (x) > α\\},
    ```
    where ``p_θ`` is the [`pvalue`](@ref) of the corresponding test.
"""
function confint end

"""
    pvalue(test::HypothesisTest; tail = :both)

Compute the p-value for a given significance test.

If `tail` is `:both` (default), then the p-value for the two-sided test is returned. If
`tail` is `:left` or `:right`, then a one-sided test is performed.
"""
function pvalue end

# Basic function for finding a p-value given a distribution and tail
pvalue(dist::ContinuousUnivariateDistribution, x::Number; tail=:both) =
if tail == :both
Expand Down
29 changes: 26 additions & 3 deletions src/anderson_darling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ immutable OneSampleADTest <: ADTest
::Float64 # Anderson-Darling test statistic
end

"""
    OneSampleADTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)

Perform a one-sample Anderson–Darling test of the null hypothesis that the data in vector
`x` come from the distribution `d` against the alternative hypothesis that the sample
is not drawn from `d`.

Implements: [`pvalue`](@ref)
"""
OneSampleADTest{T<:Real}(x::AbstractVector{T}, d::UnivariateDistribution) =
    OneSampleADTest(adstats(x, d)...)
Expand Down Expand Up @@ -62,17 +71,31 @@ function pvalue(x::OneSampleADTest)
end
end


## K-SAMPLE ANDERSON DARLING TEST
### k-Sample Anderson-Darling Tests, F. W. Scholz; M. A. Stephens, Journal of the American Statistical Association, Vol. 82, No. 399. (Sep., 1987), pp. 918-924.

# Result of a k-sample Anderson-Darling test; constructed by the outer
# constructor below, which computes these fields via a2_ksample.
immutable KSampleADTest <: ADTest
    k::Int       # number of samples
    n::Int       # number of observations
    σ::Float64   # variance A²k (NOTE(review): name σ suggests a standard deviation — confirm against a2_ksample)
    A²k::Float64 # Anderson-Darling test statistic
end

"""
    KSampleADTest(xs::AbstractVector{<:Real}...; modified = true)

Perform a ``k``-sample Anderson–Darling test of the null hypothesis that the data in the
``k`` vectors `xs` come from the same distribution against the alternative hypothesis that
the samples come from different distributions.

`modified` parameter enables a modified test calculation for samples whose observations
do not all coincide.

Implements: [`pvalue`](@ref)

# References

  * F. W. Scholz and M. A. Stephens, K-Sample Anderson-Darling Tests, Journal of the
    American Statistical Association, Vol. 82, No. 399. (Sep., 1987), pp. 918-924.
"""
KSampleADTest{T<:Real}(xs::AbstractVector{T}...; modified=true) =
    KSampleADTest(a2_ksample(xs, modified)...)
Expand Down
92 changes: 58 additions & 34 deletions src/binomial.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,26 @@ immutable BinomialTest <: HypothesisTest
BinomialTest(x::Real, n::Real, p::Real=0.5) = new(p, x, n)
end

"""
    BinomialTest(x::Integer, n::Integer, p::Real = 0.5)
    BinomialTest(x::AbstractVector{Bool}, p::Real = 0.5)

Perform a binomial test of the null hypothesis that the distribution from which `x`
successes were encountered in `n` draws (or alternatively from which the vector `x` was
drawn) has success probability `p` against the alternative hypothesis that the success
probability is not equal to `p`.

Confidence intervals computed by [`confint`](@ref) are Clopper-Pearson intervals by
default.

Implements: [`pvalue`](@ref), [`confint`](@ref)
"""
BinomialTest(x::AbstractVector{Bool}, p=0.5) =
    BinomialTest(sum(x), length(x), p)

"""
    testname(::HypothesisTest)

Returns the string value, e.g. "Binomial test" or "Sign Test".
"""
testname(::BinomialTest) = "Binomial test"
# parameter of interest: name, value under h0, point estimate
population_param_of_interest(x::BinomialTest) = ("Probability of success", x.p, x.x/x.n)
Expand All @@ -56,18 +67,38 @@ end
# Exact p-value: the null sampling distribution of the success count x is Binomial(n, p).
pvalue(x::BinomialTest; tail=:both) = pvalue(Binomial(x.n, x.p), x.x; tail=tail)

# Confidence interval

"""
```julia
function confint(x::HypothesisTest, alpha::Float64=0.05; tail=:both, method=:clopper_pearson)
```
Compute a confidence interval with coverage 1-alpha for binomial proportions using one of the following methods. Possible values for method are:
- Clopper-Pearson :clopper_pearson (default)
- Agresti-Coull :agresti_coull
- Jeffrey :jeffrey
- Wald :wald
- Wilson :wilson
confint(test::BinomialTest, alpha = 0.05; tail = :both, method = :clopper_pearson)
Compute a confidence interval with coverage 1-`alpha` for a binomial proportion using one
of the following methods. Possible values for `method` are:
- `:clopper_pearson` (default): Clopper-Pearson interval is based on the binomial
distribution. The empirical coverage is never less than the nominal coverage of
1-`alpha`; it is usually too conservative.
- `:wald`: Wald (or normal approximation) interval relies on the standard approximation of
the actual binomial distribution by a normal distribution. Coverage can be erratically
poor for success probabilities close to zero or one.
- `:wilson`: Wilson score interval relies on a normal approximation. In contrast to `:wald`,
the standard deviation is not approximated by an empirical estimate, resulting in good
empirical coverages even for small numbers of draws and extreme success probabilities.
- `:jeffrey`: Jeffreys interval is a Bayesian credible interval obtained by using a
non-informative Jeffreys prior. The interval is very similar to the Wilson interval.
- `:agresti_coull`: Agresti-Coull interval is a simplified version of the Wilson interval;
both are centered around the same value. The Agresti Coull interval has higher or equal
coverage.
- `:arcsine`: Confidence interval computed using the arcsine transformation to make
``var(p)`` independent of the probability ``p``.
# References
* Brown, L.D., Cai, T.T., and DasGupta, A. Interval estimation for a binomial proportion.
Statistical Science, 16(2):101–117, 2001.
# External links
* [Binomial confidence interval on Wikipedia](https://en.wikipedia.org/wiki/
Binomial_proportion_confidence_interval)
"""
function StatsBase.confint(x::BinomialTest, alpha::Float64=0.05; tail=:both, method=:clopper_pearson)
check_alpha(alpha)
Expand Down Expand Up @@ -152,9 +183,20 @@ immutable SignTest <: HypothesisTest
data
end

"""
    SignTest(x::AbstractVector{T<:Real}, median::Real = 0)
    SignTest(x::AbstractVector{T<:Real}, y::AbstractVector{T<:Real}, median::Real = 0)

Perform a sign test of the null hypothesis that the distribution from which `x`
(or `x - y` if `y` is provided) was drawn has median `median` against the alternative
hypothesis that the median is not equal to `median`.

Implements: [`pvalue`](@ref), [`confint`](@ref)
"""
SignTest{T<:Real}(x::AbstractVector{T}, median::Real=0) =
    SignTest(median, sum(x .> median), sum(x .!= median), sort(x))
# Paired version: test the median of the differences x - y against 0.
# (Defined exactly once; the source carried an identical duplicate of this method.)
SignTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}) =
    SignTest(x - y, 0.0)

testname(::SignTest) = "Sign Test"
# parameter of interest: name, value under h0, point estimate
population_param_of_interest(x::SignTest) = ("Median", x.median, median(x.data))
Expand All @@ -169,26 +211,8 @@ function show_params(io::IO, x::SignTest, ident="")
println(io, ident, text2, x.x)
end

"""
    pvalue(x::SignTest; tail = :both)

Compute the p-value for the sign test: under the null hypothesis the number of
observations above the hypothesized median follows Binomial(n, 0.5).

If `tail` is `:both` (default), then the p-value for the two-sided test is returned.
If `tail` is `:left` or `:right`, then a one-sided test is performed.
"""
pvalue(x::SignTest; tail=:both) = pvalue(Binomial(x.n, 0.5), x.x; tail=tail)

"""
```julia
function confint(x::HypothesisTest, alpha::Float64=0.05; tail=:both)
```
Compute a confidence interval C with coverage 1-alpha.
If tail is :both (default), then a two-sided confidence interval is returned. If tail is :left or :right, then a one-sided confidence interval is returned
"""
function StatsBase.confint(x::SignTest, alpha::Float64=0.05; tail=:both)
check_alpha(alpha)

Expand Down
77 changes: 77 additions & 0 deletions src/fisher.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,33 @@

export FisherExactTest

"""
FisherExactTest(a::Integer, b::Integer, c::Integer, d::Integer)
Perform Fisher's exact test of the null hypothesis that the success probabilities ``a/c``
and ``b/d`` are equal, that is the odds ratio ``(a/c) / (b/d)`` is one, against the
alternative hypothesis that they are not equal.
The contingency table is structured as:
| - | X1 | X2 |
|:--:|:--:|:--:|
|*Y1*| a | b |
|*Y2*| c | d |
!!! note
The `show` function output contains the conditional maximum likelihood estimate of the
odds ratio rather than the sample odds ratio; it maximizes the likelihood given by
Fisher's non-central hypergeometric distribution.
Implements: [`pvalue`](@ref), [`confint`](@ref)
# References
* Fay, M.P., Supplementary material to "Confidence intervals that match Fisher’s exact or
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
immutable FisherExactTest <: HypothesisTest
# Format:
# X1 X2
Expand Down Expand Up @@ -59,6 +86,37 @@ function show_params(io::IO, x::FisherExactTest, ident="")
end

# DOC: for tail=:both there exist multiple ``method``s for computing a pvalue and the corresponding ci.
"""
pvalue(x::FisherExactTest; tail = :both, method = :central)
Compute the p-value for a given Fisher exact test.
The one-sided p-values are based on Fisher's non-central hypergeometric distribution
``f_ω(i)`` with odds ratio ``ω``:
```math
\\begin{align*}
p_ω^{(\\text{left})} &=\\sum_{i ≤ a} f_ω(i)\\\\
p_ω^{(\\text{right})} &=\\sum_{i ≥ a} f_ω(i)
\\end{align*}
```
For `tail = :both`, possible values for `method` are:
- `:central` (default): Central interval, i.e. the p-value is two times the minimum of the
one-sided p-values.
- `:minlike`: Minimum likelihood interval, i.e. the p-value is computed by summing all
tables with the same marginals that are equally or less probable:
```math
p_ω = \\sum_{f_ω(i)≤ f_ω(a)} f_ω(i)
```
# References
* Gibbons, J.D., Pratt, J.W., P-values: Interpretation and Methodology, American
Statistician, 29(1):20-25, 1975.
* Fay, M.P., Supplementary material to "Confidence intervals that match Fisher’s exact or
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
function pvalue(x::FisherExactTest; tail=:both, method=:central)
if tail == :both && method != :central
if method == :minlike
Expand Down Expand Up @@ -97,6 +155,25 @@ function pvalue_both_minlike(x::FisherExactTest, ω::Float64=1.0)
end

# confidence interval by inversion of p-value
"""
confint(x::FisherExactTest, alpha::Float64=0.05; tail=:both, method=:central)
Compute a confidence interval with coverage 1 - `alpha`. One-sided intervals are based on
Fisher's non-central hypergeometric distribution. For `tail = :both`, the only
`method` implemented yet is the central interval (`:central`).
!!! note
Since the p-value is not necessarily unimodal, the corresponding confidence region might
not be an interval.
# References
* Gibbons, J.D, Pratt, J.W. P-values: Interpretation and Methodology, American
Statistician, 29(1):20-25, 1975.
* Fay, M.P., Supplementary material to "Confidence intervals that match Fisher’s exact or
Blaker’s exact tests". Biostatistics, Volume 11, Issue 2, 1 April 2010, Pages 373–374,
[link](https://doi.org/10.1093/biostatistics/kxp050)
"""
function StatsBase.confint(x::FisherExactTest, alpha::Float64=0.05; tail=:both, method=:central)
check_alpha(alpha)
dist(ω) = FisherNoncentralHypergeometric(x.a+x.b, x.c+x.d, x.a+x.c, ω)
Expand Down
32 changes: 32 additions & 0 deletions src/kolmogorov_smirnov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ immutable ExactOneSampleKSTest <: ExactKSTest
δn::Float64 # supremum of the negative CDF differences
end

"""
ExactOneSampleKSTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)
Perform a one-sample exact Kolmogorov–Smirnov test of the null hypothesis that the data in
vector `x` comes from the distribution `d` against the alternative hypothesis that the
sample is not drawn from `d`.
Implements: [`pvalue`](@ref)
"""
function ExactOneSampleKSTest{T<:Real}(x::AbstractVector{T}, d::UnivariateDistribution)
if length(x) > length(unique(x))
warn("This test is inaccurate with ties")
Expand Down Expand Up @@ -89,6 +98,15 @@ immutable ApproximateOneSampleKSTest <: ApproximateKSTest
δn::Float64 # supremum of the negative CDF differences
end

"""
ApproximateOneSampleKSTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)
Perform an asymptotic one-sample Kolmogorov–Smirnov test of the null hypothesis that the
data in vector `x` comes from the distribution `d` against the alternative hypothesis
that the sample is not drawn from `d`.
Implements: [`pvalue`](@ref)
"""
function ApproximateOneSampleKSTest{T<:Real}(x::AbstractVector{T}, d::UnivariateDistribution)
if length(x) > length(unique(x))
warn("This test is inaccurate with ties")
Expand Down Expand Up @@ -129,6 +147,20 @@ immutable ApproximateTwoSampleKSTest <: ApproximateKSTest
δn::Float64 # supremum of the negative CDF differences
end

"""
ApproximateTwoSampleKSTest(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})
Perform an asymptotic two-sample Kolmogorov–Smirnov-test of the null hypothesis that `x`
and `y` are drawn from the same distribution against the alternative hypothesis that they
come from different distributions.
Implements: [`pvalue`](@ref)
# External links
* [Approximation of one-sided test (Encyclopedia of Mathematics)
](https://www.encyclopediaofmath.org/index.php/Kolmogorov-Smirnov_test)
"""
function ApproximateTwoSampleKSTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S})
n_x, n_y = length(x), length(y)
if n_x+n_y > length(unique([x; y]))
Expand Down

0 comments on commit 6cc7d93

Please sign in to comment.