Allow for one-sided testing in printing of pvalue (#93)

* Allow for one-sided testing in printing of pvalue
* Allow for one-sided testing in printing of pvalue (Simon's approach)
* Add tests for p-value printing
JuliaStats · Apr 14, 2017 · e764c02 · e764c02
1 parent 7e5c2a1
commit e764c02
Show file tree

Hide file tree

Showing 27 changed files with 78 additions and 7 deletions.
diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl
@@ -88,9 +88,16 @@ function Base.show{T<:HypothesisTest}(io::IO, test::T)
     # test summary
     p = pvalue(test)
     outcome = if p > 0.05 "fail to reject" else "reject" end
+    tail = default_tail(test)
     println(io, "Test summary:")
     println(io, "    outcome with 95% confidence: $outcome h_0")
-    println(io, "    two-sided p-value:           $p")
+    if tail == :both
+        println(io, "    two-sided p-value:           $p")
+    elseif tail == :left || tail == :right
+        println(io, "    one-sided p-value:           $p")
+    else
+        println(io, "    p-value:                     $p")
+    end
     println(io)
 
     # further details
@@ -101,6 +108,9 @@ end
 # parameter of interest: name, value under h0, point estimate
 population_param_of_interest{T<:HypothesisTest}(test::T) = ("not implemented yet", NaN, NaN)
 
+# is the test one- or two-sided
+default_tail(test::HypothesisTest) = :undefined
+
 function show_params{T<:HypothesisTest}(io::IO, test::T, ident="")
     fieldidx = find(Bool[t<:Number for t in T.types])
     if !isempty(fieldidx)

diff --git a/src/anderson_darling.jl b/src/anderson_darling.jl
@@ -35,6 +35,7 @@ function OneSampleADTest{T<:Real}(x::AbstractVector{T}, d::UnivariateDistributio
 end
 
 testname(::OneSampleADTest) = "One sample Anderson-Darling test"
+default_tail(test::OneSampleADTest) = :right
 
 function show_params(io::IO, x::OneSampleADTest, ident="")
     println(io, ident, "number of observations:   $(x.n)")
@@ -77,6 +78,7 @@ function KSampleADTest{T<:Real}(xs::AbstractVector{T}...; modified=true)
 end
 
 testname(::KSampleADTest) = "k-sample Anderson-Darling test"
+default_tail(test::KSampleADTest) = :right
 
 function show_params(io::IO, x::KSampleADTest, ident="")
     println(io, ident, "number of samples:        $(x.k)")

diff --git a/src/binomial.jl b/src/binomial.jl
@@ -46,6 +46,7 @@ Returns the string value. E.g. "Binomial test", "Sign Test"
 """
 testname(::BinomialTest) = "Binomial test"
 population_param_of_interest(x::BinomialTest) = ("Probability of success", x.p, x.x/x.n) # parameter of interest: name, value under h0, point estimate
+default_tail(test::BinomialTest) = :both
 
 function show_params(io::IO, x::BinomialTest, ident="")
     println(io, ident, "number of observations: $(x.n)")
@@ -156,6 +157,7 @@ SignTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}) = SignTes
 
 testname(::SignTest) = "Sign Test"
 population_param_of_interest(x::SignTest) = ("Median", x.median, median(x.data)) # parameter of interest: name, value under h0, point estimate
+default_tail(test::SignTest) = :both
 
 function show_params(io::IO, x::SignTest, ident="")
     text1 = "number of observations:"

diff --git a/src/box_test.jl b/src/box_test.jl
@@ -62,6 +62,7 @@ end
 testname(::BoxPierceTest) = "Box-Pierce autocorrelation test"
 population_param_of_interest(x::BoxPierceTest) = ("autocorrelations up to lag k",
     "all zero", NaN)
+default_tail(test::BoxPierceTest) = :right
 
 function show_params(io::IO, x::BoxPierceTest, ident)
     println(io, ident, "number of observations:         ", x.n)
@@ -110,6 +111,7 @@ end
 testname(::LjungBoxTest) = "Ljung-Box autocorrelation test"
 population_param_of_interest(x::LjungBoxTest) = ("autocorrelations up to lag k",
     "all zero", NaN)
+default_tail(test::LjungBoxTest) = :right
 
 function show_params(io::IO, x::LjungBoxTest, ident)
     println(io, ident, "number of observations:         ", x.n)

diff --git a/src/breusch_godfrey.jl b/src/breusch_godfrey.jl
@@ -68,6 +68,7 @@ end
 testname(::BreuschGodfreyTest) = "Breusch-Godfrey autocorrelation test"
 population_param_of_interest(x::BreuschGodfreyTest) =
     ("coefficients on lagged residuals up to lag p", "all zero", NaN)
+default_tail(test::BreuschGodfreyTest) = :right
 
 function show_params(io::IO, x::BreuschGodfreyTest, ident)
     println(io, ident, "number of observations:         ", x.n)

diff --git a/src/circular.jl b/src/circular.jl
@@ -53,6 +53,7 @@ end
 
 testname(::RayleighTest) = "Rayleigh test"
 population_param_of_interest(x::RayleighTest) = ("Mean resultant length", 0, x.Rbar) # parameter of interest: name, value under h0, point estimate
+default_tail(test::RayleighTest) = :both
 
 function show_params(io::IO, x::RayleighTest, ident="")
     println(io, ident, "number of observations: $(x.n)")
@@ -98,6 +99,8 @@ FisherTLinearAssociation{S <: Real, T <: Real}(theta::Vector{S},
 testname(::FisherTLinearAssociation) =
     "T-linear test of circular-circular association"
 population_param_of_interest(x::FisherTLinearAssociation) = ("Circular correlation coefficient", 0, x.rho_t) # parameter of interest: name, value under h0, point estimate
+default_tail(test::FisherTLinearAssociation) = :both
+
 function show_params(io::IO, x::FisherTLinearAssociation, ident="")
     println(io, ident, "number of observations: [$(length(x.theta)),$(length(x.phi))]")
 end
@@ -209,6 +212,8 @@ end
 
 testname(::JammalamadakaCircularCorrelation) = "Jammalamadaka circular correlation"
 population_param_of_interest(x::JammalamadakaCircularCorrelation) = ("Circular-circular correlation coefficient", 0, x.r) # parameter of interest: name, value under h0, point estimate
+default_tail(test::JammalamadakaCircularCorrelation) = :both
+
 function show_params(io::IO, x::JammalamadakaCircularCorrelation, ident="")
     println(io, ident, "test statistic: $(x.Z)")
 end

diff --git a/src/fisher.jl b/src/fisher.jl
@@ -45,6 +45,7 @@ end
 
 testname(::FisherExactTest) = "Fisher's exact test"
 population_param_of_interest(x::FisherExactTest) = ("Odds ratio", 1.0, x.ω) # parameter of interest: name, value under h0, point estimate
+default_tail(test::FisherExactTest) = :both
 
 # The sizing argument to print_matrix was removed during the 0.5 dev period
 if VERSION < v"0.5.0-dev+1936"

diff --git a/src/kolmogorov_smirnov.jl b/src/kolmogorov_smirnov.jl
@@ -31,6 +31,7 @@ export
 @compat abstract type ExactKSTest <: KSTest end
 
 population_param_of_interest(x::KSTest) = ("Supremum of CDF differences", 0.0, x.δ) # parameter of interest: name, value under h0, point estimate
+default_tail(test::KSTest) = :both
 
 ## ONE SAMPLE KS-TEST
 

diff --git a/src/kruskal_wallis.jl b/src/kruskal_wallis.jl
@@ -32,7 +32,7 @@ immutable KruskalWallisTest <: HypothesisTest
     tie_adjustment::Float64  # adjustment for ties
 end
 
-function KruskalWallisTest{T<:Real}(groups::AbstractVector{T}...) 
+function KruskalWallisTest{T<:Real}(groups::AbstractVector{T}...)
     (H, R_i, tieadj, n_i) = kwstats(groups...)
     if length(groups)<=3 && any(n_i .< 6)
         warn("This test is only asymptotically correct and might be inaccurate for the given group size")
@@ -43,6 +43,7 @@ end
 
 testname(::KruskalWallisTest) = "Kruskal-Wallis rank sum test (chi-square approximation)"
 population_param_of_interest(x::KruskalWallisTest) = ("Location parameters", "all equal", NaN) # parameter of interest: name, value under h0, point estimate
+default_tail(test::KruskalWallisTest) = :right
 
 function show_params(io::IO, x::KruskalWallisTest, ident)
     println(io, ident, "number of observation in each group: ", x.n_i)
@@ -75,7 +76,7 @@ function kwstats{T<:Real}(groups::AbstractVector{T}...)
     end
 
     # compute test statistic and correct for ties
-    H = 12 * sum(R_i.^2./n_i) / (n * (n + 1)) - 3 * (n + 1) 
+    H = 12 * sum(R_i.^2./n_i) / (n * (n + 1)) - 3 * (n + 1)
     H /= C
 
     (H, R_i, C, n_i)

diff --git a/src/mann_whitney.jl b/src/mann_whitney.jl
@@ -66,6 +66,7 @@ ExactMannWhitneyUTest{S<:Real,T<:Real}(x::AbstractVector{S}, y::AbstractVector{T
 
 testname(::ExactMannWhitneyUTest) = "Exact Mann-Whitney U test"
 population_param_of_interest(x::ExactMannWhitneyUTest) = ("Location parameter (pseudomedian)", 0, x.median) # parameter of interest: name, value under h0, point estimate
+default_tail(test::ExactMannWhitneyUTest) = :both
 
 function show_params(io::IO, x::ExactMannWhitneyUTest, ident)
     println(io, ident, "number of observations in each group: ", [x.nx, x.ny])
@@ -96,7 +97,7 @@ function mwuenumerate(x::ExactMannWhitneyUTest)
     (le/tot, gr/tot)
 end
 
-function pvalue(x::ExactMannWhitneyUTest; tail=:both) 
+function pvalue(x::ExactMannWhitneyUTest; tail=:both)
     if x.tie_adjustment == 0
         # Compute exact p-value using method from Rmath, which is fast but
         # cannot account for ties
@@ -152,6 +153,7 @@ ApproximateMannWhitneyUTest{S<:Real,T<:Real}(x::AbstractVector{S}, y::AbstractVe
 
 testname(::ApproximateMannWhitneyUTest) = "Approximate Mann-Whitney U test"
 population_param_of_interest(x::ApproximateMannWhitneyUTest) = ("Location parameter (pseudomedian)", 0, x.median) # parameter of interest: name, value under h0, point estimate
+default_tail(test::ApproximateMannWhitneyUTest) = :both
 
 function show_params(io::IO, x::ApproximateMannWhitneyUTest, ident)
     println(io, ident, "number of observations in each group: ", [x.nx, x.ny])
@@ -161,7 +163,7 @@ function show_params(io::IO, x::ApproximateMannWhitneyUTest, ident)
     println(io, ident, "normal approximation (μ, σ):          ", (x.mu, x.sigma))
 end
 
-function pvalue(x::ApproximateMannWhitneyUTest; tail=:both) 
+function pvalue(x::ApproximateMannWhitneyUTest; tail=:both)
     if x.mu == x.sigma == 0
         1
     else

diff --git a/src/power_divergence.jl b/src/power_divergence.jl
@@ -40,6 +40,7 @@ end
 
 # parameter of interest: name, value under h0, point estimate
 population_param_of_interest(x::PowerDivergenceTest) = ("Multinomial Probabilities", x.theta0, x.thetahat)
+default_tail(test::PowerDivergenceTest) = :right
 
 pvalue(x::PowerDivergenceTest; tail=:right) = pvalue(Chisq(x.df),x.stat; tail=tail)
 

diff --git a/src/t.jl b/src/t.jl
@@ -30,6 +30,8 @@ export OneSampleTTest, TwoSampleTTest, EqualVarianceTTest,
 
 pvalue(x::TTest; tail=:both) = pvalue(TDist(x.df), x.t; tail=tail)
 
+default_tail(test::TTest) = :both
+
 # confidence interval by inversion
 function StatsBase.confint(x::TTest, alpha::Float64=0.05; tail=:both)
     check_alpha(alpha)

diff --git a/src/wilcoxon.jl b/src/wilcoxon.jl
@@ -69,6 +69,7 @@ ExactSignedRankTest{S<:Real,T<:Real}(x::AbstractVector{S}, y::AbstractVector{T})
 
 testname(::ExactSignedRankTest) = "Exact Wilcoxon signed rank test"
 population_param_of_interest(x::ExactSignedRankTest) = ("Location parameter (pseudomedian)", 0, x.median) # parameter of interest: name, value under h0, point estimate
+default_tail(test::ExactSignedRankTest) = :both
 
 function show_params(io::IO, x::ExactSignedRankTest, ident)
     println(io, ident, "number of observations:      ", x.n)
@@ -159,6 +160,7 @@ ApproximateSignedRankTest{S<:Real,T<:Real}(x::AbstractVector{S}, y::AbstractVect
 
 testname(::ApproximateSignedRankTest) = "Approximate Wilcoxon signed rank test"
 population_param_of_interest(x::ApproximateSignedRankTest) = ("Location parameter (pseudomedian)", 0, x.median) # parameter of interest: name, value under h0, point estimate
+default_tail(test::ApproximateSignedRankTest) = :both
 
 function show_params(io::IO, x::ApproximateSignedRankTest, ident)
     println(io, ident, "number of observations:      ", x.n)

diff --git a/src/z.jl b/src/z.jl
@@ -30,6 +30,8 @@ export OneSampleZTest, TwoSampleZTest, EqualVarianceZTest,
 
 pvalue(x::ZTest; tail=:both) = pvalue(Normal(0.0, 1.0), x.z; tail=tail)
 
+default_tail(test::ZTest) = :both
+
 # confidence interval by inversion
 function StatsBase.confint(x::ZTest, alpha::Float64=0.05; tail=:both)
     check_alpha(alpha)

diff --git a/test/anderson_darling.jl b/test/anderson_darling.jl
@@ -1,4 +1,5 @@
 using HypothesisTests, Distributions, Base.Test
+using HypothesisTests: default_tail
 
 # One sample test
 n = 1000
@@ -8,6 +9,7 @@ x = rand(Normal(), n)
 t = OneSampleADTest(x, Normal())
 @test isapprox(t.A², 0.2013, atol=0.1^4)
 @test isapprox(pvalue(t), 0.8811, atol=0.1^4)
+@test default_tail(t) == :right
 
 x = rand(DoubleExponential(), n)
 t = OneSampleADTest(x, Normal())
@@ -34,6 +36,7 @@ t = KSampleADTest(samples...)
 @test isapprox(t.A²k, 8.3926, atol=0.1^4)
 @test isapprox(t.σ, 1.2038, atol=0.1^4)
 @test isapprox(pvalue(t), 0.0020, atol=0.1^4)
+@test default_tail(t) == :right
 
 t = KSampleADTest(samples..., modified = false)
 @test isapprox(t.A²k, 8.3559, atol=0.1^4)

diff --git a/test/binomial.jl b/test/binomial.jl
@@ -1,9 +1,11 @@
 using HypothesisTests, Base.Test
+using HypothesisTests: default_tail
 
 t = BinomialTest(26, 78)
 @test pvalue(t) ≈ 0.004334880883507431
 @test pvalue(t, tail=:left) ≈ 0.002167440441753716
 @test pvalue(t, tail=:right) ≈ 0.9989844298129187
+@test default_tail(t) == :both
 @test_ci_approx confint(t) (0.23058523962930383, 0.4491666887959782)
 @test_ci_approx confint(t, tail=:left) (0.0, 0.4313047758370174)
 @test_ci_approx confint(t, tail=:right) (0.2451709633730693, 1.0)
@@ -56,6 +58,7 @@ x = [55, 58, 61, 61, 62, 62, 62, 63, 63, 64, 66, 68, 68, 69, 69, 69, 70, 71, 72,
 @test pvalue(SignTest(x, 70)) ≈ 0.004425048828125003
 @test pvalue(SignTest(x, 70), tail=:left) ≈ 0.0022125244140625013
 @test pvalue(SignTest(x, 70), tail=:right) ≈ 0.9996356964111328
+@test default_tail(SignTest(x)) == :both
 @test_ci_approx confint(SignTest(x, 70)) (62, 69)
 @test_ci_approx confint(SignTest(x, 70), 0.0002) (61, 71)
 show(IOBuffer(), SignTest(x, 70))

diff --git a/test/box_test.jl b/test/box_test.jl
@@ -1,4 +1,5 @@
 using HypothesisTests, Base.Test
+using HypothesisTests: default_tail
 
 sim_data_h0=[
     0.297287984535462;0.382395967790608;-0.597634476728231;-0.0104452446373756;
@@ -35,6 +36,7 @@ t = HypothesisTests.BoxPierceTest(sim_data_h0,2,1)
 @test t.dof == 1
 @test t.Q ≈ 1.233942980734545
 @test pvalue(t) ≈ 0.2666415904008932
+@test default_tail(t) == :right
 show(IOBuffer(), t)
 
 t = HypothesisTests.LjungBoxTest(sim_data_h0,5,2)
@@ -44,6 +46,7 @@ t = HypothesisTests.LjungBoxTest(sim_data_h0,5,2)
 @test t.dof == 2
 @test t.Q ≈ 3.2090126519163626
 @test pvalue(t) ≈ 0.36050846449240337
+@test default_tail(t) == :right
 show(IOBuffer(), t)
 
 sim_data_h1 = [

diff --git a/test/breusch_godfrey.jl b/test/breusch_godfrey.jl
@@ -1,4 +1,5 @@
 using HypothesisTests, Base.Test
+using HypothesisTests: default_tail
 
 # data simulated under H_1
 data_h1 = [
@@ -113,6 +114,7 @@ t = BreuschGodfreyTest(data_h1[:,2:end],res_vec,4)
 @test t.lag == 4
 @test t.BG ≈ 31.39810637185552
 @test pvalue(t) ≈ 2.5390992557054064e-6
+@test default_tail(t) == :right
 show(IOBuffer(), t)
 
 t = BreuschGodfreyTest(data_h1[:,2:end],res_vec,2,false)

diff --git a/test/circular.jl b/test/circular.jl
@@ -1,4 +1,5 @@
 using HypothesisTests, Base.Test
+using HypothesisTests: default_tail
 
 # Fisher, 1995 example 4.11
 @test abs(pvalue(RayleighTest(0.2370, 60)) - 0.034) <= 0.001
@@ -12,6 +13,7 @@ t = RayleighTest(
     285, 292, 305, 315, 325, 328, 329, 343, 354, 359]
     *pi/180)
 @test abs(pvalue(t) - 0.20) <= 0.01
+@test default_tail(t) == :both
 show(IOBuffer(), t)
 
 # Fisher, 1995 example 6.8
@@ -26,10 +28,12 @@ wind_direction_12pm =
 t = FisherTLinearAssociation(wind_direction_6am, wind_direction_12pm)
 @test abs(t.rho_t- 0.191) < 0.001
 @test abs(pvalue(t) - 0.01) < 0.01
+@test default_tail(t) == :both
 show(IOBuffer(), t)
 
 # Jammaladak, 2001 example 8.1
 t = JammalamadakaCircularCorrelation(wind_direction_6am, wind_direction_12pm)
 @test abs(t.r - 0.2704648) < 1e-7
 @test abs(pvalue(t) - 0.2247383) < 1e-7
+@test default_tail(t) == :both
 show(IOBuffer(), t)
diff --git a/test/fisher.jl b/test/fisher.jl
@@ -1,11 +1,13 @@
 using HypothesisTests, Base.Test
+using HypothesisTests: default_tail
 
 t = HypothesisTests.FisherExactTest(1, 1, 1, 1)
 @test t.ω ≈ 1.0
 @test pvalue(t; tail=:left) ≈ 0.8333333333333337
 @test pvalue(t; tail=:right) ≈ 0.8333333333333337
 @test pvalue(t; method=:central) ≈ 1.0
 @test pvalue(t; method=:minlike) ≈ 1.0
+@test default_tail(t) == :both
 @test_ci_approx confint(t; tail=:left) (0.0, 76.24918299781056)
 @test_ci_approx confint(t; tail=:right) (0.013114894621608135, Inf)
 @test_ci_approx confint(t; method=:central) (0.006400016357911029, 156.2496006379585)

diff --git a/test/kolmogorov_smirnov.jl b/test/kolmogorov_smirnov.jl
@@ -14,6 +14,7 @@ t = ApproximateOneSampleKSTest(x, Uniform())
 @test pvalue(t) ≈ 0.6777349664784745
 @test pvalue(t; tail=:left) ≈ 0.849573771973747
 @test pvalue(t; tail=:right) ≈ 0.3545875485608989
+@test default_tail(t) == :both
 show(IOBuffer(), t)
 
 t = ApproximateTwoSampleKSTest(x, [(0:24)/25...])
@@ -23,6 +24,7 @@ t = ApproximateTwoSampleKSTest(x, [(0:24)/25...])
 @test pvalue(t) ≈ 0.993764859699076
 @test pvalue(t; tail=:left) ≈ 0.8521437889662113
 @test pvalue(t; tail=:right) ≈ 0.697676326071031
+@test default_tail(t) == :both
 show(IOBuffer(), t)
 
 t = ExactOneSampleKSTest(x, Uniform())
@@ -32,6 +34,7 @@ t = ExactOneSampleKSTest(x, Uniform())
 @test pvalue(t) ≈ 0.6263437768244742
 @test pvalue(t; tail=:left) ≈ 0.8195705417998183
 @test pvalue(t; tail=:right) ≈ 0.32350648882777194
+@test default_tail(t) == :both
 show(IOBuffer(), t)
 
 ## check fit to normal distribution

diff --git a/test/kruskal_wallis.jl b/test/kruskal_wallis.jl
@@ -1,4 +1,5 @@
 using HypothesisTests, Base.Test
+using HypothesisTests: default_tail
 
 # www.uni-siegen.de/phil/sozialwissenschaften/soziologie/mitarbeiter/ludwig-mayerhofer/statistik/statistik_downloads/statistik_ii_7.pdf
 u5 = [620, 5350, 7220]
@@ -13,6 +14,7 @@ t = HypothesisTests.KruskalWallisTest(u5, u250, u2500, more)
 @test t.H ≈ 1.5803174603174597
 @test t.tie_adjustment == 1
 @test pvalue(t) ≈ 0.6638608922384397
+@test default_tail(t) == :right
 show(IOBuffer(), t)
 
 # http://www.brightstat.com/index.php?option=com_content&task=view&id=41&Itemid=1&limit=1&limitstart=2