Merge pull request #71 from JuliaStats/jmw/ztest

Implement z-tests
JuliaStats · Aug 7, 2016 · 6997e30 · 6997e30
2 parents fbd8220 + a0e6f29
commit 6997e30
Show file tree

Hide file tree

Showing 4 changed files with 233 additions and 0 deletions.
diff --git a/src/HypothesisTests.jl b/src/HypothesisTests.jl
@@ -135,6 +135,7 @@ include("kolmogorov_smirnov.jl")
 include("kruskal_wallis.jl")
 include("mann_whitney.jl")
 include("t.jl")
+include("z.jl")
 include("wilcoxon.jl")
 include("power_divergence.jl")
 include("anderson_darling.jl")

diff --git a/src/z.jl b/src/z.jl
@@ -0,0 +1,134 @@
+# z.jl
+# Various forms of z-tests
+#
+# Copyright (C) 2016   John Myles White
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+export OneSampleZTest, TwoSampleZTest, EqualVarianceZTest,
+    UnequalVarianceZTest
+
+abstract ZTest <: HypothesisTest
+abstract TwoSampleZTest <: ZTest
+
+pvalue(x::ZTest; tail=:both) = pvalue(Normal(0.0, 1.0), x.z; tail=tail)
+
+# confidence interval by inversion
+function ci(x::ZTest, alpha::Float64=0.05; tail=:both)
+    check_alpha(alpha)
+
+    if tail == :left
+        (-Inf, ci(x, alpha*2)[2])
+    elseif tail == :right
+        (ci(x, alpha*2)[1], Inf)
+    elseif tail == :both
+        q = cquantile(Normal(0.0, 1.0), alpha/2)
+        (x.xbar-q*x.stderr, x.xbar+q*x.stderr)
+    else
+        throw(ArgumentError("tail=$(tail) is invalid"))
+    end
+end
+
+
+## ONE SAMPLE Z-TEST
+
+immutable OneSampleZTest <: ZTest
+    n::Int       # number of observations
+    xbar::Real   # estimated mean
+    stderr::Real # population standard error
+    z::Real      # t-statistic
+    μ0::Real     # mean under h_0
+end
+
+testname(::OneSampleZTest) = "One sample z-test"
+population_param_of_interest(x::OneSampleZTest) = ("Mean", x.μ0, x.xbar) # parameter of interest: name, value under h0, point estimate
+
+function show_params(io::IO, x::OneSampleZTest, ident="")
+    println(io, ident, "number of observations:   $(x.n)")
+    println(io, ident, "z-statistic:              $(x.z)")
+    println(io, ident, "population standard error: $(x.stderr)")
+end
+
+function OneSampleZTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0)
+    stderr = stddev/sqrt(n)
+    z = (xbar-μ0)/stderr
+    OneSampleZTest(n, xbar, stderr, z, μ0)
+end
+
+OneSampleZTest{T<:Real}(v::AbstractVector{T}, μ0::Real=0) = OneSampleZTest(mean(v), std(v), length(v), μ0)
+
+function OneSampleZTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0)
+    check_same_length(x, y)
+
+    OneSampleZTest(x - y, μ0)
+end
+
+
+## TWO SAMPLE Z-TEST (EQUAL VARIANCE)
+
+immutable EqualVarianceZTest <: TwoSampleZTest
+    n_x::Int     # number of observations
+    n_y::Int     # number of observations
+    xbar::Real   # estimated mean difference
+    stderr::Real # population standard error
+    z::Real      # z-statistic
+    μ0::Real     # mean difference under h_0
+end
+
+function show_params(io::IO, x::TwoSampleZTest, ident="")
+    println(io, ident, "number of observations:   [$(x.n_x),$(x.n_y)]")
+    println(io, ident, "z-statistic:              $(x.z)")
+    println(io, ident, "population standard error: $(x.stderr)")
+end
+
+testname(::EqualVarianceZTest) = "Two sample z-test (equal variance)"
+population_param_of_interest(x::TwoSampleZTest) = ("Mean difference", x.μ0, x.xbar) # parameter of interest: name, value under h0, point estimate
+
+function EqualVarianceZTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0)
+    nx, ny = length(x), length(y)
+    xbar = mean(x) - mean(y)
+    stddev = sqrt(((nx - 1) * var(x) + (ny - 1) * var(y)) / (nx + ny - 2))
+    stderr = stddev * sqrt(1/nx + 1/ny)
+    z = (xbar - μ0) / stderr
+    EqualVarianceZTest(nx, ny, xbar, stderr, z, μ0)
+end
+
+
+## TWO SAMPLE Z-TEST (UNEQUAL VARIANCE)
+
+immutable UnequalVarianceZTest <: TwoSampleZTest
+    n_x::Int     # number of observations
+    n_y::Int     # number of observations
+    xbar::Real   # estimated mean
+    stderr::Real # empirical standard error
+    z::Real      # z-statistic
+    μ0::Real     # mean under h_0
+end
+
+testname(::UnequalVarianceZTest) = "Two sample z-test (unequal variance)"
+
+function UnequalVarianceZTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0)
+    nx, ny = length(x), length(y)
+    xbar = mean(x)-mean(y)
+    varx, vary = var(x), var(y)
+    stderr = sqrt(varx/nx + vary/ny)
+    z = (xbar-μ0)/stderr
+    UnequalVarianceZTest(nx, ny, xbar, stderr, z, μ0)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -17,6 +17,7 @@ include("kolmogorov_smirnov.jl")
 include("kruskal_wallis.jl")
 include("mann_whitney.jl")
 include("t.jl")
+include("z.jl")
 include("wilcoxon.jl")
 include("power_divergence.jl")
 include("anderson_darling.jl")

diff --git a/test/z.jl b/test/z.jl
@@ -0,0 +1,97 @@
+using HypothesisTests, Base.Test
+using Distributions
+
+# This is always the null in our tests.
+null = Normal(0.0, 1.0)
+
+## ONE SAMPLE T-TEST
+
+# One sample
+x = -5:5
+@test pvalue(OneSampleZTest(x)) == 1
+
+x = -5:10
+m, s, n = mean(x), std(x), length(x)
+se = s / sqrt(n)
+z = (m - 0) / se
+
+tst = OneSampleZTest(x)
+@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
+@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
+@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
+show(IOBuffer(), tst)
+
+tst = OneSampleZTest(m, s, n)
+@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
+@test_approx_eq(ci(tst)[1], m + quantile(null, 0.05 / 2) * se)
+@test_approx_eq(ci(tst)[2], m + cquantile(null, 0.05 / 2) * se)
+@test_approx_eq(ci(tst, 0.10)[1], m + quantile(null, 0.10 / 2) * se)
+@test_approx_eq(ci(tst, 0.10)[2], m + cquantile(null, 0.10 / 2) * se)
+@test_approx_eq(ci(tst; tail=:left)[1], -Inf)
+@test_approx_eq(ci(tst; tail=:left)[2], m + cquantile(null, 0.05) * se)
+@test_approx_eq(ci(tst; tail=:right)[1], m + quantile(null, 0.05) * se)
+@test_approx_eq(ci(tst; tail=:right)[2], Inf)
+show(IOBuffer(), tst)
+
+x = -10:5
+m, s, n = mean(x), std(x), length(x)
+se = s / sqrt(n)
+z = (m - 0) / se
+
+tst = OneSampleZTest(x)
+@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
+@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
+@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
+show(IOBuffer(), tst)
+
+tst = OneSampleZTest(m, s, n)
+@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
+@test_approx_eq(ci(tst)[1], m + quantile(null, 0.05 / 2) * se)
+@test_approx_eq(ci(tst)[2], m + cquantile(null, 0.05 / 2) * se)
+@test_approx_eq(ci(tst, 0.10)[1], m + quantile(null, 0.10 / 2) * se)
+@test_approx_eq(ci(tst, 0.10)[2], m + cquantile(null, 0.10 / 2) * se)
+@test_approx_eq(ci(tst; tail=:left)[1], -Inf)
+@test_approx_eq(ci(tst; tail=:left)[2], m + cquantile(null, 0.05) * se)
+@test_approx_eq(ci(tst; tail=:right)[1], m + quantile(null, 0.05) * se)
+@test_approx_eq(ci(tst; tail=:right)[2], Inf)
+show(IOBuffer(), tst)
+
+# Paired samples
+x, y = [1, 1, 2, 1, 0], [0, 1, 1, 1, 0]
+m, s, n = mean(x - y), std(x - y), length(x - y)
+se = s / sqrt(n)
+z = (m - 0) / se
+tst = OneSampleZTest(x, y)
+@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
+
+## TWO SAMPLE Z-TESTS
+
+a1 = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99]
+a2 = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98]
+
+tst = EqualVarianceZTest(a1, a2)
+m1, s1sq, n1 = mean(a1), var(a1), length(a1)
+m2, s2sq, n2 = mean(a2), var(a2), length(a2)
+xbar = (m1 - m2)
+avg_var = (n1 - 1) / (n1 + n2 - 2) * s1sq + (n2 - 1) / (n1 + n2 - 2) * s2sq
+se = sqrt(avg_var / n1 + avg_var / n2)
+z = xbar / se
+
+@test_approx_eq(tst.z, z)
+@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
+@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
+@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
+@test_approx_eq(ci(tst)[1], xbar + quantile(null, 0.05 / 2) * se)
+@test_approx_eq(ci(tst)[2], xbar + cquantile(null, 0.05 / 2) * se)
+show(IOBuffer(), tst)
+
+tst = UnequalVarianceZTest(a1, a2)
+se = sqrt(s1sq / n1 + s2sq / n2)
+z = xbar / se
+@test_approx_eq(tst.z, z)
+@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
+@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
+@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
+@test_approx_eq(ci(tst)[1], xbar + quantile(null, 0.05 / 2) * se)
+@test_approx_eq(ci(tst)[2], xbar + cquantile(null, 0.05 / 2) * se)
+show(IOBuffer(), tst)