Skip to content

Commit

Permalink
Merge pull request #71 from JuliaStats/jmw/ztest
Browse files Browse the repository at this point in the history
Implement z-tests
  • Loading branch information
johnmyleswhite committed Aug 7, 2016
2 parents fbd8220 + a0e6f29 commit 6997e30
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/HypothesisTests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ include("kolmogorov_smirnov.jl")
include("kruskal_wallis.jl")
include("mann_whitney.jl")
include("t.jl")
include("z.jl")
include("wilcoxon.jl")
include("power_divergence.jl")
include("anderson_darling.jl")
Expand Down
134 changes: 134 additions & 0 deletions src/z.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# z.jl
# Various forms of z-tests
#
# Copyright (C) 2016 John Myles White
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

export OneSampleZTest, TwoSampleZTest, EqualVarianceZTest,
UnequalVarianceZTest

abstract ZTest <: HypothesisTest
abstract TwoSampleZTest <: ZTest

pvalue(x::ZTest; tail=:both) = pvalue(Normal(0.0, 1.0), x.z; tail=tail)

# confidence interval by inversion
function ci(x::ZTest, alpha::Float64=0.05; tail=:both)
check_alpha(alpha)

if tail == :left
(-Inf, ci(x, alpha*2)[2])
elseif tail == :right
(ci(x, alpha*2)[1], Inf)
elseif tail == :both
q = cquantile(Normal(0.0, 1.0), alpha/2)
(x.xbar-q*x.stderr, x.xbar+q*x.stderr)
else
throw(ArgumentError("tail=$(tail) is invalid"))
end
end


## ONE SAMPLE Z-TEST

immutable OneSampleZTest <: ZTest
n::Int # number of observations
xbar::Real # estimated mean
stderr::Real # population standard error
z::Real # t-statistic
μ0::Real # mean under h_0
end

testname(::OneSampleZTest) = "One sample z-test"
population_param_of_interest(x::OneSampleZTest) = ("Mean", x.μ0, x.xbar) # parameter of interest: name, value under h0, point estimate

function show_params(io::IO, x::OneSampleZTest, ident="")
println(io, ident, "number of observations: $(x.n)")
println(io, ident, "z-statistic: $(x.z)")
println(io, ident, "population standard error: $(x.stderr)")
end

function OneSampleZTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0)
stderr = stddev/sqrt(n)
z = (xbar-μ0)/stderr
OneSampleZTest(n, xbar, stderr, z, μ0)
end

OneSampleZTest{T<:Real}(v::AbstractVector{T}, μ0::Real=0) = OneSampleZTest(mean(v), std(v), length(v), μ0)

function OneSampleZTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0)
check_same_length(x, y)

OneSampleZTest(x - y, μ0)
end


## TWO SAMPLE Z-TEST (EQUAL VARIANCE)

immutable EqualVarianceZTest <: TwoSampleZTest
n_x::Int # number of observations
n_y::Int # number of observations
xbar::Real # estimated mean difference
stderr::Real # population standard error
z::Real # z-statistic
μ0::Real # mean difference under h_0
end

function show_params(io::IO, x::TwoSampleZTest, ident="")
println(io, ident, "number of observations: [$(x.n_x),$(x.n_y)]")
println(io, ident, "z-statistic: $(x.z)")
println(io, ident, "population standard error: $(x.stderr)")
end

testname(::EqualVarianceZTest) = "Two sample z-test (equal variance)"
population_param_of_interest(x::TwoSampleZTest) = ("Mean difference", x.μ0, x.xbar) # parameter of interest: name, value under h0, point estimate

function EqualVarianceZTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0)
nx, ny = length(x), length(y)
xbar = mean(x) - mean(y)
stddev = sqrt(((nx - 1) * var(x) + (ny - 1) * var(y)) / (nx + ny - 2))
stderr = stddev * sqrt(1/nx + 1/ny)
z = (xbar - μ0) / stderr
EqualVarianceZTest(nx, ny, xbar, stderr, z, μ0)
end


## TWO SAMPLE Z-TEST (UNEQUAL VARIANCE)

immutable UnequalVarianceZTest <: TwoSampleZTest
n_x::Int # number of observations
n_y::Int # number of observations
xbar::Real # estimated mean
stderr::Real # empirical standard error
z::Real # z-statistic
μ0::Real # mean under h_0
end

testname(::UnequalVarianceZTest) = "Two sample z-test (unequal variance)"

function UnequalVarianceZTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0)
nx, ny = length(x), length(y)
xbar = mean(x)-mean(y)
varx, vary = var(x), var(y)
stderr = sqrt(varx/nx + vary/ny)
z = (xbar-μ0)/stderr
UnequalVarianceZTest(nx, ny, xbar, stderr, z, μ0)
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ include("kolmogorov_smirnov.jl")
include("kruskal_wallis.jl")
include("mann_whitney.jl")
include("t.jl")
include("z.jl")
include("wilcoxon.jl")
include("power_divergence.jl")
include("anderson_darling.jl")
Expand Down
97 changes: 97 additions & 0 deletions test/z.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
using HypothesisTests, Base.Test
using Distributions

# This is always the null in our tests.
null = Normal(0.0, 1.0)

## ONE SAMPLE T-TEST

# One sample
x = -5:5
@test pvalue(OneSampleZTest(x)) == 1

x = -5:10
m, s, n = mean(x), std(x), length(x)
se = s / sqrt(n)
z = (m - 0) / se

tst = OneSampleZTest(x)
@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
show(IOBuffer(), tst)

tst = OneSampleZTest(m, s, n)
@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
@test_approx_eq(ci(tst)[1], m + quantile(null, 0.05 / 2) * se)
@test_approx_eq(ci(tst)[2], m + cquantile(null, 0.05 / 2) * se)
@test_approx_eq(ci(tst, 0.10)[1], m + quantile(null, 0.10 / 2) * se)
@test_approx_eq(ci(tst, 0.10)[2], m + cquantile(null, 0.10 / 2) * se)
@test_approx_eq(ci(tst; tail=:left)[1], -Inf)
@test_approx_eq(ci(tst; tail=:left)[2], m + cquantile(null, 0.05) * se)
@test_approx_eq(ci(tst; tail=:right)[1], m + quantile(null, 0.05) * se)
@test_approx_eq(ci(tst; tail=:right)[2], Inf)
show(IOBuffer(), tst)

x = -10:5
m, s, n = mean(x), std(x), length(x)
se = s / sqrt(n)
z = (m - 0) / se

tst = OneSampleZTest(x)
@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
show(IOBuffer(), tst)

tst = OneSampleZTest(m, s, n)
@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
@test_approx_eq(ci(tst)[1], m + quantile(null, 0.05 / 2) * se)
@test_approx_eq(ci(tst)[2], m + cquantile(null, 0.05 / 2) * se)
@test_approx_eq(ci(tst, 0.10)[1], m + quantile(null, 0.10 / 2) * se)
@test_approx_eq(ci(tst, 0.10)[2], m + cquantile(null, 0.10 / 2) * se)
@test_approx_eq(ci(tst; tail=:left)[1], -Inf)
@test_approx_eq(ci(tst; tail=:left)[2], m + cquantile(null, 0.05) * se)
@test_approx_eq(ci(tst; tail=:right)[1], m + quantile(null, 0.05) * se)
@test_approx_eq(ci(tst; tail=:right)[2], Inf)
show(IOBuffer(), tst)

# Paired samples
x, y = [1, 1, 2, 1, 0], [0, 1, 1, 1, 0]
m, s, n = mean(x - y), std(x - y), length(x - y)
se = s / sqrt(n)
z = (m - 0) / se
tst = OneSampleZTest(x, y)
@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))

## TWO SAMPLE Z-TESTS

a1 = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99]
a2 = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98]

tst = EqualVarianceZTest(a1, a2)
m1, s1sq, n1 = mean(a1), var(a1), length(a1)
m2, s2sq, n2 = mean(a2), var(a2), length(a2)
xbar = (m1 - m2)
avg_var = (n1 - 1) / (n1 + n2 - 2) * s1sq + (n2 - 1) / (n1 + n2 - 2) * s2sq
se = sqrt(avg_var / n1 + avg_var / n2)
z = xbar / se

@test_approx_eq(tst.z, z)
@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
@test_approx_eq(ci(tst)[1], xbar + quantile(null, 0.05 / 2) * se)
@test_approx_eq(ci(tst)[2], xbar + cquantile(null, 0.05 / 2) * se)
show(IOBuffer(), tst)

tst = UnequalVarianceZTest(a1, a2)
se = sqrt(s1sq / n1 + s2sq / n2)
z = xbar / se
@test_approx_eq(tst.z, z)
@test_approx_eq(pvalue(tst), 2 * min(cdf(null, z), ccdf(null, z)))
@test_approx_eq(pvalue(tst; tail=:left), cdf(null, z))
@test_approx_eq(pvalue(tst; tail=:right), ccdf(null, z))
@test_approx_eq(ci(tst)[1], xbar + quantile(null, 0.05 / 2) * se)
@test_approx_eq(ci(tst)[2], xbar + cquantile(null, 0.05 / 2) * se)
show(IOBuffer(), tst)

0 comments on commit 6997e30

Please sign in to comment.