Skip to content

Commit

Permalink
Implement single-model ftest (#424)
Browse files Browse the repository at this point in the history
  • Loading branch information
OndrejSlamecka authored Nov 2, 2021
1 parent fa8c6e3 commit 4e72e76
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "GLM"
uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
version = "1.5.1"
version = "1.6.0"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Expand Down
3 changes: 2 additions & 1 deletion src/GLM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ module GLM
fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue
import StatsFuns: xlogy
import SpecialFunctions: erfc, erfcinv, digamma, trigamma
import StatsModels: hasintercept
export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
cooksdistance
cooksdistance, hasintercept

export
# types
Expand Down
47 changes: 44 additions & 3 deletions src/ftest.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
struct SingleFTestResult
nobs::Int
dof::Int
fstat::Float64
pval::Float64
end

mutable struct FTestResult{N}
nobs::Int
ssr::NTuple{N, Float64}
Expand Down Expand Up @@ -28,6 +35,37 @@ _diffn(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i]-t[i+1], N-1)

_diff(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i+1]-t[i], N-1)

"""
ftest(mod::LinearModel)
Perform an F-test to determine whether model `mod` fits significantly better
than the null model (i.e. which includes only the intercept).
```jldoctest
julia> dat = DataFrame(Result=[1.1, 1.2, 1, 2.2, 1.9, 2, 0.9, 1, 1, 2.2, 2, 2],
Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2]);
julia> model = lm(@formula(Result ~ 1 + Treatment), dat);
julia> ftest(model.model)
F-test against the null model:
F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-07
```
"""
function ftest(mod::LinearModel)
hasintercept(mod) || throw(ArgumentError("ftest only works for models with an intercept"))

rss = deviance(mod)
tss = nulldeviance(mod)

n = Int(nobs(mod))
p = dof(mod) - 2 # -2 for intercept and dispersion parameter
fstat = ((tss - rss) / rss) * ((n - p - 1) / p)
fdist = FDist(p, dof_residual(mod))

SingleFTestResult(n, p, promote(fstat, ccdf(fdist, abs(fstat)))...)
end

"""
ftest(mod::LinearModel...; atol::Real=0.0)
Expand Down Expand Up @@ -85,9 +123,6 @@ F-test: 3 models fitted on 12 observations
```
"""
function ftest(mods::LinearModel...; atol::Real=0.0)
if length(mods) < 2
Base.depwarn("Passing less than two models to ftest is deprecated", :ftest)
end
if !all(==(nobs(mods[1])), nobs.(mods))
throw(ArgumentError("F test is only valid for models fitted on the same data, " *
"but number of observations differ"))
Expand Down Expand Up @@ -127,6 +162,12 @@ function ftest(mods::LinearModel...; atol::Real=0.0)
return FTestResult(Int(nobs(mods[1])), SSR, df, r2.(mods), fstat, pval)
end

function show(io::IO, ftr::SingleFTestResult)
print(io, "F-test against the null model:\nF-statistic: ", StatsBase.TestStat(ftr.fstat), " ")
print(io, "on ", ftr.nobs, " observations and ", ftr.dof, " degrees of freedom, ")
print(io, "p-value: ", PValue(ftr.pval))
end

function show(io::IO, ftr::FTestResult{N}) where N
Δdof = _diff(ftr.dof)
Δssr = _diff(ftr.ssr)
Expand Down
2 changes: 2 additions & 0 deletions src/linpred.jl
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,5 @@ coef(x::LinPred) = x.beta0
coef(obj::LinPredModel) = coef(obj.pp)

dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1

hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2))
76 changes: 75 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,62 @@ end
@test preds_delta.upper .- preds_delta.lower 2 .* 1.96 .* R_glm_se atol=1e-3
@test_throws ArgumentError predict(gm, newX, interval=:confidence, interval_method=:undefined_method)
@test_throws ArgumentError predict(gm, newX, interval=:undefined)
end
end

@testset "F test comparing to null model" begin
d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]))
mod = lm(@formula(Result~Treatment), d).model
othermod = lm(@formula(Result~Other), d).model
nullmod = lm(@formula(Result~1), d).model
bothmod = lm(@formula(Result~Other+Treatment), d).model
nointerceptmod = lm(reshape(d.Treatment, :, 1), d.Result)

ft1 = ftest(mod)
ft1base = ftest(nullmod, mod)
@test ft1.nobs == ft1base.nobs
@test ft1.dof dof(mod) - dof(nullmod)
@test ft1.fstat ft1base.fstat[2]
@test ft1.pval ft1base.pval[2]
if VERSION >= v"1.6.0"
@test sprint(show, ft1) == """
F-test against the null model:
F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-07"""
else
@test sprint(show, ft1) == """
F-test against the null model:
F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-7"""
end

ft2 = ftest(othermod)
ft2base = ftest(nullmod, othermod)
@test ft2.nobs == ft2base.nobs
@test ft2.dof dof(othermod) - dof(nullmod)
@test ft2.fstat ft2base.fstat[2]
@test ft2.pval ft2base.pval[2]
@test sprint(show, ft2) == """
F-test against the null model:
F-statistic: 1.12 on 12 observations and 2 degrees of freedom, p-value: 0.3690"""

ft3 = ftest(bothmod)
ft3base = ftest(nullmod, bothmod)
@test ft3.nobs == ft3base.nobs
@test ft3.dof dof(bothmod) - dof(nullmod)
@test ft3.fstat ft3base.fstat[2]
@test ft3.pval ft3base.pval[2]
if VERSION >= v"1.6.0"
@test sprint(show, ft3) == """
F-test against the null model:
F-statistic: 81.97 on 12 observations and 3 degrees of freedom, p-value: <1e-05"""
else
@test sprint(show, ft3) == """
F-test against the null model:
F-statistic: 81.97 on 12 observations and 3 degrees of freedom, p-value: <1e-5"""
end

@test_throws ArgumentError ftest(nointerceptmod)
end

@testset "F test for model comparison" begin
d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Expand Down Expand Up @@ -895,3 +950,22 @@ end
@test hash(NegativeBinomialLink(0.3)) == hash(NegativeBinomialLink(0.3))
@test hash(NegativeBinomialLink(0.31)) != hash(NegativeBinomialLink(0.3))
end

@testset "hasintercept" begin
d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]))

mod = lm(@formula(Result~Treatment), d).model
@test hasintercept(mod)

nullmod = lm(@formula(Result~1), d).model
@test hasintercept(nullmod)

nointerceptmod = lm(reshape(d.Treatment, :, 1), d.Result)
@test !hasintercept(nointerceptmod)

rng = StableRNG(1234321)
secondcolinterceptmod = glm([randn(rng, 5) ones(5)], ones(5), Binomial(), LogitLink())
@test hasintercept(secondcolinterceptmod)
end

0 comments on commit 4e72e76

Please sign in to comment.