Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement fstatistic #424

Merged
merged 1 commit into from
Nov 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "GLM"
uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
version = "1.5.1"
version = "1.6.0"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Expand Down
3 changes: 2 additions & 1 deletion src/GLM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ module GLM
fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue
import StatsFuns: xlogy
import SpecialFunctions: erfc, erfcinv, digamma, trigamma
import StatsModels: hasintercept
export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
OndrejSlamecka marked this conversation as resolved.
Show resolved Hide resolved
loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
cooksdistance
cooksdistance, hasintercept

export
# types
Expand Down
47 changes: 44 additions & 3 deletions src/ftest.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
struct SingleFTestResult
nobs::Int
dof::Int
fstat::Float64
pval::Float64
end

mutable struct FTestResult{N}
nobs::Int
ssr::NTuple{N, Float64}
Expand Down Expand Up @@ -28,6 +35,37 @@ _diffn(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i]-t[i+1], N-1)

_diff(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i+1]-t[i], N-1)

"""
ftest(mod::LinearModel)

Perform an F-test to determine whether model `mod` fits significantly better
than the null model (i.e. which includes only the intercept).

```jldoctest
julia> dat = DataFrame(Result=[1.1, 1.2, 1, 2.2, 1.9, 2, 0.9, 1, 1, 2.2, 2, 2],
Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2]);

julia> model = lm(@formula(Result ~ 1 + Treatment), dat);

julia> ftest(model.model)
F-test against the null model:
F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-07
```
"""
function ftest(mod::LinearModel)
hasintercept(mod) || throw(ArgumentError("ftest only works for models with an intercept"))

rss = deviance(mod)
tss = nulldeviance(mod)

n = Int(nobs(mod))
p = dof(mod) - 2 # -2 for intercept and dispersion parameter
fstat = ((tss - rss) / rss) * ((n - p - 1) / p)
fdist = FDist(p, dof_residual(mod))

SingleFTestResult(n, p, promote(fstat, ccdf(fdist, abs(fstat)))...)
end

"""
ftest(mod::LinearModel...; atol::Real=0.0)

Expand Down Expand Up @@ -85,9 +123,6 @@ F-test: 3 models fitted on 12 observations
```
"""
function ftest(mods::LinearModel...; atol::Real=0.0)
if length(mods) < 2
Base.depwarn("Passing less than two models to ftest is deprecated", :ftest)
end
if !all(==(nobs(mods[1])), nobs.(mods))
throw(ArgumentError("F test is only valid for models fitted on the same data, " *
"but number of observations differ"))
Expand Down Expand Up @@ -127,6 +162,12 @@ function ftest(mods::LinearModel...; atol::Real=0.0)
return FTestResult(Int(nobs(mods[1])), SSR, df, r2.(mods), fstat, pval)
end

function show(io::IO, ftr::SingleFTestResult)
print(io, "F-test against the null model:\nF-statistic: ", StatsBase.TestStat(ftr.fstat), " ")
print(io, "on ", ftr.nobs, " observations and ", ftr.dof, " degrees of freedom, ")
print(io, "p-value: ", PValue(ftr.pval))
end

function show(io::IO, ftr::FTestResult{N}) where N
Δdof = _diff(ftr.dof)
Δssr = _diff(ftr.ssr)
Expand Down
2 changes: 2 additions & 0 deletions src/linpred.jl
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,5 @@ coef(x::LinPred) = x.beta0
coef(obj::LinPredModel) = coef(obj.pp)

dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1

hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2))
76 changes: 75 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,62 @@ end
@test preds_delta.upper .- preds_delta.lower ≈ 2 .* 1.96 .* R_glm_se atol=1e-3
@test_throws ArgumentError predict(gm, newX, interval=:confidence, interval_method=:undefined_method)
@test_throws ArgumentError predict(gm, newX, interval=:undefined)
end
end

@testset "F test comparing to null model" begin
d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]))
mod = lm(@formula(Result~Treatment), d).model
othermod = lm(@formula(Result~Other), d).model
nullmod = lm(@formula(Result~1), d).model
bothmod = lm(@formula(Result~Other+Treatment), d).model
nointerceptmod = lm(reshape(d.Treatment, :, 1), d.Result)

ft1 = ftest(mod)
ft1base = ftest(nullmod, mod)
@test ft1.nobs == ft1base.nobs
@test ft1.dof ≈ dof(mod) - dof(nullmod)
@test ft1.fstat ≈ ft1base.fstat[2]
@test ft1.pval ≈ ft1base.pval[2]
if VERSION >= v"1.6.0"
@test sprint(show, ft1) == """
F-test against the null model:
F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-07"""
else
@test sprint(show, ft1) == """
F-test against the null model:
F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-7"""
end

ft2 = ftest(othermod)
ft2base = ftest(nullmod, othermod)
@test ft2.nobs == ft2base.nobs
@test ft2.dof ≈ dof(othermod) - dof(nullmod)
@test ft2.fstat ≈ ft2base.fstat[2]
@test ft2.pval ≈ ft2base.pval[2]
@test sprint(show, ft2) == """
F-test against the null model:
F-statistic: 1.12 on 12 observations and 2 degrees of freedom, p-value: 0.3690"""

ft3 = ftest(bothmod)
ft3base = ftest(nullmod, bothmod)
@test ft3.nobs == ft3base.nobs
@test ft3.dof ≈ dof(bothmod) - dof(nullmod)
@test ft3.fstat ≈ ft3base.fstat[2]
@test ft3.pval ≈ ft3base.pval[2]
if VERSION >= v"1.6.0"
@test sprint(show, ft3) == """
F-test against the null model:
F-statistic: 81.97 on 12 observations and 3 degrees of freedom, p-value: <1e-05"""
else
@test sprint(show, ft3) == """
F-test against the null model:
F-statistic: 81.97 on 12 observations and 3 degrees of freedom, p-value: <1e-5"""
end

@test_throws ArgumentError ftest(nointerceptmod)
end

@testset "F test for model comparison" begin
d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Expand Down Expand Up @@ -895,3 +950,22 @@ end
@test hash(NegativeBinomialLink(0.3)) == hash(NegativeBinomialLink(0.3))
@test hash(NegativeBinomialLink(0.31)) != hash(NegativeBinomialLink(0.3))
end

@testset "hasintercept" begin
d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]))

mod = lm(@formula(Result~Treatment), d).model
@test hasintercept(mod)

nullmod = lm(@formula(Result~1), d).model
@test hasintercept(nullmod)

nointerceptmod = lm(reshape(d.Treatment, :, 1), d.Result)
@test !hasintercept(nointerceptmod)

rng = StableRNG(1234321)
secondcolinterceptmod = glm([randn(rng, 5) ones(5)], ones(5), Binomial(), LogitLink())
@test hasintercept(secondcolinterceptmod)
end