Skip to content

Commit

Permalink
ftest improvements
Browse files Browse the repository at this point in the history
Allow passing models from simpler to more complex, giving equivalent tests but with
reversed signs for ΔDOF, ΔSSR and ΔR².
Stop printing degrees of freedom, as residual DOF and difference in DOF seems enough.
Print two spaces instead of one between columns to make table more readable, and add
horizontal lines to match the CoefTable printing.
Make struct type stable by filling tuples with an initial NaN entry when needed.
Make tests cover more situations by having a model with ΔDOF of 2.
Small code improvements.
  • Loading branch information
nalimilan committed Oct 13, 2019
1 parent f86dc6d commit f50204a
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 72 deletions.
120 changes: 70 additions & 50 deletions src/ftest.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
mutable struct FTestResult{N}
ssr::NTuple{N, Float64}
dof::NTuple{N, Int}
dof_resid::NTuple{N, Int}
r2::NTuple{N, Float64}
fstat::Tuple{Vararg{Float64}}
pval::Tuple{Vararg{PValue}}
fstat::NTuple{N, Float64}
pval::NTuple{N, Float64}
end

"""A helper function to determine if mod1 is nested in mod2"""
function issubmodel(mod1::LinPredModel, mod2::LinPredModel; atol=0::Real)
function issubmodel(mod1::LinPredModel, mod2::LinPredModel; atol::Real=0.0)
mod1.rr.y != mod2.rr.y && return false # Response variables must be equal

# Test that models are nested
Expand All @@ -28,15 +27,14 @@ _diffn(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i]-t[i+1], N-1)

_diff(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i+1]-t[i], N-1)

dividetuples(t1::NTuple{N}, t2::NTuple{N}) where {N} = ntuple(i->t1[i]/t2[i], N)

"""
ftest(mod::LinearModel...; atol=0::Real)
ftest(mod::LinearModel...; atol::Real=0.0)
For each sequential pair of linear predictors in `mod`, perform an F-test to determine if
the first one fits significantly better than the next.
For each sequential pair of linear models in `mod...`, perform an F-test to determine if
the one model fits significantly better than the other. Models must have been fitted
on the same data, and be nested either in forward or backward direction.
A table is returned containing residual degrees of freedom (DOF), degrees of freedom,
A table is returned containing residual degrees of freedom (DOF),
difference in DOF from the preceding model, sum of squared residuals (SSR), difference in
SSR from the preceding model, R², difference in R² from the preceding model, and F-statistic
and p-value for the comparison between the two models.
Expand All @@ -57,75 +55,95 @@ this is an ANOVA, our null hypothesis is that `Result ~ 1` fits the data as well
```jldoctest
julia> dat = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
Other=[1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]);
julia> model = lm(@formula(Result ~ 1 + Treatment), dat);
Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]));
julia> nullmodel = lm(@formula(Result ~ 1), dat);
julia> bigmodel = lm(@formula(Result ~ 1 + Treatment + Other), dat);
julia> ft = ftest(model.model, nullmodel.model)
Res. DOF DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
Model 1 10 3 0.1283 0.9603
Model 2 11 2 -1 3.2292 -3.1008 0.0000 0.9603 241.6234 <1e-7
julia> model = lm(@formula(Result ~ 1 + Treatment), dat);
julia> bigmodel = lm(@formula(Result ~ 1 + Treatment + Other), dat);
julia> ftest(bigmodel.model, model.model, nullmodel.model)
Res. DOF DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
Model 1 9 4 0.1038 0.9678
Model 2 10 3 -1 0.1283 -0.0245 0.9603 0.0076 2.1236 0.1790
Model 3 11 2 -1 3.2292 -3.1008 0.0000 0.9603 241.6234 <1e-7
julia> ftest(nullmodel.model, model.model)
──────────────────────────────────────────────────────────────────────
Res. DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
──────────────────────────────────────────────────────────────────────
[1] 11 3.2292 -0.0000
[2] 10 -1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-7
──────────────────────────────────────────────────────────────────────
julia> ftest(nullmodel.model, model.model, bigmodel.model)
───────────────────────────────────────────────────────────────────────
Res. DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
───────────────────────────────────────────────────────────────────────
[1] 11 3.2292 -0.0000
[2] 10 -1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-7
[3] 8 -2 0.1017 -0.0266 0.9685 0.0082 1.0456 0.3950
───────────────────────────────────────────────────────────────────────
```
"""
function ftest(mods::LinearModel...; atol=0::Real)
nmodels = length(mods)
for i in 2:nmodels
issubmodel(mods[i], mods[i-1], atol=atol) ||
throw(ArgumentError("F test $i is only valid if model $i is nested in model $(i-1)"))
function ftest(mods::LinearModel...; atol::Real=0.0)
forward = length(mods) == 1 || dof(mods[1]) <= dof(mods[2])
if forward
for i in 2:length(mods)
if dof(mods[i-1]) >= dof(mods[i]) || !issubmodel(mods[i-1], mods[i], atol=atol)
throw(ArgumentError("F test is only valid for nested models"))
end
end
else
for i in 2:length(mods)
if dof(mods[i]) >= dof(mods[i-1]) || !issubmodel(mods[i], mods[i-1], atol=atol)
throw(ArgumentError("F test is only valid for nested models"))
end
end
end

SSR = deviance.(mods)

nparams = dof.(mods)

df2 = _diffn(nparams)
df1 = Int.(dof_residual.(mods))

MSR1 = dividetuples(_diff(SSR), df2)
MSR2 = dividetuples(SSR, df1)[1:nmodels-1]
Δdf = _diff(dof.(mods))
dfr = Int.(dof_residual.(mods))
MSR1 = _diffn(SSR) ./ Δdf
MSR2 = (SSR ./ dfr)
if forward
MSR2 = MSR2[2:end]
dfr_big = dfr[2:end]
else
MSR2 = MSR2[1:end-1]
dfr_big = dfr[1:end-1]
end

fstat = dividetuples(MSR1, MSR2)
pval = PValue.(ccdf.(FDist.(df2, df1[1:nmodels-1]), fstat))
return FTestResult(SSR, nparams, df1, r2.(mods), fstat, pval)
fstat = (NaN, (MSR1 ./ MSR2)...)
pval = (NaN, ccdf.(FDist.(abs.(Δdf), dfr_big), abs.(fstat[2:end]))...)
return FTestResult(SSR, dfr, r2.(mods), fstat, pval)
end

function show(io::IO, ftr::FTestResult{N}) where N
Δdof = _diffn(ftr.dof_resid)
Δssr = _diffn(ftr.ssr)
ΔR² = _diffn(ftr.r2)
Δdof = _diff(ftr.dof_resid)
Δssr = _diff(ftr.ssr)
ΔR² = _diff(ftr.r2)

nc = 10
nc = 9
nr = N
outrows = Matrix{String}(undef, nr+1, nc)

outrows[1, :] = ["", "Res. DOF", "DOF", "ΔDOF", "SSR", "ΔSSR",
outrows[1, :] = ["", "Res. DOF", "ΔDOF", "SSR", "ΔSSR",
"", "ΔR²", "F*", "p(>F)"]

outrows[2, :] = ["Model 1", @sprintf("%.0d", ftr.dof_resid[1]),
@sprintf("%.0d", ftr.dof[1]), " ",
outrows[2, :] = ["[1]", @sprintf("%.0d", ftr.dof_resid[1]), " ",
@sprintf("%.4f", ftr.ssr[1]), " ",
@sprintf("%.4f", ftr.r2[1]), " ", " ", " "]

for i in 2:nr
outrows[i+1, :] = ["Model $i", @sprintf("%.0d", ftr.dof_resid[i]),
@sprintf("%.0d", ftr.dof[i]), @sprintf("%.0d", Δdof[i-1]),
outrows[i+1, :] = ["[$i]",
@sprintf("%.0d", ftr.dof_resid[i]), @sprintf("%.0d", Δdof[i-1]),
@sprintf("%.4f", ftr.ssr[i]), @sprintf("%.4f", Δssr[i-1]),
@sprintf("%.4f", ftr.r2[i]), @sprintf("%.4f", ΔR²[i-1]),
@sprintf("%.4f", ftr.fstat[i-1]), string(ftr.pval[i-1]) ]
@sprintf("%.4f", ftr.fstat[i]), string(PValue(ftr.pval[i])) ]
end
colwidths = length.(outrows)
max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc]
totwidth = sum(max_colwidths) + 2*8

println(io, ''^totwidth)

for r in 1:nr+1
for c in 1:nc
Expand All @@ -134,12 +152,14 @@ function show(io::IO, ftr::FTestResult{N}) where N

padding = " "^(max_colwidths[c]-cur_cell_len)
if c > 1
padding = " "*padding
padding = " "*padding
end

print(io, padding)
print(io, cur_cell)
end
print(io, "\n")
r == 1 && println(io, ''^totwidth)
end
print(io, ''^totwidth)
end
78 changes: 56 additions & 22 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ end
@testset "F test for model comparison" begin
d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
Other=[1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1])
Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]))
mod = lm(@formula(Result~Treatment), d).model
othermod = lm(@formula(Result~Other), d).model
nullmod = lm(@formula(Result~1), d).model
Expand All @@ -483,32 +483,66 @@ end
@test !GLM.issubmodel(bothmod, mod)
@test GLM.issubmodel(othermod, bothmod)

@test_throws ArgumentError ftest(mod, othermod)

d[:Sum] = d[:Treatment] + d[:Other]
d[:Sum] = d[:Treatment] + (d[:Other] .== 1)
summod = lm(@formula(Result~Sum), d).model
@test GLM.issubmodel(summod, bothmod)

ft = ftest(mod, nullmod)
@test isapprox(ft.pval[1].v, 2.481215056713184e-8)
@test sprint(show, ftest(mod, nullmod)) ==
"""
Res. DOF DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
Model 1 10 3 0.1283 0.9603
Model 2 11 2 -1 3.2292 -3.1008 -0.0000 0.9603 241.6234 <1e-7
"""
ft0 = ftest(mod)
@test sprint(show, ft0) == """
─────────────────────────────────────────────────────────
Res. DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
─────────────────────────────────────────────────────────
[1] 10 0.1283 0.9603
─────────────────────────────────────────────────────────"""

ft1a = ftest(mod, nullmod)
@test isnan(ft1a.pval[1])
@test ft1a.pval[2] 2.481215056713184e-8
@test sprint(show, ft1a) == """
──────────────────────────────────────────────────────────────────────
Res. DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
──────────────────────────────────────────────────────────────────────
[1] 10 0.1283 0.9603
[2] 11 1 3.2292 3.1008 -0.0000 -0.9603 241.6234 <1e-7
──────────────────────────────────────────────────────────────────────"""

ft1b = ftest(nullmod, mod)
@test isnan(ft1b.pval[1])
@test ft1b.pval[2] 2.481215056713184e-8
@test sprint(show, ft1b) == """
──────────────────────────────────────────────────────────────────────
Res. DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
──────────────────────────────────────────────────────────────────────
[1] 11 3.2292 -0.0000
[2] 10 -1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-7
──────────────────────────────────────────────────────────────────────"""

bigmod = lm(@formula(Result~Treatment+Other), d).model
ft2 = ftest(bigmod, mod, nullmod)
@test isapprox(ft2.pval[2].v, 2.481215056713184e-8)
@test isapprox(ft2.pval[1].v, 0.17903437900958952)
@test sprint(show, ftest(bigmod, mod, nullmod)) ==
"""
Res. DOF DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
Model 1 9 4 0.1038 0.9678
Model 2 10 3 -1 0.1283 -0.0245 0.9603 0.0076 2.1236 0.1790
Model 3 11 2 -1 3.2292 -3.1008 -0.0000 0.9603 241.6234 <1e-7
"""
ft2a = ftest(nullmod, mod, bigmod)
@test isnan(ft2a.pval[1])
@test ft2a.pval[2] 2.481215056713184e-8
@test ft2a.pval[3] 0.3949973540194818
@test sprint(show, ft2a) == """
───────────────────────────────────────────────────────────────────────
Res. DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
───────────────────────────────────────────────────────────────────────
[1] 11 3.2292 -0.0000
[2] 10 -1 0.1283 -3.1008 0.9603 0.9603 241.6234 <1e-7
[3] 8 -2 0.1017 -0.0266 0.9685 0.0082 1.0456 0.3950
───────────────────────────────────────────────────────────────────────"""

ft2b = ftest(bigmod, mod, nullmod)
@test isnan(ft2b.pval[1])
@test ft2b.pval[2] 0.3949973540194818
@test ft2b.pval[3] 2.481215056713184e-8
@test sprint(show, ft2b) == """
───────────────────────────────────────────────────────────────────────
Res. DOF ΔDOF SSR ΔSSR R² ΔR² F* p(>F)
───────────────────────────────────────────────────────────────────────
[1] 8 0.1017 0.9685
[2] 10 2 0.1283 0.0266 0.9603 -0.0082 1.0456 0.3950
[3] 11 1 3.2292 3.1008 -0.0000 -0.9603 241.6234 <1e-7
───────────────────────────────────────────────────────────────────────"""
end

@testset "F test rounding error" begin
Expand Down

0 comments on commit f50204a

Please sign in to comment.