Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Support Normal and Gamma distributions #59

Merged
merged 1 commit into from

2 participants

@simonster
Owner

This uses the moment estimator of scale parameter as in R and obtains the same results. Because some of the tests (taken from R examples) use an offset, they will fail to converge without #58.

@simonster simonster Support Normal and Gamma distributions
This uses the moment estimator of scale parameter as in R and obtains
the same results. Because some of the tests (taken from R examples)
use an offset, they will fail to converge without #58.
3ab944a
@dmbates dmbates merged commit 01cdeb3 into JuliaStats:master
@simonster simonster referenced this pull request
Closed

Enable Travis checking #60

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Mar 3, 2014
  1. @simonster

    Support Normal and Gamma distributions

    simonster authored
    This uses the moment estimator of scale parameter as in R and obtains
    the same results. Because some of the tests (taken from R examples)
    use an offset, they will fail to converge without #58.
This page is out of date. Refresh to see the latest.
Showing with 84 additions and 9 deletions.
  1. +30 −8 src/glmfit.jl
  2. +17 −1 src/glmtools.jl
  3. +37 −0 test/glmFit.jl
View
38 src/glmfit.jl
@@ -9,6 +9,7 @@ type GlmResp{V<:FPStoredVector} <: ModResp # response in a glm model
offset::V # offset added to linear predictor (usually 0)
var::V # (unweighted) variance at current mu
wts::V # prior weights
+ wrkwts::V # working weights
wrkresid::V # working residuals
function GlmResp(y::V, d::UnivariateDistribution, l::Link,
eta::V, mu::V,
@@ -21,7 +22,7 @@ type GlmResp{V<:FPStoredVector} <: ModResp # response in a glm model
n = length(y)
length(eta) == length(mu) == length(wts) == n || error("mismatched sizes")
lo = length(off); lo == 0 || lo == n || error("offset must have length $n or length 0")
- res = new(y,d,l,similar(y),eta,mu,similar(y),off,similar(y),wts,similar(y))
+ res = new(y,d,l,similar(y),eta,mu,similar(y),off,similar(y),wts,similar(y),similar(y))
updatemu!(res, eta)
res
end
@@ -60,9 +61,21 @@ function wrkresp(r::GlmResp)
map(Add(), r.eta, r.wrkresid)
end
-function wrkwt(r::GlmResp)
- length(r.wts) == 0 && return [r.mueta[i] * r.mueta[i]/r.var[i] for i in 1:length(r.var)]
- [r.wts[i] * r.mueta[i] * r.mueta[i]/r.var[i] for i in 1:length(r.var)]
+function wrkwt!(r::GlmResp)
+ wrkwts = r.wrkwts
+ mueta = r.mueta
+ var = r.var
+ if length(r.wts) == 0
+ for i = 1:length(r.var)
+ @inbounds wrkwts[i] = abs2(mueta[i])/var[i]
+ end
+ else
+ wts = r.wts
+ for i = 1:length(r.var)
+ @inbounds wrkwts[i] = wts[i] * abs2(mueta[i])/var[i]
+ end
+ end
+ wrkwts
end
type GlmMod <: LinPredModel
@@ -96,13 +109,13 @@ function fit(m::GlmMod; verbose::Bool=false, maxIter::Integer=30, minStepFac::Re
cvg = false; p = m.pp; r = m.rr
scratch = similar(p.X.m)
- devold = updatemu!(r, linpred(delbeta!(p, wrkresp(r), wrkwt(r), scratch)))
+ devold = updatemu!(r, linpred(delbeta!(p, wrkresp(r), wrkwt!(r), scratch)))
installbeta!(p)
for i=1:maxIter
f = 1.0
- dev = updatemu!(r, linpred(delbeta!(p, r.wrkresid, wrkwt(r), scratch)))
+ dev = updatemu!(r, linpred(delbeta!(p, r.wrkresid, wrkwt!(r), scratch)))
while dev > devold
- f /= 2.; f > minStepFac || error("step-halving failed at beta0 = $beta0")
+ f /= 2.; f > minStepFac || error("step-halving failed at beta0 = $(p.beta0)")
dev = updatemu!(r, linpred(p, f))
end
installbeta!(p, f)
@@ -141,4 +154,13 @@ glm(s::String, df::AbstractDataFrame, d::UnivariateDistribution) = glm(Formula(p
## scale(m) -> estimate, s, of the scale parameter
## scale(m,true) -> estimate, s^2, of the squared scale parameter
-scale(x::GlmMod,sqr::Bool=false) = 1. # generalize this - only appropriate for Bernoulli and Poisson
+type DispersionFun <: Functor{2} end
+evaluate{T<:FP}(::DispersionFun,wt::T,resid::T) = wt*abs2(resid)
+function scale(m::GlmMod, sqr::Bool=false)
+ if isa(m.rr.d, Union(Bernoulli, Poisson))
+ return 1.
+ end
+
+ s = sum(DispersionFun(), m.rr.wrkwts, m.rr.wrkresid)/df_residual(m)
+ sqr ? s : sqrt(s)
+end
View
18 src/glmtools.jl
@@ -44,7 +44,7 @@ mueta!{T<:FP}(::IdentityLink, me::Vector{T}, eta::Vector{T}) = fill!(me,one(T))
for (l, lf, li, mueta) in
((:CauchitLink, :CauchLink, :CauchInv, :CauchME),
(:CloglogLink, :CLgLgLink, :CLgLgInv, :CLgLgME),
- (:InverseLink, :Recip, :Recip, :InvME),
+ (:InverseLink, :RcpFun, :RcpFun, :InvME),
(:LogitLink, :LogitFun, :LogisticFun, :LogitME),
(:LogLink, :LogFun, :ExpFun, :ExpFun),
(:ProbitLink, :ProbLink, :ProbInv, :ProbME))
@@ -98,6 +98,14 @@ type PoissonDevResid <: Functor{3} end
evaluate{T<:FP}(::PoissonDevResid,y::T,mu::T,wt::T) = 2.wt * (xlogy(y,y/mu) - (y - mu))
result_type{T<:FP}(::PoissonDevResid,::Type{T},::Type{T},::Type{T}) = T
+type GammaDevResid <: Functor{3} end
+evaluate{T<:FP}(::GammaDevResid,y::T,mu::T,wt::T) = -2.wt * (log(y/mu) - (y - mu)/mu)
+result_type{T<:FP}(::GammaDevResid,::Type{T},::Type{T},::Type{T}) = T
+
+type GaussianDevResid <: Functor{3} end
+evaluate{T<:FP}(::GaussianDevResid,y::T,mu::T,wt::T) = wt * abs2(y - mu)
+result_type{T<:FP}(::GaussianDevResid,::Type{T},::Type{T},::Type{T}) = T
+
function devresid!{T<:FP}(::Binomial,dr::Vector{T},y::Vector{T},
mu::Vector{T},wt::Vector{T})
map!(BinomialDevResid(), dr, y, mu, wt)
@@ -106,3 +114,11 @@ function devresid!{T<:FP}(::Poisson,dr::Vector{T},y::Vector{T},
mu::Vector{T},wt::Vector{T})
map!(PoissonDevResid(), dr, y, mu, wt)
end
+function devresid!{T<:FP}(::Gamma,dr::Vector{T},y::Vector{T},
+ mu::Vector{T},wt::Vector{T})
+ map!(GammaDevResid(), dr, y, mu, wt)
+end
+function devresid!{T<:FP}(::Normal,dr::Vector{T},y::Vector{T},
+ mu::Vector{T},wt::Vector{T})
+ map!(GaussianDevResid(), dr, y, mu, wt)
+end
View
37 test/glmFit.jl
@@ -29,3 +29,40 @@ gm4 = glm(admit ~ gre + gpa + rank, df, Binomial(), CauchitLink())
gm5 = glm(admit ~ gre + gpa + rank, df, Binomial(), CloglogLink())
@test_approx_eq deviance(gm5) 458.89439629612616
+## Example with offsets from Venables & Ripley (2002, p.189)
+df = readtable(Pkg.dir("GLM","data","anorexia.csv.gz"))
+df[:Treat] = pool(df[:Treat])
+
+gm6 = glm(Postwt ~ Prewt + Treat, df, Normal(), IdentityLink(), offset=array(df[:Prewt]))
+@test_approx_eq deviance(gm6) 3311.262619919613
+@test_approx_eq coef(gm6) [49.7711090149846,-0.5655388496391,-4.0970655280729,4.5630626529188]
+@test_approx_eq scale(gm6, true) 48.6950385282296
+@test_approx_eq stderr(gm6) [13.3909581420259,0.1611823618518,1.8934926069669,2.1333359226431]
+
+gm7 = fit(glm(Postwt ~ Prewt + Treat, df, Normal(), LogLink(), offset=array(df[:Prewt]), dofit=false),
+ convTol=1e-8)
+@test_approx_eq deviance(gm7) 3265.207242977156
+@test_approx_eq coef(gm7) [3.992326787835955,-0.994452693131178,-0.050698258703974,0.051494029957641]
+@test_approx_eq scale(gm7, true) 48.01787789178518
+@test_approx_eq stderr(gm7) [0.157167944259695,0.001886285986164,0.022584069426311,0.023882826190166]
+
+## Gamma example from McCullagh & Nelder (1989, pp. 300-2)
+clotting = DataFrame(u = log([5,10,15,20,30,40,60,80,100]),
+ lot1 = [118,58,42,35,27,25,21,19,18])
+gm8 = glm(lot1 ~ u, clotting, Gamma())
+@test_approx_eq deviance(gm8) 0.01672971517848353
+@test_approx_eq coef(gm8) [-0.01655438172784895,0.01534311491072141]
+@test_approx_eq scale(gm8, true) 0.002446059333495581
+@test_approx_eq stderr(gm8) [0.0009275466067257,0.0004149596425600]
+
+gm9 = fit(glm(lot1 ~ u, clotting, Gamma(), LogLink(), dofit=false), convTol=1e-8)
+@test_approx_eq deviance(gm9) 0.16260829451739
+@test_approx_eq coef(gm9) [5.50322528458221,-0.60191617825971]
+@test_approx_eq scale(gm9, true) 0.02435442293561081
+@test_approx_eq stderr(gm9) [0.19030107482720,0.05530784660144]
+
+gm10 = fit(glm(lot1 ~ u, clotting, Gamma(), IdentityLink(), dofit=false), convTol=1e-8)
+@test_approx_eq deviance(gm10) 0.60845414895344
+@test_approx_eq coef(gm10) [99.250446880986,-18.374324929002]
+@test_approx_eq scale(gm10, true) 0.1041772704067886
+@test_approx_eq stderr(gm10) [17.864388462865,4.297968703823]
Something went wrong with that request. Please try again.