diff --git a/docs/src/cox.md b/docs/src/cox.md index 6a659d9..815b70c 100644 --- a/docs/src/cox.md +++ b/docs/src/cox.md @@ -11,7 +11,12 @@ where ``\lambda(t|X_i)`` is the estimated hazard for sample ``i``, ``\lambda_0`` ## API ```@docs +CoxModel StatsBase.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; kwargs...) +coxph +baseline_hazard +baseline_cumulative_hazard +baseline_survival ``` ## References diff --git a/src/Survival.jl b/src/Survival.jl index 6288b82..f14d0a3 100644 --- a/src/Survival.jl +++ b/src/Survival.jl @@ -25,7 +25,10 @@ export nobs, dof, vcov, - stderr + stderr, + baseline_hazard, + baseline_cumulative_hazard, + baseline_survival abstract type AbstractEstimator end diff --git a/src/cox.jl b/src/cox.jl index c96c954..bcefc9d 100644 --- a/src/cox.jl +++ b/src/cox.jl @@ -80,10 +80,32 @@ function update_cox!(cs::CoxSum, fs, ls) end end -# Structure of Cox regression output - +""" + CoxModel + +An immutable type containing the result of a fitted Cox proportional hazard model. +The type has the following fields: + +* `aux`: auxiliary quantities used to fit the model +* `times`: times at which non-censored events happened +* `haz`: baseline hazard +* `chaz`: baseline cumulative hazard +* `survival`: baseline survival probability +* `β`: fitted coefficients +* `loglik`: log-likelihood at `β` +* `score`: score at `β` +* `fischer_info`: Fischer information matrix at `β` +* `vcov`: variance-covariance matrix at `β` + +Use `fit(CoxModel, ...)` to compute the estimates and construct +this type. +""" struct CoxModel{T <: Real} <: RegressionModel aux::CoxAux{T} + times::Array{T, 1} + haz::Array{Float64, 1} + chaz::Array{Float64, 1} + survival::Array{Float64, 1} β::Array{T,1} loglik::T score::Array{T,1} @@ -117,6 +139,41 @@ StatsBase.vcov(obj::CoxModel) = obj.vcov StatsBase.stderr(obj::CoxModel) = sqrt.(diag(vcov(obj))) +# get baseline stats + +""" + baseline_hazard(c::CoxModel) + +Compute the baseline hazard (when all regressors equal zero) of a fitted `CoxModel` +using the Nelson-Aalen estimator. Return times at which at least an envent happened +and the value of the baseline hazard at those times. +""" +baseline_hazard(c::CoxModel) = c.times, c.haz + +""" + baseline_cumulative_hazard(c::CoxModel) + +Compute the baseline cumulative hazard (when all regressors equal zero) of a fitted +`CoxModel` using the Nelson-Aalen estimator. Return times at which at least an envent +happened and the value of the baseline cumulative hazard at those times. +""" +baseline_cumulative_hazard(c::CoxModel) = c.times, c.chaz + +""" + baseline_cumulative_hazard(c::CoxModel) + +Compute the baseline survival function (when all regressors equal zero) of a fitted +`CoxModel` by exponentiating the negative cumulative hazard. Return times at which at +least an envent happened and the value of the baseline survival function at those times. +""" +baseline_survival(c::CoxModel) = c.times, c.survival + +# delegate from DataFrameRegressionModel. +for op in [:baseline_hazard, :baseline_cumulative_hazard, :baseline_survival] + @eval $op(m::StatsModels.DataFrameRegressionModel{S, T}) where {S<:CoxModel, T} = + $op(m.model) +end + #compute negative loglikelihood function _cox_f(β, c::CoxAux{T})::T where T @@ -187,7 +244,12 @@ function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost = zero(T), kwarg fgh! = (β, grad, hes, compute_derivatives) -> _cox_fgh!(β, grad, hes, c, compute_derivatives) β, neg_ll, grad, hes = newton_raphson(fgh!, zeros(T, size(X,2)); kwargs...) - CoxModel(c, β, -neg_ll, -grad, hes, pinv(hes)) + times = T[s[i].time for i in c.fs] + haz = fill(0.0, length(times)) + @. haz = (c.ls - c.fs + 1) / c.θ.tails + chaz = cumsum(haz) + survival = @. exp(-chaz) + CoxModel(c, times, haz, chaz, survival, β, -neg_ll, -grad, hes, pinv(hes)) end StatsModels.drop_intercept(::Type{CoxModel}) = true @@ -196,7 +258,7 @@ StatsModels.drop_intercept(::Type{CoxModel}) = true fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; kwargs...) Given a matrix `M` of predictors and a corresponding vector of events, compute the -Cox proportional hazard model estimate of coefficients. Returns a `CoxModel` +Cox proportional hazard model estimate of coefficients. Returns a [`CoxModel`](@ref) object. """ function StatsBase.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; kwargs...) @@ -206,4 +268,10 @@ function StatsBase.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; k _coxph(X, s; kwargs...) end + +""" + coxph(M, y; kwargs...) + +Short-hand for `fit(CoxModel, M, y; kwargs...)` +""" coxph(M, y; kwargs...) = fit(CoxModel, M, y; kwargs...) diff --git a/test/runtests.jl b/test/runtests.jl index 55b681b..80e97f0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -188,6 +188,16 @@ end rossi[:event] = EventTime.(rossi[:week],rossi[:arrest] .== 1) outcome = coxph(@formula(event ~ fin+age+race+wexp+mar+paro+prio), rossi; tol = 1e-8) + + times, hazard = baseline_hazard(outcome) + na = fit(NelsonAalen, rossi[:event]) + @test times == na.times[na.nevents .> 0] + @test hazard ≈ na.nevents[na.nevents .> 0]./outcome.model.aux.θ.tails atol = 1e-8 + times, cumulative_hazard = baseline_cumulative_hazard(outcome) + @test cumulative_hazard ≈ cumsum(hazard) atol = 1e-8 + times, survival = baseline_survival(outcome) + @test survival ≈ exp.(-cumulative_hazard) atol = 1e-8 + outcome_coefmat = coeftable(outcome) coef_matrix = ModelMatrix(ModelFrame(@formula(event ~ 0+fin+age+race+wexp+mar+paro+prio), rossi)).m