/
log_normal_fitter.py
91 lines (74 loc) · 3.08 KB
/
log_normal_fitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
from scipy.special import erfinv
import autograd.numpy as np
from autograd.scipy.stats import norm
from lifelines.fitters import KnownModelParametericUnivariateFitter
from lifelines.utils import CensoringType
class LogNormalFitter(KnownModelParametericUnivariateFitter):
r"""
This class implements an Log Normal model for univariate data. The model has parameterized
form:
.. math:: S(t) = 1 - \Phi\left(\frac{\log(t) - \mu}{\sigma}\right), \;\; \sigma >0
where :math:`\Phi` is the CDF of a standard normal random variable.
This implies the cumulative hazard rate is
.. math:: H(t) = -\log\left(1 - \Phi\left(\frac{\log(t) - \mu}{\sigma}\right)\right)
After calling the `.fit` method, you have access to properties like: ``survival_function_``, ``mu_``, ``sigma_``.
A summary of the fit is available with the method ``print_summary()``
Parameters
-----------
alpha: float, optional (default=0.05)
the level in the confidence intervals.
Attributes
----------
cumulative_hazard_ : DataFrame
The estimated cumulative hazard (with custom timeline if provided)
hazard_ : DataFrame
The estimated hazard (with custom timeline if provided)
survival_function_ : DataFrame
The estimated survival function (with custom timeline if provided)
cumumlative_density_ : DataFrame
The estimated cumulative density function (with custom timeline if provided)
variance_matrix_ : numpy array
The variance matrix of the coefficients
median_: float
The median time to event
mu_: float
The fitted parameter in the model
sigma_: float
The fitted parameter in the model
durations: array
The durations provided
event_observed: array
The event_observed variable provided
timeline: array
The time line to use for plotting and indexing
entry: array or None
The entry array provided, or None
"""
_fitted_parameter_names = ["mu_", "sigma_"]
_bounds = [(None, None), (0, None)]
def _create_initial_point(self, Ts, E, *args):
if CensoringType.is_right_censoring(self):
log_T = np.log(Ts[0])
elif CensoringType.is_left_censoring(self):
log_T = np.log(Ts[1])
elif CensoringType.is_interval_censoring(self):
log_T = np.log(Ts[1])
return np.array([np.median(log_T), 1.0])
@property
def median_(self):
return np.exp(self.mu_)
def percentile(self, p):
return np.exp(self.mu_ + np.sqrt(2 * self.sigma_ ** 2) * erfinv(1 - 2 * p))
def _cumulative_hazard(self, params, times):
mu_, sigma_ = params
Z = (np.log(times) - mu_) / sigma_
return -norm.logsf(Z)
def _log_hazard(self, params, times):
mu_, sigma_ = params
Z = (np.log(times) - mu_) / sigma_
return norm.logpdf(Z, loc=0, scale=1) - np.log(sigma_) - np.log(times) - norm.logsf(Z)
def _log_1m_sf(self, params, times):
mu_, sigma_ = params
Z = (np.log(times) - mu_) / sigma_
return norm.logcdf(Z)