Skip to content

Commit

Permalink
Merge pull request #231 from CamDavidsonPilon/summaries
Browse files Browse the repository at this point in the history
Summaries + datetime_to_duration bug fix
  • Loading branch information
CamDavidsonPilon committed Mar 20, 2016
2 parents 0539a21 + ad03ea3 commit 5ced989
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 3 deletions.
45 changes: 45 additions & 0 deletions lifelines/fitters/exponential_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,48 @@ def _bounds(self, alpha, ci_labels):
df[ci_labels[0]] = sv + alpha2 * error
df[ci_labels[1]] = sv - alpha2 * error
return df

def _compute_standard_errors(self):
    """Return the standard error of the fitted ``lambda_`` parameter.

    The variance is taken as ``lambda_ ** 2 / n`` where ``n`` is the
    length of ``self.durations``; the standard error is its square root.

    Returns
    -------
    pd.DataFrame
        A single row labelled 'se' with a single column 'lambda_'.
    """
    # NOTE(review): the divisor is the total number of observations; if
    # censored observations are present, confirm this should not be the
    # number of observed events instead.
    sample_size = self.durations.shape[0]
    lambda_variance = self.lambda_ ** 2 / sample_size
    standard_error = np.sqrt(lambda_variance)
    return pd.DataFrame([[standard_error]],
                        index=['se'], columns=['lambda_'])

def _compute_confidence_bounds_of_parameters(self):
    """Return upper and lower confidence bounds for ``lambda_``.

    The bounds are ``lambda_ +/- alpha2 * se``, where ``se`` comes from
    ``_compute_standard_errors`` and ``alpha2`` is
    ``inv_normal_cdf((1 + self.alpha) / 2)``.

    Returns
    -------
    pd.DataFrame
        Rows 'upper-bound' and 'lower-bound' (in that order) with a
        single column 'lambda_'.
    """
    # Label-based row lookup; ``.loc`` replaces ``.ix``, which was
    # deprecated in pandas 0.20 and removed in pandas 1.0.
    se = self._compute_standard_errors().loc['se']
    # presumably the two-sided standard-normal critical value for the
    # confidence level ``self.alpha`` -- confirm against inv_normal_cdf.
    alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
    estimate = np.array([self.lambda_])
    return pd.DataFrame([
        estimate + alpha2 * se,
        estimate - alpha2 * se,
    ], columns=['lambda_'], index=['upper-bound', 'lower-bound'])

@property
def summary(self):
    """Summary statistics describing the fit.

    Set the ``alpha`` property on the object before calling; it is used
    to label the bound columns.

    Returns
    -------
    df : pd.DataFrame
        Indexed by 'lambda_'. Contains columns 'coef', 'se(coef)',
        'lower <alpha>' and 'upper <alpha>', where <alpha> is
        ``self.alpha`` formatted with two decimals.
    """
    lower_upper_bounds = self._compute_confidence_bounds_of_parameters()
    df = pd.DataFrame(index=['lambda_'])
    df['coef'] = [self.lambda_]
    # ``.loc`` replaces ``.ix`` (deprecated in pandas 0.20, removed in
    # pandas 1.0) for these label-based row lookups. The resulting
    # Series align with ``df`` on the parameter name.
    df['se(coef)'] = self._compute_standard_errors().loc['se']
    df['lower %.2f' % self.alpha] = lower_upper_bounds.loc['lower-bound']
    df['upper %.2f' % self.alpha] = lower_upper_bounds.loc['upper-bound']
    return df

def print_summary(self):
    """
    Print summary statistics describing the fit.

    Writes a header line with the number of observations and the number
    of observed events, a blank line, and then the ``summary`` table
    with floats rendered in 3-digit scientific notation.
    """
    summary_table = self.summary

    # Describe the data before the parameter table.
    n_observations = self.durations.shape[0]
    n_events = np.where(self.event_observed)[0].shape[0]
    header = 'n={}, number of events={}'.format(n_observations, n_events)
    print(header, end='\n\n')

    print(summary_table.to_string(float_format='{:.3e}'.format))
44 changes: 44 additions & 0 deletions lifelines/fitters/weibull_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,47 @@ def sensitivity_analysis(lambda_, rho, var_lambda_, var_rho_, T):
df[ci_labels[0]] = self.cumulative_hazard_at_times(self.timeline) + alpha2 * std_cumulative_hazard
df[ci_labels[1]] = self.cumulative_hazard_at_times(self.timeline) - alpha2 * std_cumulative_hazard
return df

def _compute_standard_errors(self):
    """Return the standard errors of the fitted ``lambda_`` and ``rho_``.

    The two variances are read off the diagonal of the inverse of
    ``self._jacobian`` (lambda_ first, rho_ second) and square-rooted.

    Returns
    -------
    pd.DataFrame
        A single row labelled 'se' with columns 'lambda_' and 'rho_'.
    """
    # presumably ``_jacobian`` is the 2x2 information matrix stored
    # during fitting -- confirm against the fit routine.
    lambda_variance, rho_variance = inv(self._jacobian).diagonal()
    standard_errors = [np.sqrt(lambda_variance), np.sqrt(rho_variance)]
    return pd.DataFrame([standard_errors],
                        index=['se'], columns=['lambda_', 'rho_'])

def _compute_confidence_bounds_of_parameters(self):
    """Return upper and lower confidence bounds for ``lambda_`` and ``rho_``.

    The bounds are ``estimate +/- alpha2 * se``, where ``se`` comes from
    ``_compute_standard_errors`` and ``alpha2`` is
    ``inv_normal_cdf((1 + self.alpha) / 2)``.

    Returns
    -------
    pd.DataFrame
        Rows 'upper-bound' and 'lower-bound' (in that order) with
        columns 'lambda_' and 'rho_'.
    """
    # Label-based row lookup; ``.loc`` replaces ``.ix``, which was
    # deprecated in pandas 0.20 and removed in pandas 1.0.
    se = self._compute_standard_errors().loc['se']
    # presumably the two-sided standard-normal critical value for the
    # confidence level ``self.alpha`` -- confirm against inv_normal_cdf.
    alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
    estimates = np.array([self.lambda_, self.rho_])
    return pd.DataFrame([
        estimates + alpha2 * se,
        estimates - alpha2 * se,
    ], columns=['lambda_', 'rho_'], index=['upper-bound', 'lower-bound'])

@property
def summary(self):
    """Summary statistics describing the fit.

    Set the ``alpha`` property on the object before calling; it is used
    to label the bound columns.

    Returns
    -------
    df : pd.DataFrame
        Indexed by 'lambda_' and 'rho_'. Contains columns 'coef',
        'se(coef)', 'lower <alpha>' and 'upper <alpha>', where <alpha>
        is ``self.alpha`` formatted with two decimals.
    """
    lower_upper_bounds = self._compute_confidence_bounds_of_parameters()
    df = pd.DataFrame(index=['lambda_', 'rho_'])
    df['coef'] = [self.lambda_, self.rho_]
    # ``.loc`` replaces ``.ix`` (deprecated in pandas 0.20, removed in
    # pandas 1.0) for these label-based row lookups. The resulting
    # Series align with ``df`` on the parameter names.
    df['se(coef)'] = self._compute_standard_errors().loc['se']
    df['lower %.2f' % self.alpha] = lower_upper_bounds.loc['lower-bound']
    df['upper %.2f' % self.alpha] = lower_upper_bounds.loc['upper-bound']
    return df

def print_summary(self):
    """
    Print summary statistics describing the fit.

    Writes a header line with the number of observations and the number
    of observed events, a blank line, and then the ``summary`` table
    with floats rendered in 3-digit scientific notation.
    """
    summary_table = self.summary

    # Describe the data before the parameter table.
    n_observations = self.durations.shape[0]
    n_events = np.where(self.event_observed)[0].shape[0]
    header = 'n={}, number of events={}'.format(n_observations, n_events)
    print(header, end='\n\n')

    print(summary_table.to_string(float_format='{:.3e}'.format))
4 changes: 2 additions & 2 deletions lifelines/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,10 +275,10 @@ def datetimes_to_durations(start_times, end_times, fill_date=datetime.today(), f
deaths_after_cutoff = end_times_ > fill_date
C[deaths_after_cutoff] = False

T = (end_times_ - start_times_).map(lambda x: x.astype(freq_string).astype(float))
T = (end_times_ - start_times_).values.astype(freq_string).astype(float)
if (T < 0).sum():
warnings.warn("Warning: some values of start_times are after end_times")
return T.values, C.values
return T, C.values


def l1_log_loss(event_times, predicted_event_times, event_observed=None):
Expand Down
2 changes: 1 addition & 1 deletion lifelines/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '0.9.0.0'
__version__ = '0.9.1.0'

0 comments on commit 5ced989

Please sign in to comment.