diff --git a/lifelines/fitters/exponential_fitter.py b/lifelines/fitters/exponential_fitter.py index 64b5828d0..1a9e7b84e 100644 --- a/lifelines/fitters/exponential_fitter.py +++ b/lifelines/fitters/exponential_fitter.py @@ -89,3 +89,48 @@ def _bounds(self, alpha, ci_labels): df[ci_labels[0]] = sv + alpha2 * error df[ci_labels[1]] = sv - alpha2 * error return df + + def _compute_standard_errors(self): + n = self.durations.shape[0] + var_lambda_ = self.lambda_ ** 2 / n + return pd.DataFrame([[np.sqrt(var_lambda_)]], + index=['se'], columns=['lambda_']) + + def _compute_confidence_bounds_of_parameters(self): + se = self._compute_standard_errors().ix['se'] + alpha2 = inv_normal_cdf((1. + self.alpha) / 2.) + return pd.DataFrame([ + np.array([self.lambda_]) + alpha2 * se, + np.array([self.lambda_]) - alpha2 * se, + ], columns=['lambda_'], index=['upper-bound', 'lower-bound']) + + @property + def summary(self): + """Summary statistics describing the fit. + Set alpha property in the object before calling. + + Returns + ------- + df : pd.DataFrame + Contains columns coef, se(coef), lower, upper""" + lower_upper_bounds = self._compute_confidence_bounds_of_parameters() + df = pd.DataFrame(index=['lambda_']) + df['coef'] = [self.lambda_] + df['se(coef)'] = self._compute_standard_errors().ix['se'] + df['lower %.2f' % self.alpha] = lower_upper_bounds.ix['lower-bound'] + df['upper %.2f' % self.alpha] = lower_upper_bounds.ix['upper-bound'] + return df + + def print_summary(self): + """ + Print summary statistics describing the fit.
+ + """ + df = self.summary + + # Print information about data first + print('n={}, number of events={}'.format(self.durations.shape[0], + np.where(self.event_observed)[0].shape[0]), + end='\n\n') + print(df.to_string(float_format=lambda f: '{:.3e}'.format(f))) + return diff --git a/lifelines/fitters/weibull_fitter.py b/lifelines/fitters/weibull_fitter.py index 8ff5cd629..f3100bb35 100644 --- a/lifelines/fitters/weibull_fitter.py +++ b/lifelines/fitters/weibull_fitter.py @@ -184,3 +184,47 @@ def sensitivity_analysis(lambda_, rho, var_lambda_, var_rho_, T): df[ci_labels[0]] = self.cumulative_hazard_at_times(self.timeline) + alpha2 * std_cumulative_hazard df[ci_labels[1]] = self.cumulative_hazard_at_times(self.timeline) - alpha2 * std_cumulative_hazard return df + + def _compute_standard_errors(self): + var_lambda_, var_rho_ = inv(self._jacobian).diagonal() + return pd.DataFrame([[np.sqrt(var_lambda_), np.sqrt(var_rho_)]], + index=['se'], columns=['lambda_', 'rho_']) + + def _compute_confidence_bounds_of_parameters(self): + se = self._compute_standard_errors().ix['se'] + alpha2 = inv_normal_cdf((1. + self.alpha) / 2.) + return pd.DataFrame([ + np.array([self.lambda_, self.rho_]) + alpha2 * se, + np.array([self.lambda_, self.rho_]) - alpha2 * se, + ], columns=['lambda_', 'rho_'], index=['upper-bound', 'lower-bound']) + + @property + def summary(self): + """Summary statistics describing the fit. + Set alpha property in the object before calling. 
+ + Returns + ------- + df : pd.DataFrame + Contains columns coef, se(coef), lower, upper""" + lower_upper_bounds = self._compute_confidence_bounds_of_parameters() + df = pd.DataFrame(index=['lambda_', 'rho_']) + df['coef'] = [self.lambda_, self.rho_] + df['se(coef)'] = self._compute_standard_errors().ix['se'] + df['lower %.2f' % self.alpha] = lower_upper_bounds.ix['lower-bound'] + df['upper %.2f' % self.alpha] = lower_upper_bounds.ix['upper-bound'] + return df + + def print_summary(self): + """ + Print summary statistics describing the fit. + + """ + df = self.summary + + # Print information about data first + print('n={}, number of events={}'.format(self.durations.shape[0], + np.where(self.event_observed)[0].shape[0]), + end='\n\n') + print(df.to_string(float_format=lambda f: '{:.3e}'.format(f))) + return diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index 565bd4982..919b87b1b 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -275,10 +275,10 @@ def datetimes_to_durations(start_times, end_times, fill_date=datetime.today(), f deaths_after_cutoff = end_times_ > fill_date C[deaths_after_cutoff] = False - T = (end_times_ - start_times_).map(lambda x: x.astype(freq_string).astype(float)) + T = (end_times_ - start_times_).values.astype(freq_string).astype(float) if (T < 0).sum(): warnings.warn("Warning: some values of start_times are after end_times") - return T.values, C.values + return T, C.values def l1_log_loss(event_times, predicted_event_times, event_observed=None): diff --git a/lifelines/version.py b/lifelines/version.py index f52b33a07..53f397ac2 100644 --- a/lifelines/version.py +++ b/lifelines/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '0.9.0.0' +__version__ = '0.9.1.0'