
Merge pull request #471 from CamDavidsonPilon/v0.14.3
V0.14.3
CamDavidsonPilon committed May 24, 2018
2 parents 3556625 + 94c6472 commit c1bc505
Showing 7 changed files with 155 additions and 29 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
### Changelogs

#### 0.14.3
- fixes a bug when subtracting or dividing two `UnivariateFitters` with labels.
- fixes an import error when using `CoxTimeVaryingFitter` predict methods (see the sketch below).
- adds a `columns` argument to the `CoxTimeVaryingFitter` and `CoxPHFitter` `plot` methods to plot only a subset of columns.

#### 0.14.2
- some quality of life improvements for working with `CoxTimeVaryingFitter` including new `predict_` methods.

#### 0.14.1
- fixed bug with using weights and strata in `CoxPHFitter`
- fixed bug in using non-integer weights in `KaplanMeierFitter`
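
A minimal sketch of the predict-methods fix in use, assuming the Stanford heart transplant dataset bundled with lifelines (the same data the new tests further down use):

```python
from lifelines.estimation import CoxTimeVaryingFitter
from lifelines.datasets import load_stanford_heart_transplants

# long-format data with id, start/stop intervals, an event flag and covariates
heart = load_stanford_heart_transplants()

ctv = CoxTimeVaryingFitter()
ctv.fit(heart, id_col='id', event_col='event')

# these previously failed with an import error because `_get_index` was not
# imported in cox_time_varying_fitter.py (see the diff below)
partial_hazards = ctv.predict_partial_hazard(heart)
log_partial_hazards = ctv.predict_log_partial_hazard(heart)
print(partial_hazards.shape, log_partial_hazards.shape)
```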
18 changes: 12 additions & 6 deletions lifelines/fitters/__init__.py
@@ -46,9 +46,12 @@ def subtract(other):
other_estimate = getattr(other, estimate)
new_index = np.concatenate((other_estimate.index, self_estimate.index))
new_index = np.unique(new_index)
return self_estimate.reindex(new_index, method='ffill') - \
other_estimate.reindex(new_index, method='ffill')

return pd.DataFrame(
self_estimate.reindex(new_index, method='ffill').values - \
other_estimate.reindex(new_index, method='ffill').values,
index=new_index,
columns=['diff']
)
subtract.__doc__ = doc_string
return subtract

@@ -67,9 +70,12 @@ def divide(other):
other_estimate = getattr(other, estimate)
new_index = np.concatenate((other_estimate.index, self_estimate.index))
new_index = np.unique(new_index)
return self_estimate.reindex(new_index, method='ffill') / \
other_estimate.reindex(new_index, method='ffill')

return pd.DataFrame(
self_estimate.reindex(new_index, method='ffill').values / \
other_estimate.reindex(new_index, method='ffill').values,
index=new_index,
columns=['ratio']
)
divide.__doc__ = doc_string
return divide
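
With the change above, `subtract` and `divide` now return a single-column DataFrame ('diff' or 'ratio') indexed on the union of the two timelines, which sidesteps the label-mismatch bug noted in the changelog. A minimal sketch with made-up durations:

```python
import numpy as np
from lifelines.estimation import KaplanMeierFitter

# two small, made-up samples of durations
T_a = np.array([1, 3, 3, 6, 8, 9])
T_b = np.array([2, 4, 5, 5, 7, 10])

kmf_a = KaplanMeierFitter()
kmf_a.fit(T_a, label='A')

kmf_b = KaplanMeierFitter()
kmf_b.fit(T_b, label='B')

diff = kmf_a.subtract(kmf_b)   # DataFrame with a single 'diff' column
ratio = kmf_a.divide(kmf_b)    # DataFrame with a single 'ratio' column

print(diff.columns.tolist())   # ['diff']
print(ratio.columns.tolist())  # ['ratio']
```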

38 changes: 27 additions & 11 deletions lifelines/fitters/cox_time_varying_fitter.py
@@ -16,7 +16,7 @@
significance_code, normalize,\
pass_for_numeric_dtypes_or_raise, check_low_var,\
check_for_overlapping_intervals, check_complete_separation_low_variance,\
ConvergenceWarning, StepSizer
ConvergenceWarning, StepSizer, _get_index


class CoxTimeVaryingFitter(BaseFitter):
@@ -184,12 +184,13 @@ def _newton_rhaphson(self, df, stop_times_events, show_progress=False, step_size

# Save these as pending result
hessian, gradient = h, g
norm_delta = norm(delta)

if show_progress:
print("Iteration %d: norm_delta = %.6f, step_size = %.3f, ll = %.6f, seconds_since_start = %.1f" % (i, norm(delta), step_size, ll, time.time() - start))
print("Iteration %d: norm_delta = %.6f, step_size = %.3f, ll = %.6f, seconds_since_start = %.1f" % (i, norm_delta, step_size, ll, time.time() - start))

# convergence criteria
if norm(delta) < precision:
if norm_delta < precision:
converging, completed = False, True
elif i >= 50:
# 50 iterations steps with N-R is a lot.
@@ -199,12 +200,12 @@ def _newton_rhaphson(self, df, stop_times_events, show_progress=False, step_size
converging, completed = False, False
elif abs(ll - previous_ll) < precision:
converging, completed = False, True
elif abs(ll) < 0.0001 and norm(delta) > 1.0:
elif abs(ll) < 0.0001 and norm_delta > 1.0:
warnings.warn("The log-likelihood is getting suspciously close to 0 and the delta is still large. There may be complete separation in the dataset. This may result in incorrect inference of coefficients. \
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ", ConvergenceWarning)
converging, completed = False, False

step_size = step_sizer.update(norm(delta)).next()
step_size = step_sizer.update(norm_delta).next()

beta += delta

@@ -358,20 +359,35 @@ def print_summary(self):
end='\n\n')
return

def plot(self, standardized=False, **kwargs):
def plot(self, standardized=False, columns=None, **kwargs):
"""
standardized: standardize each estimated coefficient and confidence interval endpoints by the standard error of the estimate.
Produces a visual representation of the fitted coefficients, including their standard errors and magnitudes.
Parameters:
standardized: standardize each estimated coefficient and confidence interval
endpoints by the standard error of the estimate.
columns : list-like, default None
plot only this subset of the fitted coefficients; if None, all coefficients are plotted.
Returns:
ax: the matplotlib axis that can be edited.
"""
from matplotlib import pyplot as plt

ax = kwargs.get('ax', None) or plt.figure().add_subplot(111)
yaxis_locations = range(len(self.hazards_.columns))

summary = self.summary
lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
hazards = self.hazards_.values[0].copy()
if columns is not None:
yaxis_locations = range(len(columns))
summary = self.summary.loc[columns]
lower_bound = self.confidence_intervals_[columns].loc['lower-bound'].copy()
upper_bound = self.confidence_intervals_[columns].loc['upper-bound'].copy()
hazards = self.hazards_[columns].values[0].copy()
else:
yaxis_locations = range(len(self.hazards_.columns))
summary = self.summary
lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
hazards = self.hazards_.values[0].copy()

if standardized:
se = summary['se(coef)']
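A minimal sketch of the new `columns` argument on `CoxTimeVaryingFitter.plot`, mirroring the plotting test added below and assuming the Stanford heart transplant dataset:

```python
from matplotlib import pyplot as plt
from lifelines.estimation import CoxTimeVaryingFitter
from lifelines.datasets import load_stanford_heart_transplants

heart = load_stanford_heart_transplants()
ctv = CoxTimeVaryingFitter()
ctv.fit(heart, id_col='id', event_col='event')

# plot only the 'age' and 'year' coefficients; omit `columns` to plot all of them
ax = ctv.plot(columns=['age', 'year'])
ax.set_title('age and year coefficients with confidence intervals')
plt.show()
```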
21 changes: 14 additions & 7 deletions lifelines/fitters/coxph_fitter.py
@@ -609,27 +609,34 @@ def _compute_baseline_survival(self):
survival_df.columns = ['baseline survival']
return survival_df

def plot(self, standardized=False, **kwargs):
def plot(self, standardized=False, columns=None, **kwargs):
"""
Produces a visual representation of the fitted coefficients, including their standard errors and magnitudes.
Parameters:
standardized: standardize each estimated coefficient and confidence interval
endpoints by the standard error of the estimate.
columns : list-like, default None
plot only this subset of the fitted coefficients; if None, all coefficients are plotted.
Returns:
ax: the matplotlib axis that can be edited.
"""
from matplotlib import pyplot as plt

ax = kwargs.get('ax', None) or plt.figure().add_subplot(111)
yaxis_locations = range(len(self.hazards_.columns))

summary = self.summary
lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
hazards = self.hazards_.values[0].copy()
if columns is not None:
yaxis_locations = range(len(columns))
summary = self.summary.loc[columns]
lower_bound = self.confidence_intervals_[columns].loc['lower-bound'].copy()
upper_bound = self.confidence_intervals_[columns].loc['upper-bound'].copy()
hazards = self.hazards_[columns].values[0].copy()
else:
yaxis_locations = range(len(self.hazards_.columns))
summary = self.summary
lower_bound = self.confidence_intervals_.loc['lower-bound'].copy()
upper_bound = self.confidence_intervals_.loc['upper-bound'].copy()
hazards = self.hazards_.values[0].copy()

if standardized:
se = summary['se(coef)']
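The same `columns` argument on `CoxPHFitter.plot`, here combined with `standardized`; a sketch assuming the bundled regression dataset, whose covariates include var1 and var2 (as in the tests below):

```python
from lifelines.estimation import CoxPHFitter
from lifelines.datasets import load_regression_dataset

df = load_regression_dataset()
cph = CoxPHFitter()
cph.fit(df, duration_col='T', event_col='E')

# standardize each coefficient by its standard error and show only var1 and var2
ax = cph.plot(standardized=True, columns=['var1', 'var2'])
```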
2 changes: 1 addition & 1 deletion lifelines/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '0.14.2'
__version__ = '0.14.3'
67 changes: 66 additions & 1 deletion tests/test_estimation.py
@@ -239,6 +239,20 @@ def test_subtraction_function(self, positive_sample_lifetimes, univariate_fitters

npt.assert_array_almost_equal(f1.subtract(f1).sum().values, 0.0)

def test_subtract_function_with_labelled_data(self, positive_sample_lifetimes, univariate_fitters):
T2 = np.arange(1, 50)
for fitter in univariate_fitters:
f1 = fitter()
f2 = fitter()

f1.fit(positive_sample_lifetimes[0], label='A')
f2.fit(T2, label='B')

result = f1.subtract(f2)
assert result.columns == ['diff']
assert result.shape[1] == 1


def test_divide_function(self, positive_sample_lifetimes, univariate_fitters):
T2 = np.arange(1, 50)
for fitter in univariate_fitters:
@@ -248,11 +262,25 @@ def test_divide_function(self, positive_sample_lifetimes, univariate_fitters):
f1.fit(positive_sample_lifetimes[0])
f2.fit(T2)

result = f1.subtract(f2)
result = f1.divide(f2)
assert result.shape[0] == (np.unique(np.concatenate((f1.timeline, f2.timeline))).shape[0])

npt.assert_array_almost_equal(np.log(f1.divide(f1)).sum().values, 0.0)

def test_divide_function_with_labelled_data(self, positive_sample_lifetimes, univariate_fitters):
T2 = np.arange(1, 50)
for fitter in univariate_fitters:
f1 = fitter()
f2 = fitter()

f1.fit(positive_sample_lifetimes[0], label='A')
f2.fit(T2, label='B')

result = f1.divide(f2)
assert result.columns == ['ratio']
assert result.shape[1] == 1


def test_valueerror_is_thrown_if_alpha_out_of_bounds(self, univariate_fitters):
for fitter in univariate_fitters:
with pytest.raises(ValueError):
@@ -1532,3 +1560,40 @@ def test_output_versus_Rs_against_standford_heart_transplant(self, ctv, heart):
npt.assert_almost_equal(ctv.summary['se(coef)'].values, [0.0137, 0.0705, 0.3672, 0.3138], decimal=3)
npt.assert_almost_equal(ctv.summary['p'].values, [0.048, 0.038, 0.083, 0.974], decimal=3)


def test_error_is_raised_if_using_non_numeric_data(self, ctv):
df = pd.DataFrame.from_dict({
'id': [1, 2, 3,],
'start': [0., 0., 0.],
'end': [1., 2., 3.],
'e': [1, 1, 1],
'bool_': [True, True, False],
'int_': [1, -1, 0],
'uint8_': pd.Series([1, -1, 0], dtype="uint8"),
'string_': ['test', 'a', '2.5'],
'float_': [1.2, -0.5, 0.0],
'categorya_': pd.Series([1, 2, 3], dtype='category'),
'categoryb_': pd.Series(['a', 'b', 'a'], dtype='category'),

})

for subset in [
['start', 'end', 'e', 'id', 'categorya_'],
['start', 'end', 'e', 'id', 'categoryb_'],
['start', 'end', 'e', 'id', 'string_'],
]:
with pytest.raises(TypeError):
ctv.fit(df[subset], id_col='id', event_col='e', stop_col='end')

for subset in [
['start', 'end', 'e', 'id', 'bool_'],
['start', 'end', 'e', 'id', 'int_'],
['start', 'end', 'e', 'id', 'float_'],
['start', 'end', 'e', 'id', 'uint8_'],
]:
ctv.fit(df[subset], id_col='id', event_col='e', stop_col='end')

def test_ctv_prediction_methods(self, ctv, heart):
ctv.fit(heart, id_col='id', event_col='event')
assert ctv.predict_log_partial_hazard(heart).shape[0] == heart.shape[0]
assert ctv.predict_partial_hazard(heart).shape[0] == heart.shape[0]
30 changes: 27 additions & 3 deletions tests/test_plotting.py
@@ -5,11 +5,11 @@
import pandas as pd
import numpy as np
from lifelines.estimation import NelsonAalenFitter, KaplanMeierFitter, AalenAdditiveFitter,\
CoxPHFitter
CoxPHFitter, CoxTimeVaryingFitter
from lifelines.generate_datasets import generate_random_lifetimes, generate_hazard_rates
from lifelines.plotting import plot_lifetimes
from lifelines.datasets import load_waltons, load_regression_dataset, load_lcd,\
load_panel_test
load_panel_test, load_stanford_heart_transplants
from lifelines.generate_datasets import cumulative_integral


@@ -233,12 +233,36 @@ def test_coxph_plotting(self, block):
self.plt.title('test_coxph_plotting')
self.plt.show(block=block)

def test_coxph_plotting_with_subset_of_columns(self, block):
df = load_regression_dataset()
cp = CoxPHFitter()
cp.fit(df, "T", "E")
cp.plot(columns=['var1', 'var2'])
self.plt.title('test_coxph_plotting_with_subset_of_columns')
self.plt.show(block=block)

def test_coxph_plotting_with_subset_of_columns_and_standardized(self, block):
df = load_regression_dataset()
cp = CoxPHFitter()
cp.fit(df, "T", "E")
cp.plot(True, columns=['var1', 'var2'])
self.plt.title('test_coxph_plotting_with_subset_of_columns_and_standardized')
self.plt.show(block=block)

def test_coxph_plotting_normalized(self, block):
df = load_regression_dataset()
cp = CoxPHFitter()
cp.fit(df, "T", "E")
cp.plot(True)
self.plt.title('test_coxph_plotting')
self.plt.title('test_coxph_plotting_normalized')
self.plt.show(block=block)

def test_coxtv_plotting_with_subset_of_columns_and_standardized(self, block):
df = load_stanford_heart_transplants()
ctv = CoxTimeVaryingFitter()
ctv.fit(df, id_col='id', event_col='event')
ctv.plot(True, columns=['age', 'year'])
self.plt.title('test_coxtv_plotting_with_subset_of_columns_and_standardized')
self.plt.show(block=block)

def test_kmf_left_censorship_plots(self, block):
