Skip to content

Commit

Permalink
Merge pull request #549 from CamDavidsonPilon/0.15.1
Browse files Browse the repository at this point in the history
bump to 0.15.1
  • Loading branch information
CamDavidsonPilon committed Nov 23, 2018
2 parents 65a4a51 + 0ec1c0f commit 8d7343e
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 15 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
### Changelogs

#### 0.15.1
- Bug fixes for v0.15.0
- Raise NotImplementedError if the `robust` flag is used in `CoxTimeVaryingFitter` - that's not ready yet.

#### 0.15.0
- adding `robust` params to `CoxPHFitter`'s `fit`. This enables at least i) using non-integer weights in the model (these could be sampling weights like IPTW), and ii) mis-specified models (ex: non-proportional hazards). Under the hood it's a sandwich estimator. This does not handle ties, so if there is a high number of ties, results may significantly differ from other software.
- `standard_errors_` is now a property on fitted `CoxPHFitter` which describes the standard errors of the coefficients.
Expand Down
2 changes: 1 addition & 1 deletion docs/Examples.rst
Expand Up @@ -634,7 +634,7 @@ The fitting should be faster, and the results identical to the unweighted datase

The second use of weights is sampling weights. These are typically positive, non-integer weights that represent some artificial under/over sampling of observations (ex: inverse probability of treatment weights). It is recommended to set ``robust=True`` in the call to ``fit`` as the usual standard error is incorrect for sampling weights. The ``robust`` flag will use the sandwich estimator for the standard error.

.. warning:: The implementation of the sandwich estimator does not handle ties correctly (under the Efron handling of ties), and will give slightly or significantly different results from other software depending on the frequeny of ties. g
.. warning:: The implementation of the sandwich estimator does not handle ties correctly (under the Efron handling of ties), and will give slightly or significantly different results from other software depending on the frequency of ties.


Correlations between subjects in a Cox model
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Expand Up @@ -55,9 +55,9 @@
# built documents.
#
# The short X.Y version.
version = '0.15.0'
version = '0.15.1'
# The full version, including alpha/beta/rc tags.
release = '0.15.0'
release = '0.15.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
Binary file modified docs/images/invert_y_axis.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions lifelines/fitters/cox_time_varying_fitter.py
Expand Up @@ -38,6 +38,11 @@ class CoxTimeVaryingFitter(BaseFitter):
"""

def __init__(self, alpha=0.95, penalizer=0.0):
if not (0 < alpha <= 1.):
raise ValueError('alpha parameter must be between 0 and 1.')
if penalizer < 0:
raise ValueError("penalizer parameter must be >= 0.")

self.alpha = alpha
self.penalizer = penalizer

Expand Down Expand Up @@ -72,6 +77,9 @@ def fit(self, df, id_col, event_col, start_col='start', stop_col='stop', weights
"""

self.robust = robust
if self.robust:
raise NotImplementedError("Not available yet.")

self.event_col = event_col
self._time_fit_was_called = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")

Expand Down
2 changes: 1 addition & 1 deletion lifelines/fitters/coxph_fitter.py
Expand Up @@ -681,7 +681,7 @@ def predict_log_partial_hazard(self, X):
pass_for_numeric_dtypes_or_raise(X)
elif isinstance(X, pd.Series):
assert len(hazard_names) == 1, 'Series not the correct arugment'
X = pd.DataFrame(series).T
X = pd.DataFrame(X)
pass_for_numeric_dtypes_or_raise(X)

X = X.astype(float)
Expand Down
2 changes: 1 addition & 1 deletion lifelines/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '0.15.0'
__version__ = '0.15.1'
16 changes: 6 additions & 10 deletions setup.py
Expand Up @@ -9,14 +9,8 @@ def filepath(fname):
exec(compile(open('lifelines/version.py').read(),
'lifelines/version.py', 'exec'))

readme_md = filepath('README.md')

try:
import pypandoc
readme_rst = pypandoc.convert_file(readme_md, 'rst')
except(ImportError):
readme_rst = open(readme_md).read()

with open('README.md') as f:
long_description = f.read()

setup(
name="lifelines",
Expand All @@ -32,7 +26,8 @@ def filepath(fname):
'lifelines.fitters',
'lifelines.utils',
],
long_description=readme_rst,
long_description=long_description,
long_description_content_type='text/markdown',
classifiers=[
"Development Status :: 4 - Beta",
"License :: OSI Approved :: MIT License",
Expand All @@ -45,8 +40,9 @@ def filepath(fname):
],
install_requires=[
"numpy",
"scipy",
"scipy>=1.0",
"pandas>=0.18",
"matplotlib>=2.0",
],
package_data={
"lifelines": [
Expand Down
24 changes: 24 additions & 0 deletions tests/test_estimation.py
Expand Up @@ -1010,6 +1010,14 @@ def test_cox_ph_prediction_with_series(self, rossi):
result = cf.predict_survival_function(rossi_mean)
assert_series_equal(cf.baseline_survival_['baseline survival'], result[0], check_names=False)

def test_cox_ph_prediction_with_series_of_longer_length(self, rossi):
    """Predict from a multi-element Series on a single-covariate model.

    The model is fit on ``rossi`` restricted to the duration column
    ('week'), the event column ('arrest') and one covariate ('age'), then
    asked for survival functions for a five-element ``pd.Series``.

    Previously this test only checked that the call did not raise; it now
    also checks the shape of the prediction.
    """
    rossi = rossi[['week', 'arrest', 'age']]
    cf = CoxPHFitter()
    cf.fit(rossi, duration_col='week', event_col='arrest')

    X = pd.Series([1, 2, 3, 4, 5])
    result = cf.predict_survival_function(X)

    # Each element of the Series is treated as one subject, so the result
    # is expected to hold one survival curve (column) per element.
    assert result.shape[1] == len(X)

@pytest.mark.xfail
def test_cox_ph_prediction_monotonicity(self, data_pred2):
# Concordance wise, all prediction methods should be monotonic versions
Expand Down Expand Up @@ -1741,6 +1749,13 @@ def test_baseline_survival_is_the_same_indp_of_scale(self, regression_dataset):
cp2.fit(df_descaled, event_col='E', duration_col='T')
assert_frame_equal(cp2.baseline_survival_, cp1.baseline_survival_)

def test_error_thrown_weights_are_nonpositive(self, regression_dataset):
    """Fitting ``CoxPHFitter`` with a weights column of -1 must raise ``ValueError``."""
    regression_dataset['weights'] = -1
    fit_kwargs = dict(event_col='E', duration_col='T', weights_col='weights')
    fitter = CoxPHFitter()
    with pytest.raises(ValueError):
        fitter.fit(regression_dataset, **fit_kwargs)


def test_survival_prediction_is_the_same_indp_of_scale(self, regression_dataset):
df = regression_dataset.copy()

Expand Down Expand Up @@ -2385,6 +2400,15 @@ def test_likelihood_ratio_test_against_R(self, ctv, heart):
assert abs(p_value - 0.00448) < 0.001
assert deg_of_freedom == 4

def test_error_thrown_weights_are_nonpositive(self, ctv, heart):
    """Fitting ``ctv`` on ``heart`` with a weights column of -1 must raise ``ValueError``."""
    heart['weights'] = -1
    fit_kwargs = dict(id_col='id', event_col='event', weights_col='weights')
    with pytest.raises(ValueError):
        ctv.fit(heart, **fit_kwargs)


def test_error_thrown_if_column_doesnt_exist(self, ctv, heart):
    """``ctv.fit`` must raise ``KeyError`` when ``id_col`` is '_id_'.

    NOTE(review): presumably '_id_' is not a column of the ``heart``
    fixture -- confirm against the fixture definition.
    """
    missing_id_col = '_id_'
    with pytest.raises(KeyError):
        ctv.fit(heart, id_col=missing_id_col, event_col='event')

def test_print_summary(self, ctv, heart):

Expand Down
2 changes: 2 additions & 0 deletions tests/test_plotting.py
Expand Up @@ -92,6 +92,7 @@ def test_aalen_additive_plot(self, block):
timeline = np.linspace(0, 70, 10000)
hz, coef, X = generate_hazard_rates(n, d, timeline)
T = generate_random_lifetimes(hz, timeline)
T[np.isinf(T)] = 10
C = np.random.binomial(1, 1., size=n)
X['T'] = T
X['E'] = C
Expand Down Expand Up @@ -336,6 +337,7 @@ def test_aalen_additive_fit_with_censor(self, block):
cumulative_hazards = pd.DataFrame(cumulative_integral(coef.values, timeline),
index=timeline, columns=coef.columns)
T = generate_random_lifetimes(hz, timeline)
T[np.isinf(T)] = 10
X['T'] = T
X['E'] = np.random.binomial(1, 0.99, n)

Expand Down

0 comments on commit 8d7343e

Please sign in to comment.