Merge pull request #549 from CamDavidsonPilon/0.15.1

bump to 0.15.1
CamDavidsonPilon · Nov 23, 2018 · 8d7343e · 8d7343e
2 parents 65a4a51 + 0ec1c0f
commit 8d7343e
Show file tree

Hide file tree

Showing 10 changed files with 49 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 ### Changelogs
 
+#### 0.15.1
+ - Bug fixes for v0.15.0
+ - Raise NotImplementedError if the `robust` flag is used in `CoxTimeVaryingFitter` - that's not ready yet.
+
 #### 0.15.0
  - adding `robust` params to `CoxPHFitter`'s `fit`. This enables at least i) using non-integer weights in the model (these could be sampling weights like IPTW), and ii) mis-specified models (ex: non-proportional hazards). Under the hood it's a sandwich estimator. This does not handle ties, so if there is a high number of ties, results may significantly differ from other software.
  - `standard_errors_` is now a property on fitted `CoxPHFitter` which describes the standard errors of the coefficients.

diff --git a/docs/Examples.rst b/docs/Examples.rst
@@ -634,7 +634,7 @@ The fitting should be faster, and the results identical to the unweighted datase
 
 The second use of weights is sampling weights. These are typically positive, non-integer weights that represent some artificial under/over sampling of observations (ex: inverse probability of treatment weights). It is recommended to set ``robust=True`` in the call to ``fit`` as the usual standard error is incorrect for sampling weights. The ``robust`` flag will use the sandwich estimator for the standard error.
 
-.. warning:: The implementation of the sandwich estimator does not handle ties correctly (under the Efron handling of ties), and will give slightly or significantly different results from other software depending on the frequeny of ties. g
+.. warning:: The implementation of the sandwich estimator does not handle ties correctly (under the Efron handling of ties), and will give slightly or significantly different results from other software depending on the frequency of ties.
 
 
 Correlations between subjects in a Cox model

diff --git a/docs/conf.py b/docs/conf.py
@@ -55,9 +55,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.15.0'
+version = '0.15.1'
 # The full version, including alpha/beta/rc tags.
-release = '0.15.0'
+release = '0.15.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/docs/images/invert_y_axis.png b/docs/images/invert_y_axis.png
diff --git a/lifelines/fitters/cox_time_varying_fitter.py b/lifelines/fitters/cox_time_varying_fitter.py
@@ -38,6 +38,11 @@ class CoxTimeVaryingFitter(BaseFitter):
     """
 
     def __init__(self, alpha=0.95, penalizer=0.0):
+        if not (0 < alpha <= 1.):
+            raise ValueError('alpha parameter must be between 0 and 1.')
+        if penalizer < 0:
+            raise ValueError("penalizer parameter must be >= 0.")
+
         self.alpha = alpha
         self.penalizer = penalizer
 
@@ -72,6 +77,9 @@ def fit(self, df, id_col, event_col, start_col='start', stop_col='stop', weights
         """
 
         self.robust = robust
+        if self.robust:
+            raise NotImplementedError("Not available yet.")
+
         self.event_col = event_col
         self._time_fit_was_called = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
 

diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py
@@ -681,7 +681,7 @@ def predict_log_partial_hazard(self, X):
             pass_for_numeric_dtypes_or_raise(X)
         elif isinstance(X, pd.Series):
             assert len(hazard_names) == 1, 'Series not the correct arugment'
-            X = pd.DataFrame(series).T
+            X = pd.DataFrame(X)
             pass_for_numeric_dtypes_or_raise(X)
 
         X = X.astype(float)

diff --git a/lifelines/version.py b/lifelines/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '0.15.0'
+__version__ = '0.15.1'
diff --git a/setup.py b/setup.py
@@ -9,14 +9,8 @@ def filepath(fname):
 exec(compile(open('lifelines/version.py').read(),
                   'lifelines/version.py', 'exec'))
 
-readme_md = filepath('README.md')
-
-try:
-    import pypandoc
-    readme_rst = pypandoc.convert_file(readme_md, 'rst')
-except(ImportError):
-    readme_rst = open(readme_md).read()
-
+with open('README.md') as f:
+    long_description = f.read()
 
 setup(
     name="lifelines",
@@ -32,7 +26,8 @@ def filepath(fname):
               'lifelines.fitters',
               'lifelines.utils',
               ],
-    long_description=readme_rst,
+    long_description=long_description,
+    long_description_content_type='text/markdown',
     classifiers=[
         "Development Status :: 4 - Beta",
         "License :: OSI Approved :: MIT License",
@@ -45,8 +40,9 @@ def filepath(fname):
         ],
     install_requires=[
         "numpy",
-        "scipy",
+        "scipy>=1.0",
         "pandas>=0.18",
+        "matplotlib>=2.0",
     ],
     package_data={
         "lifelines": [

diff --git a/tests/test_estimation.py b/tests/test_estimation.py
@@ -1010,6 +1010,14 @@ def test_cox_ph_prediction_with_series(self, rossi):
         result = cf.predict_survival_function(rossi_mean)
         assert_series_equal(cf.baseline_survival_['baseline survival'], result[0], check_names=False)
 
+    def test_cox_ph_prediction_with_series_of_longer_length(self, rossi):
+        rossi = rossi[['week', 'arrest', 'age']]
+        cf = CoxPHFitter()
+        cf.fit(rossi, duration_col='week', event_col='arrest')
+
+        X = pd.Series([1,2,3,4,5])
+        result = cf.predict_survival_function(X)
+
     @pytest.mark.xfail
     def test_cox_ph_prediction_monotonicity(self, data_pred2):
         # Concordance wise, all prediction methods should be monotonic versions
@@ -1741,6 +1749,13 @@ def test_baseline_survival_is_the_same_indp_of_scale(self, regression_dataset):
         cp2.fit(df_descaled, event_col='E', duration_col='T')
         assert_frame_equal(cp2.baseline_survival_, cp1.baseline_survival_)
 
+    def test_error_thrown_weights_are_nonpositive(self, regression_dataset):
+        regression_dataset['weights'] = -1
+        cph = CoxPHFitter()
+        with pytest.raises(ValueError):
+            cph.fit(regression_dataset, event_col='E', duration_col='T', weights_col='weights')
+
+
     def test_survival_prediction_is_the_same_indp_of_scale(self, regression_dataset):
         df = regression_dataset.copy()
 
@@ -2385,6 +2400,15 @@ def test_likelihood_ratio_test_against_R(self, ctv, heart):
         assert abs(p_value - 0.00448) < 0.001
         assert deg_of_freedom == 4
 
+    def test_error_thrown_weights_are_nonpositive(self, ctv, heart):
+        heart['weights'] = -1
+        with pytest.raises(ValueError):
+            ctv.fit(heart, id_col='id', event_col='event', weights_col='weights')
+
+
+    def test_error_thrown_if_column_doesnt_exist(self, ctv, heart):
+        with pytest.raises(KeyError):
+            ctv.fit(heart, id_col='_id_', event_col='event')
 
     def test_print_summary(self, ctv, heart):
 

diff --git a/tests/test_plotting.py b/tests/test_plotting.py
@@ -92,6 +92,7 @@ def test_aalen_additive_plot(self, block):
         timeline = np.linspace(0, 70, 10000)
         hz, coef, X = generate_hazard_rates(n, d, timeline)
         T = generate_random_lifetimes(hz, timeline)
+        T[np.isinf(T)] = 10
         C = np.random.binomial(1, 1., size=n)
         X['T'] = T
         X['E'] = C
@@ -336,6 +337,7 @@ def test_aalen_additive_fit_with_censor(self, block):
         cumulative_hazards = pd.DataFrame(cumulative_integral(coef.values, timeline),
                                           index=timeline, columns=coef.columns)
         T = generate_random_lifetimes(hz, timeline)
+        T[np.isinf(T)] = 10
         X['T'] = T
         X['E'] = np.random.binomial(1, 0.99, n)