Skip to content

Commit

Permalink
lint and add test for log time ph test
Browse files Browse the repository at this point in the history
  • Loading branch information
CamDavidsonPilon committed Dec 21, 2018
1 parent b2b83d0 commit 6ed384a
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 15 deletions.
11 changes: 6 additions & 5 deletions lifelines/fitters/coxph_fitter.py
Expand Up @@ -628,14 +628,16 @@ def _compute_martingale(self, X, T, E, _weights, index=None):
baseline_at_T = np.append(baseline_at_T, self.baseline_cumulative_hazard_.loc[T_, name])

martingale = E - (partial_hazard * baseline_at_T)
return pd.DataFrame({self.duration_col: T.values, self.event_col: E.values, "martingale": martingale.values}, index=index)
return pd.DataFrame(
{self.duration_col: T.values, self.event_col: E.values, "martingale": martingale.values}, index=index
)

def _compute_deviance(self, X, T, E, weights, index=None):
df = self._compute_martingale(X, T, E, weights, index)
rmart = df.pop("martingale")
log_term = np.where((E.values - rmart.values) <= 0, 0, E.values * np.log(E.values - rmart.values))
deviance = np.sign(rmart) * np.sqrt(-2 * (rmart + log_term))
df['deviance'] = deviance
df["deviance"] = deviance
return df

def _compute_scaled_schoenfeld(self, X, T, E, weights, index=None):
Expand Down Expand Up @@ -764,14 +766,13 @@ def _compute_delta_beta(self, X, T, E, weights, index=None):
subjects that influence the model disproportionately. Good advice: don't drop these outliers, model them.
"""
score_residuals = self._compute_score(X, T, E, weights, index=index)


d = X.shape[1]
scaled_variance_matrix = self.variance_matrix_ * np.tile(self._norm_std.values, (d, 1)).T

delta_betas = score_residuals.dot(scaled_variance_matrix)
delta_betas = score_residuals.dot(scaled_variance_matrix)
delta_betas.columns = self.hazards_.columns

return delta_betas

def _compute_score(self, X, T, E, weights, index=None):
Expand Down
14 changes: 12 additions & 2 deletions lifelines/plotting.py
Expand Up @@ -332,7 +332,9 @@ def plot_estimate(
ax: a pyplot axis object
"""

plot_estimate_config = PlotEstimateConfig(cls, estimate, loc, iloc, show_censors, censor_styles, bandwidth, **kwargs)
plot_estimate_config = PlotEstimateConfig(
cls, estimate, loc, iloc, show_censors, censor_styles, bandwidth, **kwargs
)

dataframe_slicer = create_dataframe_slicer(iloc, loc)

Expand Down Expand Up @@ -361,7 +363,15 @@ def plot_estimate(
x = dataframe_slicer(plot_estimate_config.confidence_interval_).index.values.astype(float)
lower = dataframe_slicer(plot_estimate_config.confidence_interval_.filter(like="lower")).values[:, 0]
upper = dataframe_slicer(plot_estimate_config.confidence_interval_.filter(like="upper")).values[:, 0]
fill_between_steps(x, lower, y2=upper, ax=plot_estimate_config.ax, alpha=ci_alpha, color=plot_estimate_config.colour, linewidth=1.0)
fill_between_steps(
x,
lower,
y2=upper,
ax=plot_estimate_config.ax,
alpha=ci_alpha,
color=plot_estimate_config.colour,
linewidth=1.0,
)

if at_risk_counts:
add_at_risk_counts(cls, ax=plot_estimate_config.ax)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_estimation.py
Expand Up @@ -1038,9 +1038,9 @@ def test_martingale_residuals(self, regression_dataset):

results = cph.compute_residuals(regression_dataset, "martingale")
print(results)
npt.assert_allclose(results.loc[0, 'martingale'], -2.315035744901, rtol=1e-05)
npt.assert_allclose(results.loc[1, 'martingale'], 0.774216356429, rtol=1e-05)
npt.assert_allclose(results.loc[199, 'martingale'], 0.868510420157, rtol=1e-05)
npt.assert_allclose(results.loc[0, "martingale"], -2.315035744901, rtol=1e-05)
npt.assert_allclose(results.loc[1, "martingale"], 0.774216356429, rtol=1e-05)
npt.assert_allclose(results.loc[199, "martingale"], 0.868510420157, rtol=1e-05)

def test_error_is_raised_if_using_non_numeric_data_in_prediction(self):
df = pd.DataFrame({"t": [1.0, 2.0, 3.0, 4.0], "int_": [1, -1, 0, 0], "float_": [1.2, -0.5, 0.0, 0.1]})
Expand Down
19 changes: 14 additions & 5 deletions tests/test_statistics.py
Expand Up @@ -234,12 +234,21 @@ def test_valueerror_is_raised_if_alpha_out_of_bounds():
def test_proportional_hazard_test():
cph = CoxPHFitter()
df = load_regression_dataset()
cph.fit(df, 'T', 'E')
cph.fit(df, "T", "E")
results = stats.proportional_hazard_test(cph, df)
npt.assert_allclose(results.summary.loc['var1']['test_statistic'], 1.241649, rtol=1e-3)
npt.assert_allclose(results.summary.loc['var2']['test_statistic'], 0.992358, rtol=1e-3)
npt.assert_allclose(results.summary.loc['var3']['test_statistic'], 1.445049, rtol=1e-3)
npt.assert_allclose(results.summary.loc['var3']['p'], 0.229324, rtol=1e-3)
npt.assert_allclose(results.summary.loc["var1"]["test_statistic"], 1.241649, rtol=1e-3)
npt.assert_allclose(results.summary.loc["var2"]["test_statistic"], 0.992358, rtol=1e-3)
npt.assert_allclose(results.summary.loc["var3"]["test_statistic"], 1.445049, rtol=1e-3)
npt.assert_allclose(results.summary.loc["var3"]["p"], 0.229324, rtol=1e-3)


def test_proportional_hazard_test_with_log_transform():
    """Check proportional_hazard_test statistics when time is log-transformed."""
    cph = CoxPHFitter()
    df = load_regression_dataset()
    cph.fit(df, "T", "E")

    results = stats.proportional_hazard_test(cph, df, time_transform=np.log)
    # Expected test statistics per covariate (regression values).
    expected_statistics = {"var1": 2.227627, "var2": 0.714427, "var3": 1.466321}
    for variable, expected in expected_statistics.items():
        npt.assert_allclose(results.summary.loc[variable]["test_statistic"], expected, rtol=1e-3)
    npt.assert_allclose(results.summary.loc["var3"]["p"], 0.225927, rtol=1e-3)

0 comments on commit 6ed384a

Please sign in to comment.