Merge pull request #429 from CamDavidsonPilon/cp-grouped-fitter
Cp grouped fitter
CamDavidsonPilon committed Mar 21, 2018
2 parents 4d22dd9 + e03fe46 commit 1f2315f
Showing 5 changed files with 81 additions and 37 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,10 @@
### Changelogs

+#### 0.15.0
+- even smarter `step_size` calculations for iterative optimizations.
+- fixed bug with using weights and strata in `CoxPHFitter`


#### 0.14.0
- adding `plot_covariate_groups` to `CoxPHFitter` to visualize what happens to survival as we vary a covariate, all else being equal.
- `utils` functions like `qth_survival_times` and `median_survival_times` now return the transpose of the DataFrame compared to previous version of lifelines. The reason for this is that we often treat survival curves as columns in DataFrames, and functions of the survival curve as index (ex: KaplanMeierFitter.survival_function_ returns a survival curve _at_ time _t_).
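As context for the 0.15.0 weights-and-strata bullet, a minimal sketch of the usage this release fixes (the frame and the column names `T`, `E`, `x`, `w`, and `group` are hypothetical):

```python
import pandas as pd
from lifelines import CoxPHFitter

df = pd.DataFrame({
    'T': [5., 6., 7., 8., 9., 10.],            # durations
    'E': [1, 0, 1, 1, 0, 1],                   # event indicator
    'x': [0.1, 0.5, 0.3, 0.9, 0.2, 0.4],       # covariate
    'w': [1.5, 0.7, 2.0, 1.1, 0.9, 1.3],       # observation weights
    'group': ['a', 'a', 'a', 'b', 'b', 'b'],   # stratifying column
})

# Previously the full weight vector was reused inside every stratum; the fix
# in this commit slices the weights per stratum alongside X, T, and E.
cph = CoxPHFitter()
cph.fit(df, duration_col='T', event_col='E', weights_col='w', strata=['group'])
```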
16 changes: 5 additions & 11 deletions lifelines/fitters/cox_time_varying_fitter.py
@@ -14,7 +14,7 @@
significance_code, normalize,\
pass_for_numeric_dtypes_or_raise, check_low_var,\
check_for_overlapping_intervals, check_complete_separation_low_variance,\
-ConvergenceWarning
+ConvergenceWarning, StepSizer


class CoxTimeVaryingFitter(BaseFitter):
@@ -138,9 +138,8 @@ def _newton_rhaphson(self, df, stop_times_events, show_progress=False, step_size
converging = True
ll, previous_ll = 0, 0

-if step_size is None:
-    # empirically determined
-    step_size = min(0.98, 3.0 / np.log10(n))
+step_sizer = StepSizer(step_size)
+step_size = step_sizer.next()

while converging:
i += 1
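For scale, the removed heuristic and the `StepSizer` default differ only modestly at the start; the real change is that the step size now adapts during the iterations. A quick sketch using the expressions above:

```python
import numpy as np

n = 10_000
# old inline heuristic (removed above): shrink the initial step for large n
old_initial = min(0.98, 3.0 / np.log10(n))   # 3.0 / 4 = 0.75
# new behaviour: StepSizer falls back to 0.95 when step_size is None,
# then re-tunes it from the norm of every Newton-Raphson update
new_initial = 0.95
print(old_initial, new_initial)
```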
@@ -177,12 +176,7 @@ def _newton_rhaphson(self, df, stop_times_events, show_progress=False, step_size
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ", ConvergenceWarning)
converging, completed = False, False

-# Only allow small steps
-if norm(delta) > 10:
-    step_size *= 0.5
-
-# temper the step size down.
-step_size *= 0.995
+step_size = step_sizer.update(norm(delta)).next()

beta += delta

@@ -213,7 +207,7 @@ def _get_gradients(self, df, stops_events, beta):
gradient = np.zeros((1, d))
log_lik = 0

-unique_death_times = np.sort(stops_events['stop'].loc[stops_events['event']].unique())
+unique_death_times = np.unique(stops_events['stop'].loc[stops_events['event']])

for t in unique_death_times:
x_death_sum = np.zeros((1, d))
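The swap above is behaviour-preserving: `np.unique` already returns its result sorted, so the explicit `np.sort` around `.unique()` was redundant. A quick check:

```python
import numpy as np

stops = np.array([9.0, 2.0, 9.0, 5.0, 2.0])
# np.unique both deduplicates and sorts in one call
assert (np.unique(stops) == np.sort(np.unique(stops))).all()
print(np.unique(stops))  # [2. 5. 9.]
```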
50 changes: 25 additions & 25 deletions lifelines/fitters/coxph_fitter.py
@@ -2,6 +2,7 @@
from __future__ import print_function
from __future__ import division

+import time
import warnings
import numpy as np
import pandas as pd
@@ -15,7 +16,10 @@
from lifelines.utils import survival_table_from_events, inv_normal_cdf, normalize,\
significance_code, concordance_index, _get_index, qth_survival_times,\
pass_for_numeric_dtypes_or_raise, check_low_var, coalesce,\
-check_complete_separation, check_nans, StatError, ConvergenceWarning
+check_complete_separation, check_nans, StatError, ConvergenceWarning,\
+StepSizer




class CoxPHFitter(BaseFitter):
@@ -84,6 +88,8 @@ def fit(self, df, duration_col, event_col=None,
self, with additional properties: hazards_
"""
+#import pdb
+#pdb.set_trace()
df = df.copy()

# Sort on time
@@ -105,15 +111,15 @@
del df[event_col]

if weights_col:
-weights = df.pop(weights_col).values
+weights = df.pop(weights_col)
if (weights.astype(int) != weights).any():
warnings.warn("""It looks like your weights are not integers, possibly prospenity scores then?
warnings.warn("""It looks like your weights are not integers, possibly propensity scores then?
It's important to know that the naive variance estimates of the coefficients are biased. Instead use Monte Carlo to
estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"
""", RuntimeWarning)

else:
-weights = np.ones(self._n_examples)
+weights = pd.DataFrame(np.ones((self._n_examples,1)), index=df.index)

self._check_values(df, T, E)
df = df.astype(float)
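The `weights.astype(int) != weights` test above is what routes non-integer weights to the propensity-score warning; a small sketch of the check in isolation:

```python
import pandas as pd

w = pd.Series([0.31, 1.0, 2.0, 0.87])
# astype(int) truncates, so any fractional weight makes the comparison True,
# which triggers the RuntimeWarning about biased naive variance estimates
print((w.astype(int) != w).any())  # True
```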
@@ -184,9 +190,8 @@ def _newton_rhaphson(self, X, T, E, weights=None, initial_beta=None, step_size=N
else:
beta = np.zeros((d, 1))

-if step_size is None:
-    # "empirically" determined
-    step_size = min(0.98, 3.0 / np.log10(n))
+step_sizer = StepSizer(step_size)
+step_size = step_sizer.next()

# Method of choice is just efron right now
if self.tie_method == 'Efron':
@@ -198,19 +203,20 @@ def _newton_rhaphson(self, X, T, E, weights=None, initial_beta=None, step_size=N
converging = True
warn_ll = True
ll, previous_ll = 0, 0
+start = time.time()

while converging:
self.path.append(beta.copy())
i += 1
if self.strata is None:
-h, g, ll = get_gradients(X.values, beta, T.values, E.values, weights)
+h, g, ll = get_gradients(X.values, beta, T.values, E.values, weights.values)
else:
g = np.zeros_like(beta).T
h = np.zeros((beta.shape[0], beta.shape[0]))
ll = 0
for strata in np.unique(X.index):
-stratified_X, stratified_T, stratified_E = X.loc[[strata]], T.loc[[strata]], E.loc[[strata]]
-_h, _g, _ll = get_gradients(stratified_X.values, beta, stratified_T.values, stratified_E.values, weights)
+stratified_X, stratified_T, stratified_E, stratified_W = X.loc[[strata]], T.loc[[strata]], E.loc[[strata]], weights.loc[[strata]]
+_h, _g, _ll = get_gradients(stratified_X.values, beta, stratified_T.values, stratified_E.values, stratified_W.values)
g += _g
h += _h
ll += _ll
@@ -230,7 +236,7 @@ def _newton_rhaphson(self, X, T, E, weights=None, initial_beta=None, step_size=N
hessian, gradient = h, g

if show_progress:
print("Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f" % (i, norm(delta), step_size, ll))
print("Iteration %d: norm_delta = %.5f, step_size = %.5f, ll = %.5f, seconds_since_start = %.1f" % (i, norm(delta), step_size, ll, time.time() - start))
# convergence criteria
if norm(delta) < precision:
converging, completed = False, True
@@ -247,12 +253,7 @@ def _newton_rhaphson(self, X, T, E, weights=None, initial_beta=None, step_size=N
See https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/ ", ConvergenceWarning)
converging, completed = False, False

-# Only allow small steps
-if norm(delta) > 10.0:
-    step_size *= 0.5
-
-# temper the step size down.
-step_size *= 0.995
+step_size = step_sizer.update(norm(delta)).next()

beta += delta
previous_ll = ll
@@ -272,17 +273,13 @@ def _get_efron_values(self, X, beta, T, E, weights):
"""
Calculates the first and second order vector differentials,
with respect to beta.
Note that X, T, E are assumed to be sorted on T!
Parameters:
X: (n,d) numpy array of observations.
beta: (1, d) numpy array of coefficients.
T: (n) numpy array representing observed durations.
E: (n) numpy array representing death events.
weights: (n) an array representing weights per observation.
Returns:
hessian: (d, d) numpy array,
gradient: (1, d) numpy array
@@ -367,11 +364,14 @@ def _get_efron_values(self, X, beta, T, E, weights):
tie_phi_x_x = np.zeros((d, d))
return hessian, gradient, log_lik


def _compute_baseline_cumulative_hazard(self):
return self.baseline_hazard_.cumsum()

@staticmethod
def _check_values(df, T, E):
+#import pdb
+#pdb.set_trace()
pass_for_numeric_dtypes_or_raise(df)
check_nans(T)
check_nans(E)
@@ -463,13 +463,13 @@ def predict_log_partial_hazard(self, X):
can be in any order. If a numpy array, columns must be in the
same order as the training data.
This is equivalent to R's linear.predictors.
-Returns the log of the partial hazard for the individuals, partial since the
-baseline hazard is not included. Equal to \beta (X - \bar{X})
If X is a dataframe, the order of the columns do not matter. But
if X is an array, then the column ordering is assumed to be the
same as the training dataset.
+Returns the log of the partial hazard for the individuals, partial since the
+baseline hazard is not included. Equal to \beta X
"""
if isinstance(X, pd.DataFrame):
order = self.hazards_.columns
@@ -486,7 +486,7 @@ def predict_log_hazard_relative_to_mean(self, X):
same order as the training data.
Returns the log hazard relative to the hazard of the mean covariates. This is the behaviour
-of R's predict.coxph. Equal to \beta X - \beta \bar{X}
+of R's predict.coxph. Equal to \beta X - \beta \bar{X_{train}}
"""

return self.predict_log_partial_hazard(X) - self._train_log_partial_hazard.squeeze()
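Spelled out, the corrected docstring says the baseline hazard cancels in the ratio, leaving only the linear predictors (here $\bar{x}_{\text{train}}$ is the vector of training covariate means):

$$\log \frac{h(t \mid x)}{h(t \mid \bar{x}_{\text{train}})} = \beta^\top x - \beta^\top \bar{x}_{\text{train}}$$

which is what the `return` line above computes by subtracting the cached `_train_log_partial_hazard`.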
45 changes: 45 additions & 0 deletions lifelines/utils/__init__.py
@@ -1038,6 +1038,7 @@ def check_complete_separation_low_variance(df, events):
warnings.warn(warning_text, ConvergenceWarning)

def check_complete_separation_close_to_perfect_correlation(df, durations):
+# slow for many columns
THRESHOLD = 0.99
for col, series in df.iteritems():
if abs(stats.spearmanr(series, durations).correlation) >= THRESHOLD:
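For reference, a small sketch of the check this loop performs per column, which is why the new comment warns it is slow for wide DataFrames:

```python
import numpy as np
from scipy import stats

durations = np.arange(100, dtype=float)
covariate = durations + np.random.normal(0, 0.01, size=100)  # near-perfect rank correlation
# |Spearman rho| >= 0.99 is treated as a sign of possible complete separation
print(abs(stats.spearmanr(covariate, durations).correlation) >= 0.99)  # True
```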
@@ -1214,3 +1215,47 @@ def covariates_from_event_matrix(df, id_col):
df.columns = [id_col, 'event', 'duration']
df['_counter'] = 1
return df.pivot_table(index=[id_col, 'duration'], columns='event', fill_value=0)['_counter'].reset_index()
+
+
+class StepSizer():
+
+    def __init__(self, initial_step_size):
+        initial_step_size = coalesce(initial_step_size, 0.95)
+        self.initial_step_size = initial_step_size
+        self.step_size = initial_step_size
+        self.temper_back_up = False
+        self.norm_of_deltas = []
+
+    def update(self, norm_of_delta):
+        SCALE = 1.2
+        LOOKBACK = 3
+
+        self.norm_of_deltas.append(norm_of_delta)
+
+        # Only allow small steps
+        if norm_of_delta >= 15.0:
+            self.step_size *= 0.25
+            self.temper_back_up = True
+        elif 15.0 > norm_of_delta > 5.0:
+            self.step_size *= 0.75
+            self.temper_back_up = True
+
+        # speed up convergence by increasing step size again
+        if self.temper_back_up:
+            self.step_size = min(self.step_size * SCALE, self.initial_step_size)
+
+        # recent non-monotonically decreasing is a concern
+        if len(self.norm_of_deltas) >= LOOKBACK and \
+           np.any(np.diff(self.norm_of_deltas[-LOOKBACK:]) > 0):
+            self.step_size *= 0.98
+
+        # recent monotonically decreasing is good though
+        if len(self.norm_of_deltas) >= LOOKBACK and \
+           np.all(np.diff(self.norm_of_deltas[-LOOKBACK:]) < 0):
+            self.step_size = min(self.step_size * SCALE, 0.95)
+
+        return self
+
+    def next(self):
+        return self.step_size
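Putting the pieces together, a minimal sketch of how the fitters now drive `StepSizer` inside the Newton-Raphson loop (the `norm_delta` values are fabricated for illustration):

```python
from lifelines.utils import StepSizer

step_sizer = StepSizer(None)          # None falls back to the 0.95 default
step_size = step_sizer.next()

for norm_delta in [20.0, 8.0, 3.0, 1.0, 0.3]:   # pretend Newton update norms
    # large norms shrink the step; a monotonically decreasing run grows it back
    step_size = step_sizer.update(norm_delta).next()
    print(round(step_size, 4))
```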
2 changes: 1 addition & 1 deletion tests/test_estimation.py
@@ -806,7 +806,7 @@ def test_efron_computed_by_hand_examples(self, data_nus):

def test_efron_newtons_method(self, data_nus):
newton = CoxPHFitter()._newton_rhaphson
-X, T, E, W = data_nus[['x']], data_nus['t'], data_nus['E'], np.ones_like(data_nus['t'])
+X, T, E, W = data_nus[['x']], data_nus['t'], data_nus['E'], pd.Series(np.ones_like(data_nus['t']))
assert np.abs(newton(X, T, E, W)[0][0] - -0.0335) < 0.0001

def test_fit_method(self, data_nus):
