Commit afbbf33

Merge ceccebb into 70999a0

CamDavidsonPilon committed Sep 2, 2019
2 parents 70999a0 + ceccebb commit afbbf33
Showing 7 changed files with 163 additions and 60 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -4,7 +4,11 @@
#### 0.22.4

##### New features
- - Some performance improvements to parametric regression models.
+ - Some performance improvements to regression models.
+ - lifelines will avoid penalizing the intercept (aka bias) variables in regression models.
+
+ ##### Bug fixes
+ - Fixed issue where `concordance_index` would never exit if NaNs were present in the dataset.

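For context, the intercept change means a penalized fit now shrinks only the genuine covariates. A minimal sketch of the user-facing behavior, assuming the `rossi` dataset that ships with lifelines:

```python
from lifelines import WeibullAFTFitter
from lifelines.datasets import load_rossi

rossi = load_rossi()

# The L2 penalty now applies to covariate coefficients only; the
# automatically-added intercept column is left unpenalized, so
# shrinkage no longer biases the baseline parameters toward zero.
aft = WeibullAFTFitter(penalizer=0.1)
aft.fit(rossi, duration_col="week", event_col="arrest")
print(aft.summary)
```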

#### 0.22.3
154 changes: 105 additions & 49 deletions examples/SaaS churn and piecewise regression models.ipynb

Large diffs are not rendered by default.

23 changes: 16 additions & 7 deletions lifelines/fitters/__init__.py
@@ -1413,9 +1413,6 @@ def _fit(
df = df.astype(float)
self._check_values(df, utils.coalesce(Ts[1], Ts[0]), E, weights, entries)

- _norm_std = df.std(0)
- _norm_std[_norm_std < 1e-8] = 1.0
-
_index = pd.MultiIndex.from_tuples(
sum(([(name, col) for col in columns] for name, columns in self.regressors.items()), [])
)
@@ -1426,7 +1423,13 @@ def _fit(
self._norm_mean_ = df[self.regressors[self._primary_parameter_name]].mean(0)
self._norm_mean_ancillary = df[self.regressors[self._ancillary_parameter_name]].mean(0)

- self._norm_std = pd.Series([_norm_std.loc[variable_name] for _, variable_name in _index], index=_index)
+ _norm_std = df.std(0)
+ self._constant_cols = pd.Series(
+ [(_norm_std.loc[variable_name] < 1e-8) for (_, variable_name) in _index], index=_index
+ )
+ self._norm_std = pd.Series([_norm_std.loc[variable_name] for (_, variable_name) in _index], index=_index)
+ self._norm_std[self._constant_cols] = 1.0
+ _norm_std[_norm_std < 1e-8] = 1.0

_params, self.log_likelihood_, self._hessian_ = self._fit_model(
log_likelihood_function,
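The reordered block above computes the column standard deviations once, flags (near-)constant columns such as the intercept, and clamps their norm to 1.0 so that standardization leaves them untouched. A standalone sketch of that logic, under the same `1e-8` threshold (column names are illustrative):

```python
import pandas as pd

df = pd.DataFrame({"age": [22.0, 35.0, 41.0], "_intercept": [1.0, 1.0, 1.0]})

norm_std = df.std(0)
constant_cols = norm_std < 1e-8  # intercept/bias columns have ~zero spread
norm_std[constant_cols] = 1.0    # dividing by 1.0 is a no-op for those columns

normalized = df / norm_std       # covariates are scaled; the intercept survives as-is
```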
@@ -1466,6 +1469,8 @@ def _log_likelihood(self):

def _add_penalty(self, params, neg_ll):
params, _ = flatten(params)
+ # remove constant cols from being penalized
+ params = params[~self._constant_cols]
if self.penalizer > 0:
penalty = (params ** 2).sum()
else:
@@ -1487,7 +1492,7 @@ def _fit_model(self, likelihood, Ts, Xs, E, weights, entries, show_progress=Fals
initial_point_array, unflatten = flatten(initial_point_dict)

if initial_point is not None and isinstance(initial_point, dict):
- initial_point_array = flatten(initial_point)
+ initial_point_array, _ = flatten(initial_point)  # TODO: test

assert initial_point_array.shape[0] == Xs.size, "initial_point is not the correct shape."

@@ -2577,7 +2582,7 @@ def _create_initial_point(self, Ts, E, entries, weights, Xs):
"""
See https://github.com/CamDavidsonPilon/lifelines/issues/664
"""
- constant_col = (Xs.df.var(0) < 1e-8).idxmax()
+ constant_col = (Xs.df.std(0) < 1e-8).idxmax()

def _transform_ith_param(param):
if param <= 0:
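The switch from `var(0)` to `std(0)` makes the constant-column test consistent with the `_norm_std < 1e-8` check in `_fit`: squaring a small standard deviation can push a merely low-variance column under the threshold. A small illustration with hypothetical data:

```python
import numpy as np
import pandas as pd

X = pd.DataFrame({
    "noisy": 1.0 + 1e-5 * np.random.randn(100),  # nearly, but not truly, constant
    "_intercept": np.ones(100),
})

print((X.var(0) < 1e-8).idxmax())  # may pick "noisy": its variance (~1e-10) is under 1e-8
print((X.std(0) < 1e-8).idxmax())  # picks "_intercept": only a true constant has std < 1e-8
```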
@@ -2614,8 +2619,12 @@ def _transform_ith_param(param):

def _add_penalty(self, params, neg_ll):
params, _ = flatten(params)
- if self.penalizer > 0:
+ # remove intercepts from being penalized
+ params = params[~self._constant_cols]
+ if self.penalizer > 0 and self.l1_ratio > 0:
penalty = self.l1_ratio * anp.abs(params).sum() + 0.5 * (1.0 - self.l1_ratio) * (params ** 2).sum()
+ elif self.penalizer > 0 and self.l1_ratio <= 0:
+ penalty = 0.5 * (params ** 2).sum()
else:
penalty = 0
return neg_ll + self.penalizer * penalty
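The reworked branch is an elastic-net penalty over the non-intercept coefficients, `penalizer * (l1_ratio * ||β||₁ + 0.5 * (1 - l1_ratio) * ||β||₂²)`, degrading gracefully to pure ridge when `l1_ratio <= 0`. A standalone sketch (the function name and arguments are illustrative, not the library's API):

```python
import autograd.numpy as anp

def elastic_net_penalty(params, constant_cols, penalizer, l1_ratio):
    # intercept-like (constant-column) coefficients are excluded from shrinkage
    params = params[~constant_cols]
    if penalizer > 0 and l1_ratio > 0:
        penalty = l1_ratio * anp.abs(params).sum() + 0.5 * (1.0 - l1_ratio) * (params ** 2).sum()
    elif penalizer > 0:
        penalty = 0.5 * (params ** 2).sum()
    else:
        penalty = 0.0
    return penalizer * penalty
```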
5 changes: 3 additions & 2 deletions lifelines/fitters/piecewise_exponential_regression_fitter.py
@@ -45,8 +45,9 @@ def _add_penalty(self, params, neg_ll):
coef_penalty = 0

if self.penalizer > 0:
- for i in range(params_stacked.shape[1] - 1): # assuming the intercept col is the last column...
- coef_penalty = coef_penalty + (params_stacked[:, i]).var()
+ for i in range(params_stacked.shape[1]):
+ if not self._constant_cols[i]:
+ coef_penalty = coef_penalty + (params_stacked[:, i]).var()

return neg_ll + self.penalizer * coef_penalty

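The penalty here discourages a covariate's coefficient from drifting across the piecewise intervals by penalizing the per-column variance of the stacked coefficients; constant (intercept) columns are now skipped by index instead of assuming the intercept is the last column. A rough NumPy sketch with illustrative shapes:

```python
import numpy as np

# one row per piecewise interval, one column per covariate
params_stacked = np.array([[0.5, 0.1, 1.0],
                           [0.7, 0.1, 2.0]])
constant_cols = np.array([False, False, True])  # the last column is the intercept

coef_penalty = sum(
    params_stacked[:, i].var()
    for i in range(params_stacked.shape[1])
    if not constant_cols[i]
)
# only drift in the first two columns is penalized; the intercepts may differ freely
```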
5 changes: 5 additions & 0 deletions lifelines/utils/concordance.py
@@ -263,4 +263,9 @@ def _preprocess_scoring_data(event_times, predicted_scores, event_observed):
if event_observed.shape != event_times.shape:
raise ValueError("Observed events must be 1-dimensional of same length as event times")

# check for NaNs
for a in [event_times, predicted_scores, event_observed]:
if np.isnan(a).any():
raise ValueError("NaNs detected in inputs, please correct or drop.")

return event_times, predicted_scores, event_observed
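With this guard, passing NaNs to `concordance_index` now fails fast with a clear error instead of never terminating. A quick sketch of the new behavior:

```python
import numpy as np
from lifelines.utils import concordance_index

event_times = np.array([5.0, 10.0, np.nan])
predicted_scores = np.array([2.0, 8.0, 9.0])
event_observed = np.array([1, 1, 0])

try:
    concordance_index(event_times, predicted_scores, event_observed)
except ValueError as e:
    print(e)  # "NaNs detected in inputs, please correct or drop."
```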
28 changes: 28 additions & 0 deletions tests/test_estimation.py
@@ -1392,6 +1392,34 @@ def test_BHF_fit_when_KMF_throws_an_error(self):
bfh.fit(observations, entry=births)


class TestParametricRegressionFitter:
@pytest.fixture
def rossi(self):
rossi = load_rossi()
rossi["_int"] = 1.0
return rossi

def test_custom_weibull_model_gives_the_same_data_as_implemented_weibull_model(self, rossi):
class CustomWeibull(ParametricRegressionFitter):

_fitted_parameter_names = ["lambda_", "rho_"]

def _cumulative_hazard(self, params, T, Xs):
lambda_ = anp.exp(anp.dot(Xs["lambda_"], params["lambda_"]))
rho_ = anp.exp(anp.dot(Xs["rho_"], params["rho_"]))

return (T / lambda_) ** rho_

cb = CustomWeibull()
wf = WeibullAFTFitter(fit_intercept=False)

cb.fit(rossi, "week", "arrest", regressors={"lambda_": rossi.columns, "rho_": ["_int"]})
wf.fit(rossi, "week", "arrest")

assert_frame_equal(cb.summary.loc["lambda_"], wf.summary.loc["lambda_"], check_less_precise=2)
npt.assert_allclose(cb.log_likelihood_, wf.log_likelihood_)


class TestRegressionFitters:
@pytest.fixture
def rossi(self):
2 changes: 1 addition & 1 deletion tests/utils/test_utils.py
@@ -961,7 +961,7 @@ def test_sklearn_GridSearchCV_accept_model(self, X, Y):
clf = GridSearchCV(base_model(), grid_params, cv=4)
clf.fit(X, Y)

- assert clf.best_params_ == {"model_ancillary": False, "penalizer": 0.01}
+ assert clf.best_params_ == {"model_ancillary": False, "penalizer": 100.0}
assert clf.predict(X).shape[0] == X.shape[0]

def test_model_can_accept_things_like_strata(self, X, Y):
