Skip to content

Commit

Permalink
Merge pull request #238 from CamDavidsonPilon/use-new-pandas-versions
Browse files Browse the repository at this point in the history
0.9.2
  • Loading branch information
CamDavidsonPilon committed May 23, 2016
2 parents 1fa4b38 + ce2135a commit 4817d39
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 36 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
### Changelogs

#### Forthcoming 0.9.0
#### 0.9.2
- deprecates Pandas versions before 0.18.
- throw an error if there are no admissible pairs in the c-index calculation. Previously a NaN was returned.

#### 0.9.1
- add two summary functions to Weibull and Exponential fitter, solves #224

#### 0.9.0
- new prediction function in `CoxPHFitter`, `predict_log_hazard_relative_to_mean`, that mimics what R's `predict.coxph` does.
- removing the `predict` method in CoxPHFitter and AalenAdditiveFitter. This is because the choice of `predict_median` as a default was causing too much confusion, and no other natural choice as a default was available. All other `predict_` methods remain.
- Default predict method in `k_fold_cross_validation` is now `predict_expectation`
Expand Down
2 changes: 1 addition & 1 deletion lifelines/fitters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,6 @@ def _conditional_time_to_event_(self):
"""
age = self.survival_function_.index.values[:, None]
columns = ['%s - Conditional time remaining to event' % self._label]
return pd.DataFrame(qth_survival_times(self.survival_function_[self._label] * 0.5, self.survival_function_).T.sort(ascending=False).values,
return pd.DataFrame(qth_survival_times(self.survival_function_[self._label] * 0.5, self.survival_function_).T.sort_index(ascending=False).values,
index=self.survival_function_.index,
columns=columns) - age
6 changes: 2 additions & 4 deletions lifelines/fitters/aalen_additive_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class AalenAdditiveFitter(BaseFitter):
For example, this shrinks the absolute value of c_{i,t}. Recommended, even if a small value.
smoothing_penalizer: Attach an L2 penalizer to the difference between adjacent (over time) coefficients. For
example, this shrinks the absolute value of c_{i,t} - c_{i,t+1}.
nn_cumulative_hazard: If True, forces the negative values in cumulative hazards to be 0 instead. Default True.
nn_cumulative_hazard: If True, forces the negative values in cumulative hazards to be 0 instead. Default True.
"""

Expand Down Expand Up @@ -428,13 +428,11 @@ def plot(self, ix=None, iloc=None, columns=[], legend=True, **kwargs):
"""
from matplotlib import pyplot as plt


def shaded_plot(ax, x, y, y_upper, y_lower, **kwargs):
    # Draw y against x as a post-step curve on ax; extra kwargs go to ax.plot.
    (curve,) = ax.plot(x, y, drawstyle='steps-post', **kwargs)
    # Shade between the lower and upper series, reusing the curve's colour
    # so the band visually belongs to the line it surrounds.
    band_colour = curve.get_color()
    fill_between_steps(x, y_lower, y2=y_upper, ax=ax, alpha=0.25,
                       color=band_colour, linewidth=1.0)


assert (ix is None or iloc is None), 'Cannot set both ix and iloc in call to .plot'

get_method = "ix" if ix is not None else "iloc"
Expand All @@ -448,7 +446,7 @@ def shaded_plot(ax, x, y, y_upper, y_lower, **kwargs):
columns = self.cumulative_hazards_.columns

if 'ax' in kwargs:
# don't use a .get here, as the default parameter will be called. In this case,
# don't use a .get here, as the default parameter will be called. In this case,
# plt.figure().add_subplot(111), which instantiates a new window
ax = kwargs['ax']
else:
Expand Down
23 changes: 11 additions & 12 deletions lifelines/fitters/coxph_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def _newton_rhaphson(self, X, T, E, initial_beta=None, step_size=1.,
delta = solve(-h, step_size * g.T)
if np.any(np.isnan(delta)):
raise ValueError("delta contains nan value(s). Convergence halted.")

# Save these as pending result
hessian, gradient = h, g

Expand Down Expand Up @@ -403,7 +403,7 @@ def predict_partial_hazard(self, X):
"""
X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
same order as the training data.
If covariates were normalized during fitting, they are normalized
in the same way here.
Expand Down Expand Up @@ -431,20 +431,19 @@ def predict_log_hazard_relative_to_mean(self, X):
"""
X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
same order as the training data.
Returns the log hazard relative to the hazard of the mean covariates. This is the behaviour
Returns the log hazard relative to the hazard of the mean covariates. This is the behaviour
of R's predict.coxph.
"""
mean_covariates = self.data.mean(0).to_frame().T
return np.log(self.predict_partial_hazard(X)/self.predict_partial_hazard(mean_covariates).squeeze())

return np.log(self.predict_partial_hazard(X) / self.predict_partial_hazard(mean_covariates).squeeze())

def predict_cumulative_hazard(self, X):
"""
X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
same order as the training data.
Returns the cumulative hazard for the individuals.
"""
Expand All @@ -457,7 +456,7 @@ def predict_survival_function(self, X):
"""
X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
same order as the training data.
Returns the estimated survival functions for the individuals
"""
Expand All @@ -467,7 +466,7 @@ def predict_percentile(self, X, p=0.5):
"""
X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
same order as the training data.
By default, returns the median lifetimes for the individuals.
http://stats.stackexchange.com/questions/102986/percentile-loss-functions
Expand All @@ -479,7 +478,7 @@ def predict_median(self, X):
"""
X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
same order as the training data.
Returns the median lifetimes for the individuals
"""
Expand All @@ -489,8 +488,8 @@ def predict_expectation(self, X):
"""
X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
same order as the training data.
Compute the expected lifetime, E[T], using covariates X.
"""
index = _get_index(X)
Expand Down
6 changes: 3 additions & 3 deletions lifelines/fitters/exponential_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ def _compute_confidence_bounds_of_parameters(self):
se = self._compute_standard_errors().ix['se']
alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
return pd.DataFrame([
np.array([self.lambda_]) + alpha2 * se,
np.array([self.lambda_]) - alpha2 * se,
], columns=['lambda_'], index=['upper-bound', 'lower-bound'])
np.array([self.lambda_]) + alpha2 * se,
np.array([self.lambda_]) - alpha2 * se,
], columns=['lambda_'], index=['upper-bound', 'lower-bound'])

@property
def summary(self):
Expand Down
6 changes: 3 additions & 3 deletions lifelines/fitters/weibull_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,9 @@ def _compute_confidence_bounds_of_parameters(self):
se = self._compute_standard_errors().ix['se']
alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
return pd.DataFrame([
np.array([self.lambda_, self.rho_]) + alpha2 * se,
np.array([self.lambda_, self.rho_]) - alpha2 * se,
], columns=['lambda_', 'rho_'], index=['upper-bound', 'lower-bound'])
np.array([self.lambda_, self.rho_]) + alpha2 * se,
np.array([self.lambda_, self.rho_]) - alpha2 * se,
], columns=['lambda_', 'rho_'], index=['upper-bound', 'lower-bound'])

@property
def summary(self):
Expand Down
4 changes: 2 additions & 2 deletions lifelines/generate_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ def generate_random_lifetimes(hazard_rates, timelines, size=1, censor=None):
def generate_observational_matrix(n, d, timelines, constant=False, independent=0, n_binary=0, model="aalen"):
    """
    Simulate a covariate matrix together with an event time for each row.

    n, d, constant, independent, n_binary, model: forwarded unchanged to
      generate_hazard_rates.
    timelines: the time grid handed to generate_hazard_rates,
      generate_random_lifetimes and cumulative_integral; it also becomes the
      index of the second return value.

    Returns a 2-tuple:
      - the covariates returned by generate_hazard_rates, with an added
        "event_at" column, sorted by that column
      - a DataFrame of cumulative_integral(coeff.values, timelines), with the
        coefficient columns and indexed by timelines
    """
    # Forward the caller's options; previously constant/independent/n_binary
    # were pinned to their defaults here, so non-default values had no effect.
    hz, coeff, covariates = generate_hazard_rates(n, d, timelines, constant=constant,
                                                  independent=independent, n_binary=n_binary,
                                                  model=model)
    lifetimes = generate_random_lifetimes(hz, timelines)
    # Take the first row of the transpose so a flat vector, not a 2-D array, is
    # assigned to the column -- presumably one lifetime per subject with the
    # default size=1; confirm against generate_random_lifetimes.
    covariates["event_at"] = lifetimes.T[0]
    cumulative_coefficients = pd.DataFrame(cumulative_integral(coeff.values, timelines),
                                           columns=coeff.columns, index=timelines)
    # sort_values replaces the DataFrame.sort call that newer pandas dropped.
    return covariates.sort_values(by="event_at"), cumulative_coefficients


def cumulative_integral(fx, x):
Expand Down
15 changes: 8 additions & 7 deletions lifelines/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def set_kwargs_ax(kwargs):
if "ax" not in kwargs:
kwargs["ax"] = plt.figure().add_subplot(111)


def set_kwargs_color(kwargs):
import matplotlib as mpl
if int(mpl.__version__.split('.')[1]) > 4:
Expand All @@ -226,6 +227,7 @@ def set_kwargs_color(kwargs):
kwargs['color'] = coalesce(kwargs.get('c'), kwargs.get('color'),
next(kwargs["ax"]._get_lines.color_cycle))


def set_kwargs_drawstyle(kwargs):
    """
    Ensure kwargs has a 'drawstyle' entry, defaulting to 'steps-post'.

    kwargs: a dict of plotting keyword arguments; it is modified in place.
      An existing 'drawstyle' value (whatever it is) is left untouched.

    Returns None.
    """
    kwargs.setdefault('drawstyle', 'steps-post')

Expand Down Expand Up @@ -304,7 +306,6 @@ def plot(ix=None, iloc=None, flat=False, show_censors=False,
estimate_ = getattr(cls, estimate)
confidence_interval_ = getattr(cls, 'confidence_interval_')


dataframe_slicer = create_dataframe_slicer(iloc, ix)

# plot censors
Expand All @@ -313,15 +314,15 @@ def plot(ix=None, iloc=None, flat=False, show_censors=False,

if show_censors and cls.event_table['censored'].sum() > 0:
cs = {
'marker': '+',
'ms': 12,
'marker': '+',
'ms': 12,
'mew': 1
}
cs.update(censor_styles)
times = dataframe_slicer(cls.event_table.ix[(cls.event_table['censored'] > 0)]).index.values.astype(float)
v = cls.predict(times)
ax.plot(times, v, linestyle='None',
color=colour, **cs)
color=colour, **cs)

# plot estimate
dataframe_slicer(estimate_).plot(**kwargs)
Expand All @@ -330,9 +331,9 @@ def plot(ix=None, iloc=None, flat=False, show_censors=False,
if ci_show:
if ci_force_lines:
dataframe_slicer(confidence_interval_).plot(linestyle="-", linewidth=1,
color=[colour], legend=True,
drawstyle=kwargs.get('drawstyle', 'default'),
ax=ax, alpha=0.6)
color=[colour], legend=True,
drawstyle=kwargs.get('drawstyle', 'default'),
ax=ax, alpha=0.6)
else:
x = dataframe_slicer(confidence_interval_).index.values.astype(float)
lower = dataframe_slicer(confidence_interval_.filter(like='lower')).values[:, 0]
Expand Down
9 changes: 7 additions & 2 deletions lifelines/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,12 +465,12 @@ def k_fold_cross_validation(fitters, df, duration_col, event_col=None,
event_col = 'E'
df[event_col] = 1.

df = df.reindex(np.random.permutation(df.index)).sort(event_col)
df = df.reindex(np.random.permutation(df.index)).sort_values(event_col)

assignments = np.array((n // k + 1) * list(range(1, k + 1)))
assignments = assignments[:n]

testing_columns = df.columns - [duration_col, event_col]
testing_columns = df.columns.difference([duration_col, event_col])

for i in range(1, k + 1):

Expand Down Expand Up @@ -864,6 +864,9 @@ def handle_pairs(truth, pred, first_ix):
num_correct += correct
num_tied += tied

if num_pairs == 0:
raise ZeroDivisionError("No admissable pairs in the dataset.")

return (num_correct + num_tied / 2) / num_pairs


Expand Down Expand Up @@ -913,4 +916,6 @@ def concordance_value(time_a, time_b, pred_a, pred_b):
paircount += 1.0
csum += concordance_value(time_a, time_b, pred_a, pred_b)

if paircount == 0:
raise ZeroDivisionError("No admissable pairs in the dataset.")
return csum / paircount
2 changes: 1 addition & 1 deletion lifelines/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '0.9.1.0'
__version__ = '0.9.2'
7 changes: 7 additions & 0 deletions tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,13 @@ def test_concordance_index_returns_same_after_shifting():
assert utils.concordance_index(T, T_) == utils.concordance_index(T - 5, T_ - 5) == utils.concordance_index(T, T_ - 5) == utils.concordance_index(T - 5, T_)


def test_both_concordance_index_function_deal_with_ties_the_same_way():
    # Two subjects share the same actual time (a tie), every event is
    # observed, and the predictions are strictly increasing.
    event_times = np.array([1, 1, 2])
    predictions = np.array([1, 2, 3])
    all_observed = np.ones(3)

    fast_score = fast_cindex(event_times, predictions, all_observed)
    slow_score = slow_cindex(event_times, predictions, all_observed)

    # Both implementations must agree with each other and score exactly 1.0.
    assert fast_score == slow_score == 1.0


def test_survival_table_from_events_with_non_negative_T_and_no_lagged_births():
n = 10
T = np.arange(n)
Expand Down

0 comments on commit 4817d39

Please sign in to comment.