Commit 870ce6a: Merge 75a051e into d7f19d2
CamDavidsonPilon committed Aug 1, 2015
2 parents d7f19d2 + 75a051e

Showing 27 changed files with 204 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -28,7 +28,7 @@ install:
   - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION pip numpy scipy pandas matplotlib pytest
   - source activate test-environment
   # Build in place so we can run tests
-  - python setup.py build_ext --inplace
+  - python setup.py install
   - pip install coveralls
   - pip install pytest-cov
   # command to run tests
21 changes: 21 additions & 0 deletions docs/Examples.rst
@@ -156,6 +156,27 @@ time (months, days, ...) observed deaths censored

    print C # np.array([1,1,1,1,1,1,1,0,1,1, ...])

Alternatively, perhaps you are interested in viewing the survival table given some durations and censorship vectors.

.. code:: python

    from lifelines.utils import survival_table_from_events
    table = survival_table_from_events(T, C)
    print table.head()

    """
              removed  observed  censored  entrance  at_risk
    event_at
    0               0         0         0        60       60
    2               2         1         1         0       60
    3               3         1         2         0       58
    4               5         3         2         0       55
    5              12         6         6         0       50
    """

Plotting multiple figures on one plot
##############################################
21 changes: 21 additions & 0 deletions docs/Quickstart.rst
@@ -114,6 +114,27 @@ Lifelines has some utility functions to transform this dataset into durations an

    T, C = datetimes_to_durations(start_times, end_times, freq='h')

Alternatively, perhaps you are interested in viewing the survival table given some durations and censorship vectors.

.. code:: python

    from lifelines.utils import survival_table_from_events
    table = survival_table_from_events(T, C)
    print table.head()

    """
              removed  observed  censored  entrance  at_risk
    event_at
    0               0         0         0        60       60
    2               2         1         1         0       60
    3               3         1         2         0       58
    4               5         3         2         0       55
    5              12         6         6         0       50
    """

Survival Regression
---------------------------------
16 changes: 8 additions & 8 deletions lifelines/estimation.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
-from lifelines._base_fitter import BaseFitter
-from lifelines.weibull_fitter import WeibullFitter
-from lifelines.exponential_fitter import ExponentialFitter
-from lifelines.nelson_aalen_fitter import NelsonAalenFitter
-from lifelines.kaplan_meier_fitter import KaplanMeierFitter
-from lifelines.breslow_fleming_harrington_fitter import BreslowFlemingHarringtonFitter
-from lifelines.coxph_fitter import CoxPHFitter
-from lifelines.aalen_additive_fitter import AalenAdditiveFitter
+from lifelines.fitters import BaseFitter
+from lifelines.fitters.weibull_fitter import WeibullFitter
+from lifelines.fitters.exponential_fitter import ExponentialFitter
+from lifelines.fitters.nelson_aalen_fitter import NelsonAalenFitter
+from lifelines.fitters.kaplan_meier_fitter import KaplanMeierFitter
+from lifelines.fitters.breslow_fleming_harrington_fitter import BreslowFlemingHarringtonFitter
+from lifelines.fitters.coxph_fitter import CoxPHFitter
+from lifelines.fitters.aalen_additive_fitter import AalenAdditiveFitter
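With this hunk, estimation.py becomes a thin re-export shim over the new lifelines.fitters package, so user code importing from either path should keep working. A minimal sanity sketch, assuming the re-exports stay exactly as shown above:

    # both import paths should resolve to the very same class object
    from lifelines.estimation import KaplanMeierFitter
    from lifelines.fitters.kaplan_meier_fitter import KaplanMeierFitter as KMF

    assert KaplanMeierFitter is KMF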
2 changes: 2 additions & 0 deletions lifelines/_base_fitter.py → lifelines/fitters/__init__.py
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 from __future__ import print_function
+
 import numpy as np
 import pandas as pd
+
 from lifelines.plotting import plot_estimate
 from lifelines.utils import qth_survival_times
@@ -1,13 +1,15 @@
 # -*- coding: utf-8 -*-
 from __future__ import print_function

 import numpy as np
 import pandas as pd
 from numpy.linalg import LinAlgError
 from scipy.integrate import trapz
-from lifelines._base_fitter import BaseFitter
+
+from lifelines.fitters import BaseFitter
 from lifelines.utils import _get_index, inv_normal_cdf, epanechnikov_kernel, \
     ridge_regression as lr, qth_survival_times
-from lifelines.progress_bar import progress_bar
+from lifelines.utils.progress_bar import progress_bar
 from lifelines.plotting import plot_regressions
@@ -2,8 +2,8 @@
 from __future__ import print_function
 import numpy as np

-from lifelines._base_fitter import UnivariateFitter
-from lifelines.nelson_aalen_fitter import NelsonAalenFitter
+from lifelines.fitters import UnivariateFitter
+from lifelines.fitters.nelson_aalen_fitter import NelsonAalenFitter
 from lifelines.utils import median_survival_times
@@ -8,7 +8,7 @@
 from scipy.integrate import trapz
 import scipy.stats as stats

-from lifelines._base_fitter import BaseFitter
+from lifelines.fitters import BaseFitter
 from lifelines.utils import survival_table_from_events, inv_normal_cdf, normalize,\
     significance_code, concordance_index, _get_index, qth_survival_times
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd

-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import inv_normal_cdf
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd

-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import _preprocess_inputs, _additive_estimate, StatError, inv_normal_cdf,\
     median_survival_times
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd

-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import _preprocess_inputs, _additive_estimate, epanechnikov_kernel,\
     inv_normal_cdf
@@ -2,10 +2,12 @@
 from __future__ import print_function, division
 import numpy as np
 import pandas as pd
+
 from numpy.linalg import solve, norm, inv
-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import inv_normal_cdf
+

 def _negative_log_likelihood(lambda_rho, T, E):
     if np.any(lambda_rho < 0):
         return np.inf
2 changes: 1 addition & 1 deletion lifelines/plotting.py
@@ -2,7 +2,7 @@
 from __future__ import print_function

 import numpy as np
-from lifelines.utils import coalesce
+from .utils import coalesce


 def is_latex_enabled():
58 changes: 58 additions & 0 deletions lifelines/statistics.py
@@ -9,6 +9,64 @@
from lifelines.utils import group_survival_table_from_events


def sample_size_necessary_under_cph(power, ratio_of_participants, p_exp, p_con,
                                    postulated_hazard_ratio, alpha=0.05):
    """
    This computes the sample size needed to achieve a given power when comparing
    two groups under a Cox proportional hazards model.

    References:
        https://cran.r-project.org/web/packages/powerSurvEpi/powerSurvEpi.pdf

    Parameters:
        power: power to detect a hazard ratio as small as the postulated_hazard_ratio.
        ratio_of_participants: ratio of participants in the experimental group to the control group.
        p_exp: probability of failure in the experimental group over the period of the study.
        p_con: probability of failure in the control group over the period of the study.
        postulated_hazard_ratio: the postulated hazard ratio.
        alpha: type I error rate.

    Returns:
        n_exp, n_con: the sample sizes needed for the experimental and control groups,
        respectively, to achieve the desired power.
    """
    z = lambda p: stats.norm.ppf(p)

    m = 1.0 / ratio_of_participants \
        * ((ratio_of_participants * postulated_hazard_ratio + 1.0) / (postulated_hazard_ratio - 1.0)) ** 2 \
        * (z(1. - alpha / 2.) + z(power)) ** 2

    n_exp = m * ratio_of_participants / (ratio_of_participants * p_exp + p_con)
    n_con = m / (ratio_of_participants * p_exp + p_con)

    return int(np.ceil(n_exp)), int(np.ceil(n_con))
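A quick usage sketch of the new function; the study parameters below are purely illustrative, not taken from this commit:

    from lifelines.statistics import sample_size_necessary_under_cph

    # illustrative inputs: 80% power, 1:1 allocation, failure probabilities of
    # 0.25 (experimental) and 0.35 (control), postulated hazard ratio of 0.7
    n_exp, n_con = sample_size_necessary_under_cph(0.80, 1.0, 0.25, 0.35, 0.7, alpha=0.05)
    print n_exp, n_con  # required group sizes, rounded up to whole participants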


def power_under_cph(n_exp, n_con, p_exp, p_con, postulated_hazard_ratio, alpha=0.05):
    """
    This computes the power of a comparison of two groups under a Cox
    proportional hazards model, given the sample sizes of each group.

    References:
        https://cran.r-project.org/web/packages/powerSurvEpi/powerSurvEpi.pdf

    Parameters:
        n_exp: size of the experimental group.
        n_con: size of the control group.
        p_exp: probability of failure in the experimental group over the period of the study.
        p_con: probability of failure in the control group over the period of the study.
        postulated_hazard_ratio: the postulated hazard ratio.
        alpha: type I error rate.

    Returns:
        power: power to detect a hazard ratio as small as the postulated_hazard_ratio.
    """
    z = lambda p: stats.norm.ppf(p)

    m = n_exp * p_exp + n_con * p_con
    k = float(n_exp) / float(n_con)
    return stats.norm.cdf(np.sqrt(k * m) * abs(postulated_hazard_ratio - 1) / (k * postulated_hazard_ratio + 1) - z(1 - alpha / 2.))
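And the complementary check, reusing the sizes computed in the sketch above; by construction the recovered power should come out close to the requested 0.80:

    from lifelines.statistics import power_under_cph

    print power_under_cph(n_exp, n_con, 0.25, 0.35, 0.7, alpha=0.05)  # ~0.80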


def logrank_test(event_times_A, event_times_B, event_observed_A=None, event_observed_B=None,
                 alpha=0.95, t_0=-1, **kwargs):
    """
Empty file removed lifelines/tests/__init__.py
77 changes: 37 additions & 40 deletions lifelines/utils.py → lifelines/utils/__init__.py
@@ -108,46 +108,43 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
     ]
     """

     n = np.max(groups.shape)
     assert n == np.max(durations.shape) == np.max(event_observed.shape), "inputs must be of the same length."

     if birth_times is None:
         # Create some birth times
         birth_times = np.zeros(np.max(durations.shape))
         birth_times[:] = np.min(durations)

     assert n == np.max(birth_times.shape), "inputs must be of the same length."

-    groups, durations, event_observed, birth_times = map(lambda x: pd.Series(np.reshape(x, (n,))), [groups, durations, event_observed, birth_times])
+    groups, durations, event_observed, birth_times = [pd.Series(np.reshape(data, (n,))) for data in [groups, durations, event_observed, birth_times]]
     unique_groups = groups.unique()

-    # set first group
-    g = unique_groups[0]
-    ix = (groups == g)
-    T = durations[ix]
-    C = event_observed[ix]
-    B = birth_times[ix]
-
-    g_name = str(g)
-    data = survival_table_from_events(T, C, B,
-                                      columns=['removed:' + g_name, "observed:" + g_name, 'censored:' + g_name, 'entrance' + g_name])
-    for g in unique_groups[1:]:
-        ix = groups == g
+    for i, group in enumerate(unique_groups):
+        ix = groups == group
         T = durations[ix]
         C = event_observed[ix]
         B = birth_times[ix]
-        g_name = str(g)
-        data = data.join(survival_table_from_events(T, C, B,
-                                                    columns=['removed:' + g_name, "observed:" + g_name, 'censored:' + g_name, 'entrance' + g_name]),
-                         how='outer')
+        group_name = str(group)
+        columns = [event_name + ":" + group_name for event_name in ['removed', 'observed', 'censored', 'entrance', 'at_risk']]
+        if i == 0:
+            data = survival_table_from_events(T, C, B, columns=columns)
+        else:
+            data = data.join(survival_table_from_events(T, C, B, columns=columns), how='outer')

     data = data.fillna(0)
     # hmmm pandas its too bad I can't do data.ix[:limit] and leave out the if.
     if int(limit) != -1:
         data = data.ix[:limit]

     return unique_groups, data.filter(like='removed:'), data.filter(like='observed:'), data.filter(like='censored:')
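A short usage sketch of the refactored function's public behavior, on synthetic data (illustrative only; only the three required arguments are passed):

    import numpy as np
    from lifelines.utils import group_survival_table_from_events

    groups = np.array(['control'] * 4 + ['treated'] * 4)
    durations = np.array([3, 5, 7, 7, 2, 4, 4, 9])
    observed = np.ones(8)  # every subject's event was observed

    # returns the group labels, plus one table each for removed, observed, censored,
    # with one 'removed:<group>'-style column per group
    unique_groups, removed, observed_table, censored = group_survival_table_from_events(groups, durations, observed)
    print removed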


def survival_table_from_events(death_times, event_observed, birth_times=None,
-                               columns=["removed", "observed", "censored", "entrance"], weights=None):
+                               columns=["removed", "observed", "censored", "entrance", "at_risk"],
+                               weights=None):
     """
     Parameters:
       death_times: (n,) array of event times

@@ -167,21 +164,18 @@ def survival_table_from_events(death_times, event_observed, birth_times=None,
       left the population due to event_observed)

     Example:
         #input
         survival_table_from_events( waltonT, np.ones_like(waltonT)) #available in test suite
         #output
-                  removed  observed  censored  entrance
+                  removed  observed  censored  entrance  at_risk
         event_at
-        0               0         0         0        11
-        6               1         1         0         0
-        7               2         2         0         0
-        9               3         3         0         0
-        13              3         3         0         0
-        15              2         2         0         0
+        0               0         0         0        11       11
+        6               1         1         0         0       11
+        7               2         2         0         0       10
+        9               3         3         0         0        8
+        13              3         3         0         0        5
+        15              2         2         0         0        2
     """
+    removed, observed, censored, entrance, at_risk = columns
     death_times = np.asarray(death_times)
     if birth_times is None:
         birth_times = min(0, death_times.min()) * np.ones(death_times.shape[0])

@@ -192,17 +186,18 @@

     # deal with deaths and censorships
     df = pd.DataFrame(death_times, columns=["event_at"])
-    df[columns[0]] = 1 if weights is None else weights
-    df[columns[1]] = np.asarray(event_observed)
+    df[removed] = 1 if weights is None else weights
+    df[observed] = np.asarray(event_observed)
     death_table = df.groupby("event_at").sum()
-    death_table[columns[2]] = (death_table[columns[0]] - death_table[columns[1]]).astype(int)
+    death_table[censored] = (death_table[removed] - death_table[observed]).astype(int)

     # deal with late births
     births = pd.DataFrame(birth_times, columns=['event_at'])
-    births[columns[3]] = 1
+    births[entrance] = 1
     births_table = births.groupby('event_at').sum()

     event_table = death_table.join(births_table, how='outer', sort=True).fillna(0)  # http://wesmckinney.com/blog/?p=414
+    event_table[at_risk] = event_table[entrance].cumsum() - event_table[removed].cumsum().shift(1).fillna(0)
     return event_table.astype(float)
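To make the new at_risk bookkeeping concrete: at each event time it counts everyone who has entered, minus everyone removed strictly before that time, which is exactly the cumsum/shift identity added above. A small check on synthetic data:

    import numpy as np
    from lifelines.utils import survival_table_from_events

    T = np.array([1, 2, 2, 4, 5])  # durations
    C = np.array([1, 0, 1, 1, 0])  # 1 if the event was observed, 0 if censored
    table = survival_table_from_events(T, C)

    expected = table['entrance'].cumsum() - table['removed'].cumsum().shift(1).fillna(0)
    assert (table['at_risk'] == expected).all()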


@@ -587,15 +582,17 @@ def _additive_estimate(events, timeline, _additive_f, _additive_var, reverse):
"""
if reverse:
events = events.sort_index(ascending=False)
population = events['entrance'].sum() - events['removed'].cumsum().shift(1).fillna(0)
deaths = events['observed'].shift(1).fillna(0)
estimate_ = np.cumsum(_additive_f(population, deaths)).ffill().sort_index()
var_ = np.cumsum(_additive_var(population, deaths)).ffill().sort_index()
at_risk = events['entrance'].sum() - events['removed'].cumsum().shift(1).fillna(0)

deaths = events['observed']

estimate_ = np.cumsum(_additive_f(at_risk, deaths)).sort_index().shift(-1).fillna(0)
var_ = np.cumsum(_additive_var(at_risk, deaths)).sort_index().shift(-1).fillna(0)
else:
deaths = events['observed']
population = events['entrance'].cumsum() - events['removed'].cumsum().shift(1).fillna(0) # slowest line here.
estimate_ = np.cumsum(_additive_f(population, deaths))
var_ = np.cumsum(_additive_var(population, deaths))
at_risk = events['at_risk']
estimate_ = np.cumsum(_additive_f(at_risk, deaths))
var_ = np.cumsum(_additive_var(at_risk, deaths))

timeline = sorted(timeline)
estimate_ = estimate_.reindex(timeline, method='pad').fillna(0)
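In the non-reversed branch, the estimate now reads straight off the precomputed at_risk column rather than recomputing the population at each call. For intuition, a Nelson-Aalen-style cumulative hazard can be sketched from the same event table (a hand-rolled illustration, not the library's fitter):

    import numpy as np
    from lifelines.utils import survival_table_from_events

    T = np.array([1, 2, 2, 4, 5])
    C = np.array([1, 0, 1, 1, 0])
    events = survival_table_from_events(T, C)

    # Nelson-Aalen: cumulative sum of d_i / n_i over the event times
    cumulative_hazard = (events['observed'] / events['at_risk']).cumsum()
    print cumulative_hazard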
File renamed without changes.
2 changes: 1 addition & 1 deletion lifelines/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '0.7.1.0'
+__version__ = '0.8.0.0'
