Skip to content

Commit

Permalink
this function wasn't showing up in docs, and I've made modifications …
Browse files Browse the repository at this point in the history
…to return a weight array too
  • Loading branch information
CamDavidsonPilon committed Apr 8, 2019
1 parent 3e63f74 commit 085f744
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 26 deletions.
1 change: 1 addition & 0 deletions lifelines/fitters/kaplan_meier_fitter.py
Expand Up @@ -119,6 +119,7 @@ def fit(
self._label = label

if weights is not None:
weights = np.asarray(weights)
if (weights.astype(int) != weights).any():
warnings.warn(
"""It looks like your weights are not integers, possibly propensity scores then?
Expand Down
53 changes: 31 additions & 22 deletions lifelines/utils/__init__.py
Expand Up @@ -19,6 +19,7 @@
"median_survival_times",
"survival_table_from_events",
"group_survival_table_from_events",
"survival_events_from_table",
"datetimes_to_durations",
"concordance_index",
"k_fold_cross_validation",
Expand Down Expand Up @@ -374,26 +375,28 @@ def _group_event_table_by_intervals(event_table, intervals):
)


def survival_events_from_table(survival_table, observed_deaths_col="observed", censored_col="censored"):
    """
    This is the inverse of the function ``survival_table_from_events``.

    Rather than emitting one element per individual, the output is "condensed":
    each duration contributes at most one entry for observed deaths and at most
    one entry for censorships, and the number of subjects behind each entry is
    reported in a separate weights array.

    Parameters
    ----------
    survival_table: DataFrame
        a pandas DataFrame with index as the durations and columns "observed" and "censored", referring to
        the number of individuals that died and were censored at time t.
    observed_deaths_col: str, optional (default: "observed")
        the column in the survival table that represents the number of subjects that were observed to die at a specific time
    censored_col: str, optional (default: "censored")
        the column in the survival table that represents the number of subjects that were censored at a specific time

    Returns
    -------
    T: array
        durations of observation -- one element per (duration, event flag) pair that has a positive count
    E: array
        event observations -- 1 if observed, 0 else.
    W: array
        weights -- the number of subjects sharing the corresponding duration and event flag

    Example
    -------
    >>> # Ex: the survival table, as a pandas DataFrame:
    >>>
    >>>          observed  censored
    >>>   index
    >>>   1             1         0
    >>>   2             0         1
    >>>   3             1         0
    >>>   4             1         1
    >>>   5             0         1
    >>>
    >>> # would return
    >>> T = np.array([1, 2, 3, 4, 4, 5])
    >>> E = np.array([1, 0, 1, 1, 0, 0])
    >>> W = np.array([1, 1, 1, 1, 1, 1])
    """
    T_ = []
    E_ = []
    W_ = []

    for t, row in survival_table.iterrows():
        # Rows (or columns within a row) with a zero count contribute nothing,
        # so durations at which nobody died or was censored are dropped.
        if row[observed_deaths_col] > 0:
            T_.append(t)
            E_.append(1)
            W_.append(row[observed_deaths_col])
        if row[censored_col] > 0:
            T_.append(t)
            E_.append(0)
            W_.append(row[censored_col])

    return np.asarray(T_), np.asarray(E_), np.asarray(W_)


def datetimes_to_durations(
Expand Down
18 changes: 14 additions & 4 deletions tests/utils/test_utils.py
Expand Up @@ -254,12 +254,22 @@ def test_datetimes_to_durations_custom_censor():
npt.assert_almost_equal(C, np.array([1, 0, 0], dtype=bool))


def test_survival_events_from_table_no_ties():
    """Round-trip events -> survival table -> events when every weight is 1.

    The two subjects at duration 4 differ in their event flag (one observed,
    one censored), so they stay as separate entries and each weight is 1.
    """
    T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([1, 0, 1, 1, 0, 1])
    d = utils.survival_table_from_events(T, C)
    T_, C_, W_ = utils.survival_events_from_table(d[["censored", "observed"]])
    npt.assert_array_equal(T, T_)
    npt.assert_array_equal(C, C_)
    npt.assert_array_equal(W_, np.ones_like(T))


def test_survival_events_from_table_with_ties():
    """Two observed deaths at duration 4 collapse into one entry of weight 2."""
    durations = np.array([1, 2, 3, 4, 4, 5])
    observed = np.array([1, 0, 1, 1, 1, 1])
    table = utils.survival_table_from_events(durations, observed)

    condensed_T, condensed_E, condensed_W = utils.survival_events_from_table(table[["censored", "observed"]])

    npt.assert_array_equal([1, 2, 3, 4, 5], condensed_T)
    npt.assert_array_equal([1, 0, 1, 1, 1], condensed_E)
    npt.assert_array_equal([1, 1, 1, 2, 1], condensed_W)


def test_survival_table_from_events_with_non_trivial_censorship_column():
Expand Down

0 comments on commit 085f744

Please sign in to comment.