Skip to content

Commit

Permalink
Merge 8a7cd99 into fc99934
Browse files Browse the repository at this point in the history
  • Loading branch information
CamDavidsonPilon committed Jun 30, 2018
2 parents fc99934 + 8a7cd99 commit dd27a93
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
### Changelogs

#### 0.14.5
- Fixes `multivariate_logrank_test` for n > 2 groups.
- Fixes the handling of weights in `KaplanMeierFitter` when using a pandas Series.

#### 0.14.4
- Adds `baseline_cumulative_hazard_` and `baseline_survival_` to `CoxTimeVaryingFitter`. Because of this, new prediction methods are available.
- fixed a bug in `add_covariate_to_timeline` when using `cumulative_sum` with multiple columns.
Expand Down
19 changes: 19 additions & 0 deletions lifelines/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,22 @@ def load_dfcv():
"""
from lifelines.datasets.dfcv_dataset import dfcv
return dfcv


def load_lymphoma(**kwargs):
    """
    Load the lymphoma dataset shipped as ``lymphoma.csv``.

    Source: https://www.statsdirect.com/help/content/survival_analysis/logrank.htm

    Size: (80, 3)
    Example:
         Stage_group  Time  Censor
      0            1     6       1
      1            1    19       1
      2            1    32       1
      3            1    42       1
      4            1    42       1

    All keyword arguments are forwarded unchanged to ``load_dataset``, and
    whatever ``load_dataset`` returns is returned as-is.
    """
    lymphoma = load_dataset('lymphoma.csv', **kwargs)
    return lymphoma
81 changes: 81 additions & 0 deletions lifelines/datasets/lymphoma.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
Stage_group,Time,Censor
1,6,1
1,19,1
1,32,1
1,42,1
1,42,1
1,43,0
1,94,1
1,126,0
1,169,0
1,207,1
1,211,0
1,227,0
1,253,1
1,255,0
1,270,0
1,310,0
1,316,0
1,335,0
1,346,0
2,4,1
2,6,1
2,10,1
2,11,1
2,11,1
2,11,1
2,13,1
2,17,1
2,20,1
2,20,1
2,21,1
2,22,1
2,24,1
2,24,1
2,29,1
2,30,1
2,30,1
2,31,1
2,33,1
2,34,1
2,35,1
2,39,1
2,40,1
2,41,0
2,43,0
2,45,1
2,46,1
2,50,1
2,56,1
2,61,0
2,61,0
2,63,1
2,68,1
2,82,1
2,85,1
2,88,1
2,89,1
2,90,1
2,93,1
2,104,1
2,110,1
2,134,1
2,137,1
2,160,0
2,169,1
2,171,1
2,173,1
2,175,1
2,184,1
2,201,1
2,222,1
2,235,0
2,247,0
2,260,0
2,284,0
2,290,0
2,291,0
2,302,0
2,304,0
2,341,0
2,345,0
11 changes: 6 additions & 5 deletions lifelines/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,16 +223,17 @@ def multivariate_logrank_test(event_durations, groups, event_observed=None,
assert abs(Z_j.sum()) < 10e-8, "Sum is not zero." # this should move to a test eventually.

# compute covariance matrix
factor = (((n_i - d_i) / (n_i - 1)).replace(np.inf, 1)) * d_i
factor = (((n_i - d_i) / (n_i - 1)).replace(np.inf, 1)) * d_i / n_i ** 2
n_ij['_'] = n_i.values
V_ = n_ij.mul(np.sqrt(factor) / n_i, axis='index').fillna(1)
V = -np.dot(V_.T, V_)
V_ = n_ij.mul(np.sqrt(factor), axis='index').fillna(1)

V = -np.dot(V_.T, V_) + 1
ix = np.arange(n_groups)
V[ix, ix] = -V[-1, ix] + V[ix, ix]
V[ix, ix] = V[ix, ix] - V[-1, ix]
V = V[:-1, :-1]

# take the first n-1 groups
U = Z_j.iloc[:-1].dot(np.linalg.pinv(V[:-1, :-1]).dot(Z_j.iloc[:-1])) # Z.T*inv(V)*Z
U = Z_j.iloc[:-1].dot(np.linalg.pinv(V[:-1, :-1])).dot(Z_j.iloc[:-1]) # Z.T*inv(V)*Z

# compute the p-values and tests
test_result, p_value = chisq_test(U, n_groups - 1, alpha)
Expand Down
2 changes: 1 addition & 1 deletion lifelines/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '0.14.4'
__version__ = '0.14.5'
10 changes: 9 additions & 1 deletion tests/test_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

from lifelines import statistics as stats
from lifelines.datasets import load_waltons, load_g3
from lifelines.datasets import load_waltons, load_g3, load_lymphoma


def test_sample_size_necessary_under_cph():
Expand Down Expand Up @@ -67,6 +67,14 @@ def test_rank_test_output_against_R_no_censorship():
assert abs(result.test_statistic - r_stat) < 10e-6


def test_n_more_than_2_multivariate_logrank():
    """Check multivariate_logrank_test against the StatsDirect lymphoma example."""
    # Reference values are taken from
    # https://www.statsdirect.com/help/content/survival_analysis/logrank.htm
    # NOTE(review): the bundled lymphoma.csv appears to contain only two
    # Stage_group values (1 and 2) -- confirm this test really exercises the
    # "more than 2 groups" path its name refers to.
    expected_statistic = 6.70971
    expected_p_value = 0.0096

    lymphoma = load_lymphoma()
    durations = lymphoma['Time']
    stage_groups = lymphoma['Stage_group']
    observed = lymphoma['Censor']

    outcome = stats.multivariate_logrank_test(durations, stage_groups, observed)

    assert abs(outcome.test_statistic - expected_statistic) < 1e-4
    assert abs(outcome.p_value - expected_p_value) < 1e-4


def test_rank_test_output_against_R_with_censorship():
"""
> time <- c(10,20,30,10,20,50)
Expand Down

0 comments on commit dd27a93

Please sign in to comment.