Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nlogn concordance index algorithm (first pass) #145

Merged
merged 8 commits into from
Apr 21, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 0 additions & 13 deletions lifelines/_utils/__init__.py

This file was deleted.

81 changes: 0 additions & 81 deletions lifelines/_utils/_cindex.f90

This file was deleted.

48 changes: 0 additions & 48 deletions lifelines/_utils/cindex.py

This file was deleted.

13 changes: 5 additions & 8 deletions lifelines/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,12 +1307,10 @@ def summary(self):
df['upper %.2f' % self.alpha] = self.confidence_intervals_.ix['upper-bound'].values
return df

def print_summary(self, c_index=True):
def print_summary(self):
"""
Print summary statistics describing the fit.

c_index: If set to False, will not print the concordance index

"""
df = self.summary
# Significance codes last
Expand All @@ -1327,11 +1325,10 @@ def print_summary(self, c_index=True):
print('---')
print("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ",
end='\n\n')
if c_index:
print("Concordance = {:.3f}"
.format(concordance_index(self.durations,
-self.predict_partial_hazard(self.data).values.ravel(),
self.event_observed)))
print("Concordance = {:.3f}"
.format(concordance_index(self.durations,
-self.predict_partial_hazard(self.data).values.ravel(),
self.event_observed)))
return

def predict_partial_hazard(self, X):
Expand Down
2 changes: 1 addition & 1 deletion lifelines/tests/test_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ def test_print_summary(self):
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Concordance = 0.644""".strip().split()
Concordance = 0.640""".strip().split()
for i in [0, 1, 2, -2, -1]:
assert output[i] == expected[i]
finally:
Expand Down
43 changes: 35 additions & 8 deletions lifelines/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,10 @@
from ..estimation import CoxPHFitter
from ..datasets import (load_regression_dataset, load_larynx,
load_waltons, load_rossi)
from .._utils.cindex import concordance_index as slow_cindex
from lifelines import utils
try:
from .._utils._cindex import concordance_index as fast_cindex
except ImportError:
# If code has not been compiled.
fast_cindex = None
from lifelines.utils import _concordance_index as fast_cindex
from lifelines.utils import _naive_concordance_index as slow_cindex
from lifelines.utils import _BTree as BTree


def test_ridge_regression_with_penalty_is_less_than_without_penalty():
Expand Down Expand Up @@ -350,8 +347,38 @@ def test_survival_table_from_events_raises_value_error_if_too_early_births():
utils.survival_table_from_events(T, C, min_obs)


@pytest.mark.skipif(fast_cindex is None, reason='extensions not compiled')
def test_concordance_index_py_is_same_as_native():
def test_btree():
    """Exercise the _BTree rank/insert helpers used by the fast c-index."""
    tree = BTree(np.arange(10))

    # Before any inserts, every key ranks as (0 smaller, 0 equal).
    for key in range(10):
        assert tree.rank(key) == (0, 0)
    assert len(tree) == 0

    for value in (5, 6, 6, 0, 9):
        tree.insert(value)
    assert len(tree) == 5

    # (key, (count-below, count-equal)) pairs after the inserts above.
    expected = [
        (0, (0, 1)),
        (0.5, (1, 0)),
        (4.5, (1, 0)),
        (5, (1, 1)),
        (5.5, (2, 0)),
        (6, (2, 2)),
        (6.5, (4, 0)),
        (8.5, (4, 0)),
        (9, (4, 1)),
        (9.5, (5, 0)),
    ]
    for key, rank in expected:
        assert tree.rank(key) == rank

    # Construction should succeed for every size up to 31.
    for size in range(1, 32):
        BTree(np.arange(size))

    with pytest.raises(ValueError):
        # This has to go last since it screws up the counts
        tree.insert(5.5)

def test_concordance_index_fast_is_same_as_slow():
size = 100
T = np.random.normal(size=size)
P = np.random.normal(size=size)
Expand Down