From 20809c17d6e7cb4e71f52b9cdc2755d2ce7b0ac1 Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Tue, 23 Jun 2015 22:42:18 -0400
Subject: [PATCH 01/11] bump version to 0.8.0

---
 lifelines/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lifelines/version.py b/lifelines/version.py
index 8b4603bbb..94d895c07 100644
--- a/lifelines/version.py
+++ b/lifelines/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '0.7.1.0'
+__version__ = '0.8.0.0'

From 5858b3885486e3dda8f15578d84531f77e030a0d Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Tue, 23 Jun 2015 22:25:51 -0400
Subject: [PATCH 02/11] move files around first

---
 .travis.yml                                      |  2 +-
 lifelines/estimation.py                          | 16 ++++++++--------
 .../{_base_fitter.py => fitters/__init__.py}     |  2 ++
 lifelines/{ => fitters}/aalen_additive_fitter.py |  6 ++++--
 .../breslow_fleming_harrington_fitter.py         |  4 ++--
 lifelines/{ => fitters}/coxph_fitter.py          |  2 +-
 lifelines/{ => fitters}/exponential_fitter.py    |  2 +-
 lifelines/{ => fitters}/kaplan_meier_fitter.py   |  2 +-
 lifelines/{ => fitters}/nelson_aalen_fitter.py   |  2 +-
 lifelines/{ => fitters}/weibull_fitter.py        |  3 ++-
 lifelines/tests/__init__.py                      |  0
 lifelines/{utils.py => utils/__init__.py}        |  0
 lifelines/{ => utils}/progress_bar.py            |  0
 setup.py                                         |  5 +++--
 {lifelines/tests => tests}/__main__.py           |  0
 {lifelines/tests => tests}/conftest.py           |  0
 {lifelines/tests => tests}/test_estimation.py    | 10 +++++-----
 .../tests => tests}/test_generate_datasets.py    |  4 ++--
 {lifelines/tests => tests}/test_plotting.py      |  6 +++---
 {lifelines/tests => tests}/test_statistics.py    |  4 ++--
 {lifelines/tests => tests}/test_weibull.py       |  2 +-
 {lifelines/tests => tests/utils}/test_utils.py   |  5 +++--
 22 files changed, 42 insertions(+), 35 deletions(-)
 rename lifelines/{_base_fitter.py => fitters/__init__.py} (99%)
 rename lifelines/{ => fitters}/aalen_additive_fitter.py (99%)
 rename lifelines/{ => fitters}/breslow_fleming_harrington_fitter.py (96%)
 rename lifelines/{ => fitters}/coxph_fitter.py (99%)
 rename lifelines/{ => fitters}/exponential_fitter.py (98%)
 rename lifelines/{ => fitters}/kaplan_meier_fitter.py (98%)
 rename lifelines/{ => fitters}/nelson_aalen_fitter.py (99%)
 rename lifelines/{ => fitters}/weibull_fitter.py (99%)
 delete mode 100644 lifelines/tests/__init__.py
 rename lifelines/{utils.py => utils/__init__.py} (100%)
 rename lifelines/{ => utils}/progress_bar.py (100%)
 rename {lifelines/tests => tests}/__main__.py (100%)
 rename {lifelines/tests => tests}/conftest.py (100%)
 rename {lifelines/tests => tests}/test_estimation.py (99%)
 rename {lifelines/tests => tests}/test_generate_datasets.py (87%)
 rename {lifelines/tests => tests}/test_plotting.py (97%)
 rename {lifelines/tests => tests}/test_statistics.py (98%)
 rename {lifelines/tests => tests}/test_weibull.py (92%)
 rename {lifelines/tests => tests/utils}/test_utils.py (99%)

diff --git a/.travis.yml b/.travis.yml
index 663e0b860..b799c623f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,7 +28,7 @@ install:
   - conda create -n test-environment python=$TRAVIS_PYTHON_VERSION pip numpy scipy pandas matplotlib pytest
   - source activate test-environment
   # Build in place so we can run tests
-  - python setup.py build_ext --inplace
+  - python setup.py install
   - pip install coveralls
   - pip install pytest-cov
 # command to run tests
diff --git a/lifelines/estimation.py b/lifelines/estimation.py
index 906506b6c..00925a61b 100644
--- a/lifelines/estimation.py
+++ b/lifelines/estimation.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
-from lifelines._base_fitter import BaseFitter
-from lifelines.weibull_fitter import WeibullFitter
-from lifelines.exponential_fitter import ExponentialFitter
-from lifelines.nelson_aalen_fitter import NelsonAalenFitter
-from lifelines.kaplan_meier_fitter import KaplanMeierFitter
-from lifelines.breslow_fleming_harrington_fitter import BreslowFlemingHarringtonFitter
-from lifelines.coxph_fitter import CoxPHFitter
-from lifelines.aalen_additive_fitter import AalenAdditiveFitter
+from lifelines.fitters import BaseFitter
+from lifelines.fitters.weibull_fitter import WeibullFitter
+from lifelines.fitters.exponential_fitter import ExponentialFitter
+from lifelines.fitters.nelson_aalen_fitter import NelsonAalenFitter
+from lifelines.fitters.kaplan_meier_fitter import KaplanMeierFitter
+from lifelines.fitters.breslow_fleming_harrington_fitter import BreslowFlemingHarringtonFitter
+from lifelines.fitters.coxph_fitter import CoxPHFitter
+from lifelines.fitters.aalen_additive_fitter import AalenAdditiveFitter
diff --git a/lifelines/_base_fitter.py b/lifelines/fitters/__init__.py
similarity index 99%
rename from lifelines/_base_fitter.py
rename to lifelines/fitters/__init__.py
index 089ccf9dc..3cba379b2 100644
--- a/lifelines/_base_fitter.py
+++ b/lifelines/fitters/__init__.py
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 from __future__ import print_function
+
 import numpy as np
 import pandas as pd
+
 from lifelines.plotting import plot_estimate
 from lifelines.utils import qth_survival_times
 
diff --git a/lifelines/aalen_additive_fitter.py b/lifelines/fitters/aalen_additive_fitter.py
similarity index 99%
rename from lifelines/aalen_additive_fitter.py
rename to lifelines/fitters/aalen_additive_fitter.py
index 59dffcd4c..c905a023b 100644
--- a/lifelines/aalen_additive_fitter.py
+++ b/lifelines/fitters/aalen_additive_fitter.py
@@ -1,13 +1,15 @@
 # -*- coding: utf-8 -*-
 from __future__ import print_function
+
 import numpy as np
 import pandas as pd
 from numpy.linalg import LinAlgError
 from scipy.integrate import trapz
-from lifelines._base_fitter import BaseFitter
+
+from lifelines.fitters import BaseFitter
 from lifelines.utils import _get_index, inv_normal_cdf, epanechnikov_kernel, \
     ridge_regression as lr, qth_survival_times
-from lifelines.progress_bar import progress_bar
+from lifelines.utils.progress_bar import progress_bar
 from lifelines.plotting import plot_regressions
 
 
diff --git a/lifelines/breslow_fleming_harrington_fitter.py b/lifelines/fitters/breslow_fleming_harrington_fitter.py
similarity index 96%
rename from lifelines/breslow_fleming_harrington_fitter.py
rename to lifelines/fitters/breslow_fleming_harrington_fitter.py
index 4090706fa..9561d50e7 100644
--- a/lifelines/breslow_fleming_harrington_fitter.py
+++ b/lifelines/fitters/breslow_fleming_harrington_fitter.py
@@ -2,8 +2,8 @@
 from __future__ import print_function
 import numpy as np
 
-from lifelines._base_fitter import UnivariateFitter
-from lifelines.nelson_aalen_fitter import NelsonAalenFitter
+from lifelines.fitters import UnivariateFitter
+from lifelines.fitters.nelson_aalen_fitter import NelsonAalenFitter
 from lifelines.utils import median_survival_times
 
 
diff --git a/lifelines/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py
similarity index 99%
rename from lifelines/coxph_fitter.py
rename to lifelines/fitters/coxph_fitter.py
index 1aea9c5f1..fbe530ff5 100644
--- a/lifelines/coxph_fitter.py
+++ b/lifelines/fitters/coxph_fitter.py
@@ -8,7 +8,7 @@
 from scipy.integrate import trapz
 import scipy.stats as stats
 
-from lifelines._base_fitter import BaseFitter
+from lifelines.fitters import BaseFitter
 from lifelines.utils import survival_table_from_events, inv_normal_cdf, normalize,\
     significance_code, concordance_index, _get_index, qth_survival_times
 
diff --git a/lifelines/exponential_fitter.py b/lifelines/fitters/exponential_fitter.py
similarity index 98%
rename from lifelines/exponential_fitter.py
rename to lifelines/fitters/exponential_fitter.py
index 9596c6000..64b5828d0 100644
--- a/lifelines/exponential_fitter.py
+++ b/lifelines/fitters/exponential_fitter.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import inv_normal_cdf
 
 
diff --git a/lifelines/kaplan_meier_fitter.py b/lifelines/fitters/kaplan_meier_fitter.py
similarity index 98%
rename from lifelines/kaplan_meier_fitter.py
rename to lifelines/fitters/kaplan_meier_fitter.py
index 740e7c8a3..9d6fa9aef 100644
--- a/lifelines/kaplan_meier_fitter.py
+++ b/lifelines/fitters/kaplan_meier_fitter.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import _preprocess_inputs, _additive_estimate, StatError, inv_normal_cdf,\
     median_survival_times
 
diff --git a/lifelines/nelson_aalen_fitter.py b/lifelines/fitters/nelson_aalen_fitter.py
similarity index 99%
rename from lifelines/nelson_aalen_fitter.py
rename to lifelines/fitters/nelson_aalen_fitter.py
index 5cf73589d..359fe4a20 100644
--- a/lifelines/nelson_aalen_fitter.py
+++ b/lifelines/fitters/nelson_aalen_fitter.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import _preprocess_inputs, _additive_estimate, epanechnikov_kernel,\
     inv_normal_cdf
 
diff --git a/lifelines/weibull_fitter.py b/lifelines/fitters/weibull_fitter.py
similarity index 99%
rename from lifelines/weibull_fitter.py
rename to lifelines/fitters/weibull_fitter.py
index 5f6afd1f3..dc86e139b 100644
--- a/lifelines/weibull_fitter.py
+++ b/lifelines/fitters/weibull_fitter.py
@@ -2,8 +2,9 @@
 from __future__ import print_function, division
 import numpy as np
 import pandas as pd
+
 from numpy.linalg import solve, norm, inv
-from lifelines._base_fitter import UnivariateFitter
+from lifelines.fitters import UnivariateFitter
 from lifelines.utils import inv_normal_cdf
 
 def _negative_log_likelihood(lambda_rho, T, E):
diff --git a/lifelines/tests/__init__.py b/lifelines/tests/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/lifelines/utils.py b/lifelines/utils/__init__.py
similarity index 100%
rename from lifelines/utils.py
rename to lifelines/utils/__init__.py
diff --git a/lifelines/progress_bar.py b/lifelines/utils/progress_bar.py
similarity index 100%
rename from lifelines/progress_bar.py
rename to lifelines/utils/progress_bar.py
diff --git a/setup.py b/setup.py
index d4ab610b8..f75e6aaee 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,9 @@ def read(fname):
     url="https://github.com/CamDavidsonPilon/lifelines",
     packages=['lifelines',
               'lifelines.datasets',
-              'lifelines.tests'],
+              'lifelines.fitters',
+              'lifelines.utils',
+              ],
     long_description=read('README.txt'),
     classifiers=[
         "Development Status :: 4 - Beta",
@@ -44,7 +46,6 @@ def read(fname):
             "../README.txt",
             "../LICENSE",
             "../MANIFEST.in",
-            "../*.ipynb",
             "datasets/*",
         ]
     },
diff --git a/lifelines/tests/__main__.py b/tests/__main__.py
similarity index 100%
rename from lifelines/tests/__main__.py
rename to tests/__main__.py
diff --git a/lifelines/tests/conftest.py b/tests/conftest.py
similarity index 100%
rename from lifelines/tests/conftest.py
rename to tests/conftest.py
diff --git a/lifelines/tests/test_estimation.py b/tests/test_estimation.py
similarity index 99%
rename from lifelines/tests/test_estimation.py
rename to tests/test_estimation.py
index b8168abdf..209e1b753 100644
--- a/lifelines/tests/test_estimation.py
+++ b/tests/test_estimation.py
@@ -9,14 +9,14 @@
 from pandas.util.testing import assert_frame_equal, assert_series_equal
 import numpy.testing as npt
 
-from ..utils import k_fold_cross_validation, StatError
-from ..estimation import CoxPHFitter, AalenAdditiveFitter, KaplanMeierFitter, \
+from lifelines.utils import k_fold_cross_validation, StatError
+from lifelines.estimation import CoxPHFitter, AalenAdditiveFitter, KaplanMeierFitter, \
     NelsonAalenFitter, BreslowFlemingHarringtonFitter, ExponentialFitter, \
     WeibullFitter, BaseFitter
-from ..datasets import load_regression_dataset, load_larynx, load_waltons, load_kidney_transplant, load_rossi,\
+from lifelines.datasets import load_regression_dataset, load_larynx, load_waltons, load_kidney_transplant, load_rossi,\
     load_lcd, load_panel_test, load_g3, load_holly_molly_polly
-from ..generate_datasets import generate_hazard_rates, generate_random_lifetimes, cumulative_integral
-from ..utils import concordance_index
+from lifelines.generate_datasets import generate_hazard_rates, generate_random_lifetimes, cumulative_integral
+from lifelines.utils import concordance_index
 
 
 @pytest.fixture
diff --git a/lifelines/tests/test_generate_datasets.py b/tests/test_generate_datasets.py
similarity index 87%
rename from lifelines/tests/test_generate_datasets.py
rename to tests/test_generate_datasets.py
index e9f46cdd8..6091a42d4 100644
--- a/lifelines/tests/test_generate_datasets.py
+++ b/tests/test_generate_datasets.py
@@ -5,8 +5,8 @@
 import pytest
 import matplotlib.pyplot as plt
 
-from ..estimation import KaplanMeierFitter, NelsonAalenFitter
-from ..generate_datasets import exponential_survival_data
+from lifelines.estimation import KaplanMeierFitter, NelsonAalenFitter
+from lifelines.generate_datasets import exponential_survival_data
 
 
 def test_exponential_data_sets_correct_censor():
diff --git a/lifelines/tests/test_plotting.py b/tests/test_plotting.py
similarity index 97%
rename from lifelines/tests/test_plotting.py
rename to tests/test_plotting.py
index a87584072..efc64e9c6 100644
--- a/lifelines/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -3,9 +3,9 @@
 import os
 import pytest
 import numpy as np
-from ..estimation import NelsonAalenFitter, KaplanMeierFitter, AalenAdditiveFitter
-from ..generate_datasets import generate_random_lifetimes, generate_hazard_rates
-from ..plotting import plot_lifetimes
+from lifelines.estimation import NelsonAalenFitter, KaplanMeierFitter, AalenAdditiveFitter
+from lifelines.generate_datasets import generate_random_lifetimes, generate_hazard_rates
+from lifelines.plotting import plot_lifetimes
 
 
 @pytest.mark.plottest
diff --git a/lifelines/tests/test_statistics.py b/tests/test_statistics.py
similarity index 98%
rename from lifelines/tests/test_statistics.py
rename to tests/test_statistics.py
index 8dd4f1bd9..1d999109d 100644
--- a/lifelines/tests/test_statistics.py
+++ b/tests/test_statistics.py
@@ -4,8 +4,8 @@
 import numpy.testing as npt
 import pytest
 
-from .. import statistics as stats
-from ..datasets import load_waltons, load_g3
+from lifelines import statistics as stats
+from lifelines.datasets import load_waltons, load_g3
 
 
 def test_unequal_intensity_with_random_data():
diff --git a/lifelines/tests/test_weibull.py b/tests/test_weibull.py
similarity index 92%
rename from lifelines/tests/test_weibull.py
rename to tests/test_weibull.py
index 1c302a680..0eb21f674 100644
--- a/lifelines/tests/test_weibull.py
+++ b/tests/test_weibull.py
@@ -1,6 +1,6 @@
 
 import numpy as np
-from lifelines import weibull_fitter as wf
+from lifelines.fitters import weibull_fitter as wf
 
 
 def test_lambda_gradient():
diff --git a/lifelines/tests/test_utils.py b/tests/utils/test_utils.py
similarity index 99%
rename from lifelines/tests/test_utils.py
rename to tests/utils/test_utils.py
index 9dcf006f0..d5d2a5676 100644
--- a/lifelines/tests/test_utils.py
+++ b/tests/utils/test_utils.py
@@ -7,9 +7,10 @@
 import numpy.testing as npt
 from numpy.linalg import norm, lstsq
 from numpy.random import randn
-from ..estimation import CoxPHFitter
-from ..datasets import (load_regression_dataset, load_larynx,
+from lifelines.estimation import CoxPHFitter
+from lifelines.datasets import (load_regression_dataset, load_larynx,
                         load_waltons, load_rossi)
+
 from lifelines import utils
 from lifelines.utils import _concordance_index as fast_cindex
 from lifelines.utils import _naive_concordance_index as slow_cindex

From eb4afe0616596310bb99b3f3f9f97d32838c18ad Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Thu, 9 Jul 2015 20:51:38 -0400
Subject: [PATCH 03/11] add at-risk column to survival table

---
 lifelines/plotting.py       |  2 +-
 lifelines/utils/__init__.py | 34 ++++++++++++++++++----------------
 tests/utils/test_utils.py   |  7 +++++++
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/lifelines/plotting.py b/lifelines/plotting.py
index 787b8b0ed..d2ccdafeb 100644
--- a/lifelines/plotting.py
+++ b/lifelines/plotting.py
@@ -2,7 +2,7 @@
 from __future__ import print_function
 
 import numpy as np
-from lifelines.utils import coalesce
+from .utils import coalesce
 
 
 def is_latex_enabled():
diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py
index 36cc0ec76..3481fa9c2 100644
--- a/lifelines/utils/__init__.py
+++ b/lifelines/utils/__init__.py
@@ -121,6 +121,9 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
     unique_groups = groups.unique()
 
     # set first group
+
+    ### This function is terrible. clean it up! 
+
     g = unique_groups[0]
     ix = (groups == g)
     T = durations[ix]
@@ -147,7 +150,8 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
 
 
 def survival_table_from_events(death_times, event_observed, birth_times=None,
-                               columns=["removed", "observed", "censored", "entrance"], weights=None):
+                               columns=["removed", "observed", "censored", "entrance", "at_risk"], 
+                               weights=None):
     """
     Parameters:
         death_times: (n,) array of event times
@@ -167,21 +171,18 @@ def survival_table_from_events(death_times, event_observed, birth_times=None,
          left the population due to event_observed)
 
     Example:
-        #input
-        survival_table_from_events( waltonT, np.ones_like(waltonT)) #available in test suite
-
-        #output
 
-                  removed  observed  censored  entrance
+                  removed  observed  censored  entrance   at_risk
         event_at
-        0               0         0         0        11
-        6               1         1         0         0
-        7               2         2         0         0
-        9               3         3         0         0
-        13              3         3         0         0
-        15              2         2         0         0
+        0               0         0         0        11        11
+        6               1         1         0         0        11
+        7               2         2         0         0        10
+        9               3         3         0         0         8
+        13              3         3         0         0         5
+        15              2         2         0         0         2    
 
     """
+    removed, observed, censored, entrance, at_risk = columns
     death_times = np.asarray(death_times)
     if birth_times is None:
         birth_times = min(0, death_times.min()) * np.ones(death_times.shape[0])
@@ -192,17 +193,18 @@ def survival_table_from_events(death_times, event_observed, birth_times=None,
 
     # deal with deaths and censorships
     df = pd.DataFrame(death_times, columns=["event_at"])
-    df[columns[0]] = 1 if weights is None else weights
-    df[columns[1]] = np.asarray(event_observed)
+    df[removed] = 1 if weights is None else weights
+    df[observed] = np.asarray(event_observed)
     death_table = df.groupby("event_at").sum()
-    death_table[columns[2]] = (death_table[columns[0]] - death_table[columns[1]]).astype(int)
+    death_table[censored] = (death_table[removed] - death_table[observed]).astype(int)
 
     # deal with late births
     births = pd.DataFrame(birth_times, columns=['event_at'])
-    births[columns[3]] = 1
+    births[entrance] = 1
     births_table = births.groupby('event_at').sum()
 
     event_table = death_table.join(births_table, how='outer', sort=True).fillna(0)  # http://wesmckinney.com/blog/?p=414
+    event_table[at_risk] = event_table[entrance].cumsum() - event_table[removed].cumsum().shift(1).fillna(0)
     return event_table.astype(float)
 
 
diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
index d5d2a5676..42465ea50 100644
--- a/tests/utils/test_utils.py
+++ b/tests/utils/test_utils.py
@@ -207,6 +207,13 @@ def test_group_survival_table_from_events_on_waltons_data():
     assert all(removed.index == observed.index)
     assert all(removed.index == censored.index)
 
+def test_survival_table_from_events_at_risk_column():
+    df = load_waltons()
+    # from R
+    expected = [163.0, 162.0, 160.0, 157.0, 154.0, 152.0, 151.0, 148.0, 144.0, 139.0, 134.0, 133.0, 130.0, 128.0, 126.0, 119.0, 118.0, 
+                108.0, 107.0, 99.0, 96.0, 89.0, 87.0, 69.0, 65.0, 49.0, 38.0, 36.0, 27.0, 24.0, 14.0, 1.0]
+    df = utils.survival_table_from_events(df['T'], df['E'])
+    assert list(df['at_risk'][1:]) == expected # skip the first event as that is the birth time, 0.
 
 def test_survival_table_to_events_casts_to_float():
     T, C = np.array([1, 2, 3, 4, 4, 5]), np.array([True, False, True, True, True, True])

From 607945d6b155d746131f11293257323f7fc6b53a Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Sun, 12 Jul 2015 20:29:58 -0400
Subject: [PATCH 04/11] Adding at risk column to survival_table_from_events

---
 lifelines/utils/__init__.py | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py
index 3481fa9c2..d77f8e590 100644
--- a/lifelines/utils/__init__.py
+++ b/lifelines/utils/__init__.py
@@ -108,8 +108,10 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
         ]
 
     """
+
     n = np.max(groups.shape)
     assert n == np.max(durations.shape) == np.max(event_observed.shape), "inputs must be of the same length."
+    
     if birth_times is None:
         # Create some birth times
         birth_times = np.zeros(np.max(durations.shape))
@@ -117,35 +119,26 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
 
     assert n == np.max(birth_times.shape), "inputs must be of the same length."
 
-    groups, durations, event_observed, birth_times = map(lambda x: pd.Series(np.reshape(x, (n,))), [groups, durations, event_observed, birth_times])
+    groups, durations, event_observed, birth_times = [pd.Series(np.reshape(data, (n,))) for data in [groups, durations, event_observed, birth_times]]
     unique_groups = groups.unique()
 
-    # set first group
-
-    ### This function is terrible. clean it up! 
-
-    g = unique_groups[0]
-    ix = (groups == g)
-    T = durations[ix]
-    C = event_observed[ix]
-    B = birth_times[ix]
-
-    g_name = str(g)
-    data = survival_table_from_events(T, C, B,
-                                      columns=['removed:' + g_name, "observed:" + g_name, 'censored:' + g_name, 'entrance' + g_name])
-    for g in unique_groups[1:]:
-        ix = groups == g
+    for i, group in enumerate(unique_groups):
+        ix = groups == group
         T = durations[ix]
         C = event_observed[ix]
         B = birth_times[ix]
-        g_name = str(g)
-        data = data.join(survival_table_from_events(T, C, B,
-                                                    columns=['removed:' + g_name, "observed:" + g_name, 'censored:' + g_name, 'entrance' + g_name]),
-                         how='outer')
+        group_name = str(group)
+        columns = [event_name + ":" + group_name for event_name in ['removed', 'observed', 'censored', 'entrance', 'at_risk']]
+        if i == 0:
+            data = survival_table_from_events(T, C, B, columns=columns)
+        else:
+            data = data.join(survival_table_from_events(T, C, B, columns=columns), how='outer')
+
     data = data.fillna(0)
     # hmmm pandas its too bad I can't do data.ix[:limit] and leave out the if.
     if int(limit) != -1:
         data = data.ix[:limit]
+
     return unique_groups, data.filter(like='removed:'), data.filter(like='observed:'), data.filter(like='censored:')
 
 

From cca47600a72decbc8658af5c697a286138b87deb Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Sun, 12 Jul 2015 20:33:07 -0400
Subject: [PATCH 05/11] use at risk column in additive estimates

---
 lifelines/utils/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py
index d77f8e590..b48507c5b 100644
--- a/lifelines/utils/__init__.py
+++ b/lifelines/utils/__init__.py
@@ -582,13 +582,13 @@ def _additive_estimate(events, timeline, _additive_f, _additive_var, reverse):
     """
     if reverse:
         events = events.sort_index(ascending=False)
-        population = events['entrance'].sum() - events['removed'].cumsum().shift(1).fillna(0)
+        at_risk = events['at_risk']
         deaths = events['observed'].shift(1).fillna(0)
         estimate_ = np.cumsum(_additive_f(population, deaths)).ffill().sort_index()
         var_ = np.cumsum(_additive_var(population, deaths)).ffill().sort_index()
     else:
         deaths = events['observed']
-        population = events['entrance'].cumsum() - events['removed'].cumsum().shift(1).fillna(0)  # slowest line here.
+        at_risk = events['at_risk']
         estimate_ = np.cumsum(_additive_f(population, deaths))
         var_ = np.cumsum(_additive_var(population, deaths))
 

From d14819f5b1316afdae65a69a1370f600529bdafc Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Mon, 13 Jul 2015 19:35:00 -0400
Subject: [PATCH 06/11] complete rename

---
 lifelines/utils/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py
index b48507c5b..f61e7ec20 100644
--- a/lifelines/utils/__init__.py
+++ b/lifelines/utils/__init__.py
@@ -584,13 +584,13 @@ def _additive_estimate(events, timeline, _additive_f, _additive_var, reverse):
         events = events.sort_index(ascending=False)
         at_risk = events['at_risk']
         deaths = events['observed'].shift(1).fillna(0)
-        estimate_ = np.cumsum(_additive_f(population, deaths)).ffill().sort_index()
-        var_ = np.cumsum(_additive_var(population, deaths)).ffill().sort_index()
+        estimate_ = np.cumsum(_additive_f(at_risk, deaths)).ffill().sort_index()
+        var_ = np.cumsum(_additive_var(at_risk, deaths)).ffill().sort_index()
     else:
         deaths = events['observed']
         at_risk = events['at_risk']
-        estimate_ = np.cumsum(_additive_f(population, deaths))
-        var_ = np.cumsum(_additive_var(population, deaths))
+        estimate_ = np.cumsum(_additive_f(at_risk, deaths))
+        var_ = np.cumsum(_additive_var(at_risk, deaths))
 
     timeline = sorted(timeline)
     estimate_ = estimate_.reindex(timeline, method='pad').fillna(0)

From a2a68b8177d0c5c942cd06cfc47bfef54b9a731e Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Mon, 13 Jul 2015 19:38:58 -0400
Subject: [PATCH 07/11] add survival_table_from_events to docs

---
 docs/Quickstart.rst | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/docs/Quickstart.rst b/docs/Quickstart.rst
index 173d6bf98..aa7a6780a 100644
--- a/docs/Quickstart.rst
+++ b/docs/Quickstart.rst
@@ -114,6 +114,27 @@ Lifelines has some utility functions to transform this dataset into durations an
     T, C = datetimes_to_durations(start_times, end_times, freq='h')
 
 
+Alternatively, perhaps you are intersted in viewing the survival table given some durations and censorship vectors.
+
+
+.. code:: python
+    
+    from lifelines.utils import survival_table_from_events
+
+    table = survival_table_from_events(T, C)
+    print table.head()
+    
+    """
+              removed  observed  censored  entrance  at_risk
+    event_at
+    0               0         0         0        60       60
+    2               2         1         1         0       60
+    3               3         1         2         0       58
+    4               5         3         2         0       55
+    5              12         6         6         0       50
+    """
+
+
 Survival Regression
 ---------------------------------
 

From dc3c61ef6c9e1bb2c222bde6aa40abbc6efe6f9d Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Mon, 13 Jul 2015 19:40:03 -0400
Subject: [PATCH 08/11] more docs

---
 docs/Examples.rst   | 21 +++++++++++++++++++++
 docs/Quickstart.rst |  4 ++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/docs/Examples.rst b/docs/Examples.rst
index 39e96722a..6a0648529 100644
--- a/docs/Examples.rst
+++ b/docs/Examples.rst
@@ -156,6 +156,27 @@ time (months, days, ...)      observed deaths       censored
     print C # np.array([1,1,1,1,1,1,1,0,1,1, ...])
 
 
+Alternatively, perhaps you are interested in viewing the survival table given some durations and censorship vectors.
+
+
+.. code:: python
+    
+    from lifelines.utils import survival_table_from_events
+
+    table = survival_table_from_events(T, C)
+    print table.head()
+    
+    """
+              removed  observed  censored  entrance  at_risk
+    event_at
+    0               0         0         0        60       60
+    2               2         1         1         0       60
+    3               3         1         2         0       58
+    4               5         3         2         0       55
+    5              12         6         6         0       50
+    """
+
+
 
 Plotting multiple figures on an plot 
 ##############################################
diff --git a/docs/Quickstart.rst b/docs/Quickstart.rst
index aa7a6780a..d1a3c247e 100644
--- a/docs/Quickstart.rst
+++ b/docs/Quickstart.rst
@@ -114,7 +114,7 @@ Lifelines has some utility functions to transform this dataset into durations an
     T, C = datetimes_to_durations(start_times, end_times, freq='h')
 
 
-Alternatively, perhaps you are intersted in viewing the survival table given some durations and censorship vectors.
+Alternatively, perhaps you are interested in viewing the survival table given some durations and censorship vectors.
 
 
 .. code:: python
@@ -123,7 +123,7 @@ Alternatively, perhaps you are intersted in viewing the survival table given som
 
     table = survival_table_from_events(T, C)
     print table.head()
-    
+
     """
               removed  observed  censored  entrance  at_risk
     event_at

From 84e96a9f33bc1e9f81868ec1601a2b784c392faa Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Sat, 1 Aug 2015 12:00:58 -0400
Subject: [PATCH 09/11] adding sample size and power calculations

---
 lifelines/fitters/weibull_fitter.py |  1 +
 lifelines/statistics.py             | 58 +++++++++++++++++++++++++++++
 lifelines/utils/__init__.py         |  6 +--
 tests/test_statistics.py            |  9 +++++
 4 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/lifelines/fitters/weibull_fitter.py b/lifelines/fitters/weibull_fitter.py
index dc86e139b..2d48d2e93 100644
--- a/lifelines/fitters/weibull_fitter.py
+++ b/lifelines/fitters/weibull_fitter.py
@@ -7,6 +7,7 @@
 from lifelines.fitters import UnivariateFitter
 from lifelines.utils import inv_normal_cdf
 
+
 def _negative_log_likelihood(lambda_rho, T, E):
     if np.any(lambda_rho < 0):
         return np.inf
diff --git a/lifelines/statistics.py b/lifelines/statistics.py
index 3a8ec9ac1..f5b9f4a73 100644
--- a/lifelines/statistics.py
+++ b/lifelines/statistics.py
@@ -9,6 +9,64 @@
 from lifelines.utils import group_survival_table_from_events
 
 
+def sample_size_necessary_under_cph(power, ratio_of_participants, p_exp, p_con,
+                                    postulated_hazard_ratio, alpha=0.05):
+    """
+    This computes the sample sizes needed to achieve a desired power when comparing two groups
+    under a Cox Proportional Hazard model.
+
+    References:
+        https://cran.r-project.org/web/packages/powerSurvEpi/powerSurvEpi.pdf
+
+    Parameters:
+        power: power to detect the magnitude of the hazard ratio as small as that specified by postulated_hazard_ratio.
+        ratio_of_participants: ratio of participants in experimental group over control group.
+        p_exp: probability of failure in experimental group over period of study.
+        p_con: probability of failure in control group over period of study
+        postulated_hazard_ratio: the postulated hazard ratio
+        alpha: type I error rate
+
+    Returns:
+        n_exp, n_con: the sample sizes needed for the experiment and control groups, respectively, to achieve the desired power
+    """
+    z = lambda p: stats.norm.ppf(p)
+
+    m = 1.0 / ratio_of_participants \
+        * ((ratio_of_participants * postulated_hazard_ratio + 1.0) / (postulated_hazard_ratio - 1.0)) ** 2 \
+        * (z(1. - alpha / 2.) + z(power)) ** 2
+
+    n_exp = m * ratio_of_participants / (ratio_of_participants * p_exp + p_con)
+    n_con = m / (ratio_of_participants * p_exp + p_con)
+
+    return int(np.ceil(n_exp)), int(np.ceil(n_con))
+
+
+def power_under_cph(n_exp, n_con, p_exp, p_con, postulated_hazard_ratio, alpha=0.05):
+    """
+    This computes the power to detect the postulated hazard ratio when comparing two groups
+    of the given sizes under a Cox Proportional Hazard model.
+
+    References:
+        https://cran.r-project.org/web/packages/powerSurvEpi/powerSurvEpi.pdf
+
+    Parameters:
+        n_exp: size of the experiment group.
+        n_con: size of the control group.
+        p_exp: probability of failure in experimental group over period of study.
+        p_con: probability of failure in control group over period of study
+        postulated_hazard_ratio: the postulated hazard ratio
+        alpha: type I error rate
+
+    Returns:
+        power: power to detect the magnitude of the hazard ratio as small as that specified by postulated_hazard_ratio.
+    """
+    z = lambda p: stats.norm.ppf(p)
+
+    m = n_exp * p_exp + n_con * p_con
+    k = float(n_exp) / float(n_con)
+    return stats.norm.cdf(np.sqrt(k * m) * abs(postulated_hazard_ratio - 1) / (k * postulated_hazard_ratio + 1) - z(1 - alpha / 2.))
+
+
 def logrank_test(event_times_A, event_times_B, event_observed_A=None, event_observed_B=None,
                  alpha=0.95, t_0=-1, **kwargs):
     """
diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py
index f61e7ec20..5334ded2b 100644
--- a/lifelines/utils/__init__.py
+++ b/lifelines/utils/__init__.py
@@ -111,7 +111,7 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
 
     n = np.max(groups.shape)
     assert n == np.max(durations.shape) == np.max(event_observed.shape), "inputs must be of the same length."
-    
+
     if birth_times is None:
         # Create some birth times
         birth_times = np.zeros(np.max(durations.shape))
@@ -143,7 +143,7 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
 
 
 def survival_table_from_events(death_times, event_observed, birth_times=None,
-                               columns=["removed", "observed", "censored", "entrance", "at_risk"], 
+                               columns=["removed", "observed", "censored", "entrance", "at_risk"],
                                weights=None):
     """
     Parameters:
@@ -172,7 +172,7 @@ def survival_table_from_events(death_times, event_observed, birth_times=None,
         7               2         2         0         0        10
         9               3         3         0         0         8
         13              3         3         0         0         5
-        15              2         2         0         0         2    
+        15              2         2         0         0         2
 
     """
     removed, observed, censored, entrance, at_risk = columns
diff --git a/tests/test_statistics.py b/tests/test_statistics.py
index 1d999109d..38ad9a8d5 100644
--- a/tests/test_statistics.py
+++ b/tests/test_statistics.py
@@ -7,6 +7,15 @@
 from lifelines import statistics as stats
 from lifelines.datasets import load_waltons, load_g3
 
+def test_sample_size_necessary_under_cph():
+    assert stats.sample_size_necessary_under_cph(0.8, 1, 0.8, 0.2, 0.139) == (14, 14)
+    assert stats.sample_size_necessary_under_cph(0.8, 1, 0.5, 0.5, 1.2) == (950, 950)
+    assert stats.sample_size_necessary_under_cph(0.8, 1.5, 0.5, 0.5, 1.2) == (1231, 821)
+    assert stats.sample_size_necessary_under_cph(0.8, 1.5, 0.5, 0.5, 1.2, alpha=0.01) == (1832, 1221)
+
+def test_power_under_cph():
+    assert abs(stats.power_under_cph(12,12, 0.8, 0.2, 0.139) - 0.744937) < 10e-6
+    assert abs(stats.power_under_cph(12,20, 0.8, 0.2, 1.2) - 0.05178317) < 10e-6
 
 def test_unequal_intensity_with_random_data():
     data1 = np.random.exponential(5, size=(2000, 1))

From b401593c9705ead104d90345bb10eb38c657eb95 Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Sat, 1 Aug 2015 13:33:24 -0400
Subject: [PATCH 10/11] fixing left censorship + improved test

---
 lifelines/utils/__init__.py | 10 ++++++----
 tests/test_estimation.py    |  8 +++++---
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py
index f61e7ec20..da4b84022 100644
--- a/lifelines/utils/__init__.py
+++ b/lifelines/utils/__init__.py
@@ -582,10 +582,12 @@ def _additive_estimate(events, timeline, _additive_f, _additive_var, reverse):
     """
     if reverse:
         events = events.sort_index(ascending=False)
-        at_risk = events['at_risk']
-        deaths = events['observed'].shift(1).fillna(0)
-        estimate_ = np.cumsum(_additive_f(at_risk, deaths)).ffill().sort_index()
-        var_ = np.cumsum(_additive_var(at_risk, deaths)).ffill().sort_index()
+        at_risk = events['entrance'].sum() - events['removed'].cumsum().shift(1).fillna(0)
+        
+        deaths = events['observed']
+        
+        estimate_ = np.cumsum(_additive_f(at_risk, deaths)).sort_index().shift(-1).fillna(0)
+        var_ = np.cumsum(_additive_var(at_risk, deaths)).sort_index().shift(-1).fillna(0)
     else:
         deaths = events['observed']
         at_risk = events['at_risk']
diff --git a/tests/test_estimation.py b/tests/test_estimation.py
index 209e1b753..8936b4078 100644
--- a/tests/test_estimation.py
+++ b/tests/test_estimation.py
@@ -349,12 +349,14 @@ def test_passing_in_left_censorship_creates_a_cumulative_density(self, sample_li
         assert not hasattr(kmf, 'survival_function_')
 
     def test_kmf_left_censorship_stats(self):
+        # from http://www.public.iastate.edu/~pdixon/stat505/Chapter%2011.pdf
         T = [3, 5, 5, 5, 6, 6, 10, 12]
-        C = [1, 0, 0, 1, 1, 1, 0, 1]
+        C = [1, 0, 0, 1, 1, 1,  0,  1]
         kmf = KaplanMeierFitter()
         kmf.fit(T, C, left_censorship=True)
-        assert kmf.cumulative_density_[kmf._label].ix[0] == 0.0
-        assert kmf.cumulative_density_[kmf._label].ix[12] == 1.0
+
+        actual = kmf.cumulative_density_[kmf._label].values 
+        npt.assert_almost_equal(actual, np.array([0, 0.437500, 0.5833333, 0.875, 0.875, 1]))
 
     def test_shifting_durations_doesnt_affect_survival_function_values(self):
         T = np.random.exponential(10, size=100)

From 545a05faeb0dab2d7fc30876ff43f37a393c6fa4 Mon Sep 17 00:00:00 2001
From: Cameron Davidson-Pilon <cam.davidson.pilon@gmail.com>
Date: Sat, 1 Aug 2015 13:49:57 -0400
Subject: [PATCH 11/11] update changelog

---
 CHANGELOG.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 28d147599..874a3a8a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
 ### Changelogs
 
+#### 0.8.0
+ - reorganized lifelines directories: 
+    - moved test files out of main directory. 
+    - moved `utils.py` into its own directory.
+    - moved all estimators into the `fitters` directory.
+ - added an `at_risk` column to the output of `group_survival_table_from_events` and `survival_table_from_events`
+ - added sample size and power calculations for statistical tests. See `lifelines.statistics.sample_size_necessary_under_cph` and `lifelines.statistics.power_under_cph`.
+ - fixed a bug when using KaplanMeierFitter for left-censored data. 
+
+
 #### 0.7.1 
 - addition of a l2 `penalizer` to `CoxPHFitter`.
 - dropped Fortran implementation of efficient Python version. Lifelines is pure python once again!