Skip to content

Commit

Permalink
slight perf improvement, and rerun the script to look at large counts
Browse files Browse the repository at this point in the history
  • Loading branch information
CamDavidsonPilon committed Jul 14, 2019
1 parent 441ee59 commit f232b55
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 10 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -1,6 +1,6 @@
### Changelog

### 0.22.1
#### 0.22.1

##### New features
- New univariate model, `GeneralizedGammaFitter`. This model contains many sub-models, so it is a good model to check fits.
Expand Down
24 changes: 19 additions & 5 deletions lifelines/fitters/coxph_fitter.py
Expand Up @@ -11,6 +11,7 @@
from scipy.integrate import trapz
from scipy import stats
from bottleneck import nansum as array_sum_to_scalar
from numpy import sum as array_sum_to_scalar

from lifelines.fitters import BaseFitter
from lifelines.plotting import set_kwargs_ax, set_kwargs_drawstyle
Expand Down Expand Up @@ -58,7 +59,17 @@ def decide(batch_mode, T):
# https://github.com/CamDavidsonPilon/lifelines/issues/591 for original issue.
# new values from perf/batch_vs_single script.
(batch_mode is None)
and (0.712085 + -0.000025 * n_total + 0.579359 * frac_dups + 0.000044 * n_total * frac_dups < 1)
and (
(
5.302813e-01
+ -1.789398e-06 * n_total
+ -3.496285e-11 * n_total ** 2
+ 2.756569e00 * frac_dups
+ -1.306258e00 * frac_dups ** 2
+ 9.535042e-06 * n_total * frac_dups
)
< 1
)
):
return "batch"
return "single"
Expand Down Expand Up @@ -616,19 +627,22 @@ def _get_efron_values_single(self, X, T, E, weights, beta):
tied_death_counts = 0
scores = weights * np.exp(np.dot(X, beta))

phi_x_is = scores[:, None] * X
phi_x_x_i = np.empty((d, d))

# Iterate backwards to utilize recursive relationship
for i in range(n - 1, -1, -1):
# Doing it like this to preserve shape
ti = T[i]
ei = E[i]
xi = X[i]
score = scores[i]
w = weights[i]

# Calculate phi values
phi_i = score
phi_x_i = phi_i * xi
phi_x_x_i = np.outer(xi, phi_x_i)
phi_i = scores[i]
phi_x_i = phi_x_is[i]
# https://stackoverflow.com/a/51481295/1895939
phi_x_x_i = np.multiply.outer(xi, phi_x_i)

# Calculate sums of Risk set
risk_phi = risk_phi + phi_i
Expand Down
2 changes: 1 addition & 1 deletion lifelines/fitters/weibull_fitter.py
Expand Up @@ -71,7 +71,7 @@ class WeibullFitter(KnownModelParametericUnivariateFitter):
entry: array or None
The entry array provided, or None
See Also
Notes
----------
Looking for a 3-parameter Weibull model? See notes here: https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Piecewise%20Exponential%20Models%20and%20Creating%20Custom%20Models.html#3-parameter-Weibull-distribution
"""
Expand Down
6 changes: 4 additions & 2 deletions perf_tests/batch_vs_single.py
Expand Up @@ -14,7 +14,7 @@
results = {}


for n_copies in [1, 2, 4, 6, 8, 10, 13, 17, 20, 25]:
for n_copies in [1, 2, 4, 6, 8, 10, 15, 20, 50, 100, 150]:

# lower percents means more ties.
# original rossi dataset has 0.113
Expand Down Expand Up @@ -56,8 +56,10 @@


results["N * frac"] = results["N"] * results["frac"]
results["N**2"] = results["N"] ** 2
results["frac**2"] = results["frac"] ** 2

X = results[["N", "frac", "N * frac"]]
X = results[["N", "frac", "N * frac", "frac**2", "N**2"]]
X = sm.add_constant(X)

Y = results["ratio"]
Expand Down
2 changes: 1 addition & 1 deletion perf_tests/cp_perf_test.py
Expand Up @@ -13,7 +13,7 @@
df = load_rossi()
df = pd.concat([df] * 16)
# df = df.reset_index()
# df['week'] = np.random.exponential(1, size=df.shape[0])
df["week"] = np.random.exponential(1, size=df.shape[0])
cp = CoxPHFitter()
start_time = time.time()
cp.fit(df, duration_col="week", event_col="arrest", batch_mode=True)
Expand Down

0 comments on commit f232b55

Please sign in to comment.