diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aaa8dfb0..9b09dce69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ### Changelog -### 0.22.1 +#### 0.22.1 ##### New features - New univariate model, `GeneralizedGammaFitter`. This model contains many sub-models, so it is a good model to check fits. diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py index fbaa1d508..d4243a3e0 100644 --- a/lifelines/fitters/coxph_fitter.py +++ b/lifelines/fitters/coxph_fitter.py @@ -11,6 +11,7 @@ from scipy.integrate import trapz from scipy import stats from bottleneck import nansum as array_sum_to_scalar +from numpy import sum as array_sum_to_scalar from lifelines.fitters import BaseFitter from lifelines.plotting import set_kwargs_ax, set_kwargs_drawstyle @@ -58,7 +59,17 @@ def decide(batch_mode, T): # https://github.com/CamDavidsonPilon/lifelines/issues/591 for original issue. # new values from from perf/batch_vs_single script. (batch_mode is None) - and (0.712085 + -0.000025 * n_total + 0.579359 * frac_dups + 0.000044 * n_total * frac_dups < 1) + and ( + ( + 5.302813e-01 + + -1.789398e-06 * n_total + + -3.496285e-11 * n_total ** 2 + + 2.756569e00 * frac_dups + + -1.306258e00 * frac_dups ** 2 + + 9.535042e-06 * n_total * frac_dups + ) + < 1 + ) ): return "batch" return "single" @@ -616,19 +627,22 @@ def _get_efron_values_single(self, X, T, E, weights, beta): tied_death_counts = 0 scores = weights * np.exp(np.dot(X, beta)) + phi_x_is = scores[:, None] * X + phi_x_x_i = np.empty((d, d)) + # Iterate backwards to utilize recursive relationship for i in range(n - 1, -1, -1): # Doing it like this to preserve shape ti = T[i] ei = E[i] xi = X[i] - score = scores[i] w = weights[i] # Calculate phi values - phi_i = score - phi_x_i = phi_i * xi - phi_x_x_i = np.outer(xi, phi_x_i) + phi_i = scores[i] + phi_x_i = phi_x_is[i] + # https://stackoverflow.com/a/51481295/1895939 + phi_x_x_i = np.multiply.outer(xi, phi_x_i) # Calculate sums of Risk set risk_phi = risk_phi + phi_i diff --git a/lifelines/fitters/weibull_fitter.py b/lifelines/fitters/weibull_fitter.py index 10887a051..39f6f3ca9 100644 --- a/lifelines/fitters/weibull_fitter.py +++ b/lifelines/fitters/weibull_fitter.py @@ -71,7 +71,7 @@ class WeibullFitter(KnownModelParametericUnivariateFitter): entry: array or None The entry array provided, or None - See Also + Notes ---------- Looking for a 3-parameter Weibull model? See notes here: https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Piecewise%20Exponential%20Models%20and%20Creating%20Custom%20Models.html#3-parameter-Weibull-distribution """ diff --git a/perf_tests/batch_vs_single.py b/perf_tests/batch_vs_single.py index 46fc856ea..828b894ab 100644 --- a/perf_tests/batch_vs_single.py +++ b/perf_tests/batch_vs_single.py @@ -14,7 +14,7 @@ results = {} -for n_copies in [1, 2, 4, 6, 8, 10, 13, 17, 20, 25]: +for n_copies in [1, 2, 4, 6, 8, 10, 15, 20, 50, 100, 150]: # lower percents means more ties. # original rossi dataset has 0.113 @@ -56,8 +56,10 @@ results["N * frac"] = results["N"] * results["frac"] +results["N**2"] = results["N"] ** 2 +results["frac**2"] = results["frac"] ** 2 -X = results[["N", "frac", "N * frac"]] +X = results[["N", "frac", "N * frac", "frac**2", "N**2"]] X = sm.add_constant(X) Y = results["ratio"] diff --git a/perf_tests/cp_perf_test.py b/perf_tests/cp_perf_test.py index 793008375..11d3c0fba 100644 --- a/perf_tests/cp_perf_test.py +++ b/perf_tests/cp_perf_test.py @@ -13,7 +13,7 @@ df = load_rossi() df = pd.concat([df] * 16) # df = df.reset_index() - # df['week'] = np.random.exponential(1, size=df.shape[0]) + df["week"] = np.random.exponential(1, size=df.shape[0]) cp = CoxPHFitter() start_time = time.time() cp.fit(df, duration_col="week", event_col="arrest", batch_mode=True)