
Commit

Merge pull request #300 from CamDavidsonPilon/dump-ix
dump .ix from pandas as it is deprecated
CamDavidsonPilon committed Jun 11, 2017
2 parents 149142d + 78630c2 commit 65d03e5
Showing 13 changed files with 63 additions and 63 deletions.
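Every hunk below makes the same mechanical substitution: label-based lookups move from the deprecated .ix to .loc, and position-based lookups move to .iloc. A minimal sketch of the distinction, on a hypothetical frame (illustrative only, not part of the commit):

import pandas as pd

df = pd.DataFrame({'T': [5, 7, 9, 11]}, index=[10, 20, 30, 40])

df.loc[20:30]    # label-based: the rows labelled 20 and 30 (slice endpoints inclusive)
df.iloc[1:3]     # position-based: the 2nd and 3rd rows (endpoint exclusive)

The deprecated .ix guessed between these two behaviours depending on the index type, which is the ambiguity that led to its removal from pandas.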
2 changes: 1 addition & 1 deletion docs/Quickstart.rst
@@ -169,7 +169,7 @@ After fitting, you'll have access to properties like ``cumulative_hazards_`` and
.. code:: python
x = regression_dataset[regression_dataset.columns - ['E', 'T']]
- aaf.predict_survival_function(x.ix[10:12]).plot() # get the unique survival functions of the first two subjects
+ aaf.predict_survival_function(x.iloc[10:12]).plot() # get the unique survival functions of the first two subjects
.. image:: images/quickstart_predict_aaf.png

2 changes: 1 addition & 1 deletion docs/Survival Regression.rst
@@ -358,7 +358,7 @@ Prime Minister Stephen Harper.
.. code:: python
ix = (data['ctryname'] == 'Canada') * (data['start_year'] == 2006)
- harper = X.ix[ix]
+ harper = X.loc[ix]
print "Harper's unique data point", harper
.. parsed-literal::
2 changes: 1 addition & 1 deletion lifelines/fitters/__init__.py
@@ -85,7 +85,7 @@ def _predict(self, estimate, label):
""" % (class_name, class_name)

def predict(time):
- predictor = lambda t: getattr(self, estimate).ix[:t].iloc[-1][label]
+ predictor = lambda t: getattr(self, estimate).loc[:t].iloc[-1][label]
try:
return np.array([predictor(t) for t in time])
except TypeError:
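The replacement in predict keeps the step-function semantics of the estimate: .loc[:t] slices the time index up to and including t, and .iloc[-1] then takes the last row at or before that time. A small sketch of the idea, with a made-up survival table (the index, column label, and values here are hypothetical):

import pandas as pd

sf = pd.DataFrame({'KM_estimate': [1.0, 0.8, 0.5]}, index=[0.0, 2.0, 5.0])

# value of the step function at t = 3.5: the last estimate at or before 3.5
sf.loc[:3.5].iloc[-1]['KM_estimate']   # 0.8, from the row at t = 2.0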
16 changes: 8 additions & 8 deletions lifelines/fitters/aalen_additive_fitter.py
@@ -189,8 +189,8 @@ def _fit_static(self, dataframe, duration_col, event_col=None,
except LinAlgError:
print("Linear regression error. Try increasing the penalizer term.")

- hazards_.ix[time, id] = v.T
- variance_.ix[time, id] = V[:, relevant_individuals][:, 0] ** 2
+ hazards_.loc[time, id] = v.T
+ variance_.loc[time, id] = V[:, relevant_individuals][:, 0] ** 2
previous_hazard = v.T

# update progress bar
@@ -279,8 +279,8 @@ def _fit_varying(self, dataframe, duration_col="T", event_col="E",
except LinAlgError:
print("Linear regression error. Try increasing the penalizer term.")

- hazards_.ix[id, time] = v.T
- variance_.ix[id, time] = V[:, relevant_individuals][:, 0] ** 2
+ hazards_.loc[id, time] = v.T
+ variance_.loc[id, time] = V[:, relevant_individuals][:, 0] ** 2
previous_hazard = v.T

# update progress bar
@@ -332,10 +332,10 @@ def _compute_confidence_intervals(self):
columns=self.cumulative_hazards_.columns
)

- self.confidence_intervals_.ix['upper'] = self.cumulative_hazards_.values + \
+ self.confidence_intervals_.loc['upper'] = self.cumulative_hazards_.values + \
alpha2 * np.sqrt(self.variance_.cumsum().values)

- self.confidence_intervals_.ix['lower'] = self.cumulative_hazards_.values - \
+ self.confidence_intervals_.loc['lower'] = self.cumulative_hazards_.values - \
alpha2 * np.sqrt(self.variance_.cumsum().values)
return

@@ -456,8 +456,8 @@ def shaded_plot(ax, x, y, y_upper, y_lower, **kwargs):

for column in columns:
y = get_loc(self.cumulative_hazards_[column]).values
- y_upper = get_loc(self.confidence_intervals_[column].ix['upper']).values
- y_lower = get_loc(self.confidence_intervals_[column].ix['lower']).values
+ y_upper = get_loc(self.confidence_intervals_[column].loc['upper']).values
+ y_lower = get_loc(self.confidence_intervals_[column].loc['lower']).values
shaded_plot(ax, x, y, y_upper, y_lower, label=kwargs.get('label', column))

if legend:
16 changes: 8 additions & 8 deletions lifelines/fitters/coxph_fitter.py
@@ -341,8 +341,8 @@ def _compute_standard_errors(self):
index=['se'], columns=self.hazards_.columns)

def _compute_z_values(self):
- return (self.hazards_.ix['coef'] /
-         self._compute_standard_errors().ix['se'])
+ return (self.hazards_.loc['coef'] /
+         self._compute_standard_errors().loc['se'])

def _compute_p_values(self):
U = self._compute_z_values() ** 2
@@ -359,13 +359,13 @@ def summary(self):
Contains columns coef, exp(coef), se(coef), z, p, lower, upper"""

df = pd.DataFrame(index=self.hazards_.columns)
- df['coef'] = self.hazards_.ix['coef'].values
- df['exp(coef)'] = exp(self.hazards_.ix['coef'].values)
- df['se(coef)'] = self._compute_standard_errors().ix['se'].values
+ df['coef'] = self.hazards_.loc['coef'].values
+ df['exp(coef)'] = exp(self.hazards_.loc['coef'].values)
+ df['se(coef)'] = self._compute_standard_errors().loc['se'].values
df['z'] = self._compute_z_values()
df['p'] = self._compute_p_values()
- df['lower %.2f' % self.alpha] = self.confidence_intervals_.ix['lower-bound'].values
- df['upper %.2f' % self.alpha] = self.confidence_intervals_.ix['upper-bound'].values
+ df['lower %.2f' % self.alpha] = self.confidence_intervals_.loc['lower-bound'].values
+ df['upper %.2f' % self.alpha] = self.confidence_intervals_.loc['upper-bound'].values
return df

def print_summary(self):
@@ -526,7 +526,7 @@ def _compute_baseline_hazards(self, df, T, E):
baseline_hazards_ = pd.DataFrame(index=self.durations.unique())
for stratum in df.index.unique():
baseline_hazards_ = baseline_hazards_.merge(
- self._compute_baseline_hazard(data=df.ix[[stratum]], durations=T.ix[[stratum]], event_observed=E.ix[[stratum]], name=stratum),
+ self._compute_baseline_hazard(data=df.loc[[stratum]], durations=T.loc[[stratum]], event_observed=E.loc[[stratum]], name=stratum),
left_index=True,
right_index=True,
how='left')
8 changes: 4 additions & 4 deletions lifelines/fitters/exponential_fitter.py
@@ -97,7 +97,7 @@ def _compute_standard_errors(self):
index=['se'], columns=['lambda_'])

def _compute_confidence_bounds_of_parameters(self):
- se = self._compute_standard_errors().ix['se']
+ se = self._compute_standard_errors().loc['se']
alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
return pd.DataFrame([
np.array([self.lambda_]) + alpha2 * se,
@@ -116,9 +116,9 @@ def summary(self):
lower_upper_bounds = self._compute_confidence_bounds_of_parameters()
df = pd.DataFrame(index=['lambda_'])
df['coef'] = [self.lambda_]
- df['se(coef)'] = self._compute_standard_errors().ix['se']
- df['lower %.2f' % self.alpha] = lower_upper_bounds.ix['lower-bound']
- df['upper %.2f' % self.alpha] = lower_upper_bounds.ix['upper-bound']
+ df['se(coef)'] = self._compute_standard_errors().loc['se']
+ df['lower %.2f' % self.alpha] = lower_upper_bounds.loc['lower-bound']
+ df['upper %.2f' % self.alpha] = lower_upper_bounds.loc['upper-bound']
return df

def print_summary(self):
8 changes: 4 additions & 4 deletions lifelines/fitters/weibull_fitter.py
@@ -191,7 +191,7 @@ def _compute_standard_errors(self):
index=['se'], columns=['lambda_', 'rho_'])

def _compute_confidence_bounds_of_parameters(self):
- se = self._compute_standard_errors().ix['se']
+ se = self._compute_standard_errors().loc['se']
alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
return pd.DataFrame([
np.array([self.lambda_, self.rho_]) + alpha2 * se,
@@ -210,9 +210,9 @@ def summary(self):
lower_upper_bounds = self._compute_confidence_bounds_of_parameters()
df = pd.DataFrame(index=['lambda_', 'rho_'])
df['coef'] = [self.lambda_, self.rho_]
- df['se(coef)'] = self._compute_standard_errors().ix['se']
- df['lower %.2f' % self.alpha] = lower_upper_bounds.ix['lower-bound']
- df['upper %.2f' % self.alpha] = lower_upper_bounds.ix['upper-bound']
+ df['se(coef)'] = self._compute_standard_errors().loc['se']
+ df['lower %.2f' % self.alpha] = lower_upper_bounds.loc['lower-bound']
+ df['upper %.2f' % self.alpha] = lower_upper_bounds.loc['upper-bound']
return df

def print_summary(self):
4 changes: 2 additions & 2 deletions lifelines/plotting.py
@@ -276,7 +276,7 @@ def _plot_loglogs(ix=None, iloc=None, show_censors=False, censor_styles=None, **kwargs):
'mew': 1
}
cs.update(censor_styles)
- times = dataframe_slicer(cls.event_table.ix[(cls.event_table['censored'] > 0)]).index.values.astype(float)
+ times = dataframe_slicer(cls.event_table.loc[(cls.event_table['censored'] > 0)]).index.values.astype(float)
v = cls.predict(times)
# don't log times, as Pandas will take care of all log-scaling later.
ax.plot(times, loglog(v), linestyle='None',
@@ -362,7 +362,7 @@ def plot(ix=None, iloc=None, show_censors=False,
'mew': 1
}
cs.update(censor_styles)
- times = dataframe_slicer(cls.event_table.ix[(cls.event_table['censored'] > 0)]).index.values.astype(float)
+ times = dataframe_slicer(cls.event_table.loc[(cls.event_table['censored'] > 0)]).index.values.astype(float)
v = cls.predict(times)
ax.plot(times, v, linestyle='None',
color=colour, **cs)
4 changes: 2 additions & 2 deletions lifelines/statistics.py
@@ -152,8 +152,8 @@ def pairwise_logrank_test(event_durations, groups, event_observed=None,
g1, g2 = unique_groups[[i1, i2]]
ix1, ix2 = (groups == g1), (groups == g2)
test_name = str(g1) + " vs. " + str(g2)
- result = logrank_test(event_durations.ix[ix1], event_durations.ix[ix2],
-                       event_observed.ix[ix1], event_observed.ix[ix2],
+ result = logrank_test(event_durations.loc[ix1], event_durations.loc[ix2],
+                       event_observed.loc[ix1], event_observed.loc[ix2],
alpha=alpha, t_0=t_0, use_bonferroni=bonferroni,
test_name=test_name, **kwargs)
R[i1, i2], R[i2, i1] = result, result
10 changes: 5 additions & 5 deletions lifelines/utils/__init__.py
@@ -37,7 +37,7 @@ def qth_survival_times(q, survival_functions):
assert (q <= 1).all() and (0 <= q).all(), 'q must be between 0 and 1'
survival_functions = pd.DataFrame(survival_functions)
if survival_functions.shape[1] == 1 and q.shape == (1,):
- return survival_functions.apply(lambda s: qth_survival_time(q[0], s)).ix[0]
+ return survival_functions.apply(lambda s: qth_survival_time(q[0], s)).iloc[0]
else:
return pd.DataFrame({_q: survival_functions.apply(lambda s: qth_survival_time(_q, s)) for _q in q})

@@ -135,9 +135,9 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
data = data.join(survival_table_from_events(T, C, B, columns=columns), how='outer')

data = data.fillna(0)
- # hmmm pandas its too bad I can't do data.ix[:limit] and leave out the if.
+ # hmmm pandas its too bad I can't do data.loc[:limit] and leave out the if.
if int(limit) != -1:
-     data = data.ix[:limit]
+     data = data.loc[:limit]

return unique_groups, data.filter(like='removed:'), data.filter(like='observed:'), data.filter(like='censored:')

@@ -474,8 +474,8 @@ def k_fold_cross_validation(fitters, df, duration_col, event_col=None,
for i in range(1, k + 1):

ix = assignments == i
- training_data = df.ix[~ix]
- testing_data = df.ix[ix]
+ training_data = df.loc[~ix]
+ testing_data = df.loc[ix]

T_actual = testing_data[duration_col].values
E_actual = testing_data[event_col].values
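In k_fold_cross_validation, ix is a boolean array, so .loc[ix] and .loc[~ix] perform boolean-mask row selection rather than label lookup. A sketch of the train/test split, with hypothetical data:

import numpy as np
import pandas as pd

df = pd.DataFrame({'T': [5, 7, 9, 11], 'E': [1, 0, 1, 1]})
assignments = np.array([1, 2, 1, 2])   # fold labels, as assigned in the loop above

ix = assignments == 1
training_data = df.loc[~ix]   # rows where the mask is False
testing_data = df.loc[ix]     # rows where the mask is True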
42 changes: 21 additions & 21 deletions tests/test_estimation.py
@@ -148,14 +148,14 @@ def test_predict_method_returns_exact_value_if_given_an_observed_time(self):
kmf = KaplanMeierFitter()
kmf.fit(T)
time = 1
- assert abs(kmf.predict(time) - kmf.survival_function_.ix[time].values) < 10e-8
+ assert abs(kmf.predict(time) - kmf.survival_function_.iloc[time].values) < 10e-8

def test_predict_method_returns_gives_values_prior_to_the_value_in_the_survival_function(self):
T = [1, 2, 3]
kmf = KaplanMeierFitter()
kmf.fit(T)
- assert abs(kmf.predict(0.5) - kmf.survival_function_.ix[0].values) < 10e-8
- assert abs(kmf.predict(1.9999) - kmf.survival_function_.ix[1].values) < 10e-8
+ assert abs(kmf.predict(0.5) - kmf.survival_function_.iloc[0].values) < 10e-8
+ assert abs(kmf.predict(1.9999) - kmf.survival_function_.iloc[1].values) < 10e-8

def test_custom_timeline_can_be_list_or_array(self, positive_sample_lifetimes, univariate_fitters):
T, C = positive_sample_lifetimes
@@ -397,8 +397,8 @@ def test_kmf_left_censorship_plots(self, block):

kmf = KaplanMeierFitter()
lcd_dataset = load_lcd()
- alluvial_fan = lcd_dataset.ix[lcd_dataset['group'] == 'alluvial_fan']
- basin_trough = lcd_dataset.ix[lcd_dataset['group'] == 'basin_trough']
+ alluvial_fan = lcd_dataset.loc[lcd_dataset['group'] == 'alluvial_fan']
+ basin_trough = lcd_dataset.loc[lcd_dataset['group'] == 'basin_trough']
kmf.fit(alluvial_fan['T'], alluvial_fan['C'], left_censorship=True, label='alluvial_fan')
ax = kmf.plot()

@@ -413,19 +413,19 @@ def test_kmf_survival_curve_output_against_R(self):
kmf = KaplanMeierFitter()

expected = np.array([[0.909, 0.779]]).T
- kmf.fit(df.ix[ix]['time'], df.ix[ix]['event'], timeline=[25, 53])
+ kmf.fit(df.loc[ix]['time'], df.loc[ix]['event'], timeline=[25, 53])
npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)

expected = np.array([[0.833, 0.667, 0.5, 0.333]]).T
- kmf.fit(df.ix[~ix]['time'], df.ix[~ix]['event'], timeline=[9, 19, 32, 34])
+ kmf.fit(df.loc[~ix]['time'], df.loc[~ix]['event'], timeline=[9, 19, 32, 34])
npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)

def test_kmf_confidence_intervals_output_against_R(self):
# this uses conf.type = 'log-log'
df = load_g3()
ix = df['group'] != 'RIT'
kmf = KaplanMeierFitter()
- kmf.fit(df.ix[ix]['time'], df.ix[ix]['event'], timeline=[9, 19, 32, 34])
+ kmf.fit(df.loc[ix]['time'], df.loc[ix]['event'], timeline=[9, 19, 32, 34])

expected_lower_bound = np.array([0.2731, 0.1946, 0.1109, 0.0461])
npt.assert_array_almost_equal(kmf.confidence_interval_['KM_estimate_lower_0.95'].values,
@@ -472,9 +472,9 @@ def test_censor_nelson_aalen(self, sample_lifetimes):
naf.fit(T, C)
npt.assert_almost_equal(naf.cumulative_hazard_.values, self.nelson_aalen(T, C))

- def test_ix_slicing(self, waltons_dataset):
+ def test_loc_slicing(self, waltons_dataset):
naf = NelsonAalenFitter().fit(waltons_dataset['T'])
- assert naf.cumulative_hazard_.ix[0:10].shape[0] == 4
+ assert naf.cumulative_hazard_.loc[0:10].shape[0] == 4

def test_iloc_slicing(self, waltons_dataset):
naf = NelsonAalenFitter().fit(waltons_dataset['T'])
@@ -569,7 +569,7 @@ def test_duration_vector_can_be_normalized(self, regression_models, rossi):
assert_frame_equal(hazards, hazards_norm)

def test_prediction_methods_respect_index(self, regression_models, rossi):
- X = rossi.ix[:3].sort_index(ascending=False)
+ X = rossi.iloc[:4].sort_index(ascending=False)
expected_index = pd.Index(np.array([3, 2, 1, 0]))

for fitter in regression_models:
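Note the off-by-one in the hunk above: on rossi's default integer index, rossi.ix[:3] resolved as a label slice and returned four rows (labels 0 through 3, inclusive), so the faithful positional rewrite is rossi.iloc[:4], whose endpoint is exclusive. Schematically, assuming a default RangeIndex (the column values here are hypothetical):

import pandas as pd

df = pd.DataFrame({'week': [20, 17, 25, 52]})   # default integer index 0..3

len(df.loc[:3])    # 4 -- label slice, endpoint included (what .ix did here)
len(df.iloc[:4])   # 4 -- position slice, endpoint excluded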
@@ -684,7 +684,7 @@ def test_efron_newtons_method(self, data_nus):
def test_fit_method(self, data_nus):
cf = CoxPHFitter()
cf.fit(data_nus, duration_col='t', event_col='E')
- assert np.abs(cf.hazards_.ix[0][0] - -0.0335) < 0.0001
+ assert np.abs(cf.hazards_.iloc[0][0] - -0.0335) < 0.0001

def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
cf = CoxPHFitter()
@@ -924,8 +924,8 @@ def test_hazard_works_as_intended_with_strata_against_R_output(self, rossi):
"""
cp = CoxPHFitter()
cp.fit(rossi, 'week', 'arrest', strata=['race', 'paro', 'mar', 'wexp'])
- npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 0)].ix[[14, 35, 37, 43, 52]].values, [0.076600555, 0.169748261, 0.272088807, 0.396562717, 0.396562717], decimal=2)
- npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 1)].ix[[27, 43, 48, 52]].values, [0.095499001, 0.204196905, 0.338393113, 0.338393113], decimal=2)
+ npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 0)].loc[[14, 35, 37, 43, 52]].values, [0.076600555, 0.169748261, 0.272088807, 0.396562717, 0.396562717], decimal=2)
+ npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 1)].loc[[27, 43, 48, 52]].values, [0.095499001, 0.204196905, 0.338393113, 0.338393113], decimal=2)

def test_baseline_survival_is_the_same_indp_of_location(self, regression_dataset):
df = regression_dataset.copy()
@@ -963,8 +963,8 @@ def test_survival_prediction_is_the_same_indp_of_location(self, regression_dataset):
cp2.fit(df_demeaned, event_col='E', duration_col='T')

assert_frame_equal(
- cp1.predict_survival_function(df.ix[[0]][['var1', 'var2', 'var3']]),
- cp2.predict_survival_function(df_demeaned.ix[[0]][['var1', 'var2', 'var3']])
+ cp1.predict_survival_function(df.iloc[[0]][['var1', 'var2', 'var3']]),
+ cp2.predict_survival_function(df_demeaned.iloc[[0]][['var1', 'var2', 'var3']])
)

def test_baseline_survival_is_the_same_indp_of_scale(self, regression_dataset):
Expand All @@ -991,8 +991,8 @@ def test_survival_prediction_is_the_same_indp_of_scale(self, regression_dataset)
cp2.fit(df_scaled, event_col='E', duration_col='T')

assert_frame_equal(
- cp1.predict_survival_function(df.ix[[0]][['var1', 'var2', 'var3']]),
- cp2.predict_survival_function(df_scaled.ix[[0]][['var1', 'var2', 'var3']])
+ cp1.predict_survival_function(df.iloc[[0]][['var1', 'var2', 'var3']]),
+ cp2.predict_survival_function(df_scaled.iloc[[0]][['var1', 'var2', 'var3']])
)

def test_predict_log_hazard_relative_to_mean(self, rossi):
@@ -1156,7 +1156,7 @@ def test_aalen_additive_fit_no_censor(self, block):
for i in range(d + 1):
ax = plt.subplot(d + 1, 1, i + 1)
col = cumulative_hazards.columns[i]
- ax = cumulative_hazards[col].ix[:15].plot(legend=False, ax=ax)
+ ax = cumulative_hazards[col].loc[:15].plot(legend=False, ax=ax)
ax = aaf.plot(ix=slice(0, 15), ax=ax, columns=[col], legend=False)
plt.show(block=block)
return
@@ -1186,7 +1186,7 @@ def test_aalen_additive_fit_with_censor(self, block):
for i in range(d + 1):
ax = plt.subplot(d + 1, 1, i + 1)
col = cumulative_hazards.columns[i]
- ax = cumulative_hazards[col].ix[:15].plot(legend=False, ax=ax)
+ ax = cumulative_hazards[col].loc[:15].plot(legend=False, ax=ax)
ax = aaf.plot(ix=slice(0, 15), ax=ax, columns=[col], legend=False)
plt.show(block=block)
return
@@ -1215,7 +1215,7 @@ def test_crossval_for_aalen_add(self, data_pred2, data_pred1):
def test_predict_cumulative_hazard_inputs(self, data_pred1):
aaf = AalenAdditiveFitter()
aaf.fit(data_pred1, duration_col='t', event_col='E',)
- x = data_pred1.ix[:5].drop(['t', 'E'], axis=1)
+ x = data_pred1.iloc[:5].drop(['t', 'E'], axis=1)
y_df = aaf.predict_cumulative_hazard(x)
y_np = aaf.predict_cumulative_hazard(x.values)
assert_frame_equal(y_df, y_np)
8 changes: 4 additions & 4 deletions tests/test_statistics.py
@@ -27,8 +27,8 @@ def test_unequal_intensity_with_random_data():
def test_logrank_test_output_against_R_1():
df = load_g3()
ix = (df['group'] == 'RIT')
- d1, e1 = df.ix[ix]['time'], df.ix[ix]['event']
- d2, e2 = df.ix[~ix]['time'], df.ix[~ix]['event']
+ d1, e1 = df.loc[ix]['time'], df.loc[ix]['event']
+ d2, e2 = df.loc[~ix]['time'], df.loc[~ix]['event']

expected = 0.0138
result = stats.logrank_test(d1, d2, event_observed_A=e1, event_observed_B=e2)
@@ -115,8 +115,8 @@ def test_unequal_intensity_with_negative_data():
def test_waltons_dataset():
df = load_waltons()
ix = df['group'] == 'miR-137'
- waltonT1 = df.ix[ix]['T']
- waltonT2 = df.ix[~ix]['T']
+ waltonT1 = df.loc[ix]['T']
+ waltonT2 = df.loc[~ix]['T']
result = stats.logrank_test(waltonT1, waltonT2)
assert result.is_significant

(Diff for 1 remaining changed file did not load.)