
Commit

Merge pull request #300 from CamDavidsonPilon/dump-ix
dump .ix from pandas as it is deprecated
CamDavidsonPilon committed Jun 11, 2017
2 parents 149142d + 78630c2 commit 65d03e5
Showing 13 changed files with 63 additions and 63 deletions.
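Every hunk below makes the same mechanical substitution: label-based lookups move from the deprecated .ix to .loc, and position-based lookups move to .iloc. A minimal sketch of the distinction, on a hypothetical frame (illustrative only, not part of the commit):

import pandas as pd

df = pd.DataFrame({'T': [5, 7, 9, 11]}, index=[10, 20, 30, 40])

df.loc[20:30]    # label-based: the rows labelled 20 and 30 (slice endpoints inclusive)
df.iloc[1:3]     # position-based: the 2nd and 3rd rows (endpoint exclusive)

The deprecated .ix guessed between these two behaviours depending on the index type, which is the ambiguity that led to its removal from pandas.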
2 changes: 1 addition & 1 deletion docs/Quickstart.rst
@@ -169,7 +169,7 @@ After fitting, you'll have access to properties like ``cumulative_hazards_`` and
.. code:: python
x = regression_dataset[regression_dataset.columns - ['E', 'T']]
- aaf.predict_survival_function(x.ix[10:12]).plot() # get the unique survival functions of the first two subjects
+ aaf.predict_survival_function(x.iloc[10:12]).plot() # get the unique survival functions of the first two subjects
.. image:: images/quickstart_predict_aaf.png

2 changes: 1 addition & 1 deletion docs/Survival Regression.rst
@@ -358,7 +358,7 @@ Prime Minister Stephen Harper.
.. code:: python
ix = (data['ctryname'] == 'Canada') * (data['start_year'] == 2006)
- harper = X.ix[ix]
+ harper = X.loc[ix]
print "Harper's unique data point", harper
.. parsed-literal::
2 changes: 1 addition & 1 deletion lifelines/fitters/__init__.py
@@ -85,7 +85,7 @@ def _predict(self, estimate, label):
""" % (class_name, class_name)

def predict(time):
- predictor = lambda t: getattr(self, estimate).ix[:t].iloc[-1][label]
+ predictor = lambda t: getattr(self, estimate).loc[:t].iloc[-1][label]
try:
return np.array([predictor(t) for t in time])
except TypeError:
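The replacement in predict keeps the step-function semantics of the estimate: .loc[:t] slices the time index up to and including t, and .iloc[-1] then takes the last row at or before that time. A small sketch of the idea, with a made-up survival table (the index, column label, and values here are hypothetical):

import pandas as pd

sf = pd.DataFrame({'KM_estimate': [1.0, 0.8, 0.5]}, index=[0.0, 2.0, 5.0])

# value of the step function at t = 3.5: the last estimate at or before 3.5
sf.loc[:3.5].iloc[-1]['KM_estimate']   # 0.8, from the row at t = 2.0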
16 changes: 8 additions & 8 deletions lifelines/fitters/aalen_additive_fitter.py
@@ -189,8 +189,8 @@ def _fit_static(self, dataframe, duration_col, event_col=None,
except LinAlgError:
print("Linear regression error. Try increasing the penalizer term.")

- hazards_.ix[time, id] = v.T
- variance_.ix[time, id] = V[:, relevant_individuals][:, 0] ** 2
+ hazards_.loc[time, id] = v.T
+ variance_.loc[time, id] = V[:, relevant_individuals][:, 0] ** 2
previous_hazard = v.T

# update progress bar
@@ -279,8 +279,8 @@ def _fit_varying(self, dataframe, duration_col="T", event_col="E",
except LinAlgError:
print("Linear regression error. Try increasing the penalizer term.")

- hazards_.ix[id, time] = v.T
- variance_.ix[id, time] = V[:, relevant_individuals][:, 0] ** 2
+ hazards_.loc[id, time] = v.T
+ variance_.loc[id, time] = V[:, relevant_individuals][:, 0] ** 2
previous_hazard = v.T

# update progress bar
@@ -332,10 +332,10 @@ def _compute_confidence_intervals(self):
columns=self.cumulative_hazards_.columns
)

- self.confidence_intervals_.ix['upper'] = self.cumulative_hazards_.values + \
+ self.confidence_intervals_.loc['upper'] = self.cumulative_hazards_.values + \
alpha2 * np.sqrt(self.variance_.cumsum().values)

- self.confidence_intervals_.ix['lower'] = self.cumulative_hazards_.values - \
+ self.confidence_intervals_.loc['lower'] = self.cumulative_hazards_.values - \
alpha2 * np.sqrt(self.variance_.cumsum().values)
return

@@ -456,8 +456,8 @@ def shaded_plot(ax, x, y, y_upper, y_lower, **kwargs):

for column in columns:
y = get_loc(self.cumulative_hazards_[column]).values
- y_upper = get_loc(self.confidence_intervals_[column].ix['upper']).values
- y_lower = get_loc(self.confidence_intervals_[column].ix['lower']).values
+ y_upper = get_loc(self.confidence_intervals_[column].loc['upper']).values
+ y_lower = get_loc(self.confidence_intervals_[column].loc['lower']).values
shaded_plot(ax, x, y, y_upper, y_lower, label=kwargs.get('label', column))

if legend:
16 changes: 8 additions & 8 deletions lifelines/fitters/coxph_fitter.py
@@ -341,8 +341,8 @@ def _compute_standard_errors(self):
index=['se'], columns=self.hazards_.columns)

def _compute_z_values(self):
- return (self.hazards_.ix['coef'] /
-         self._compute_standard_errors().ix['se'])
+ return (self.hazards_.loc['coef'] /
+         self._compute_standard_errors().loc['se'])

def _compute_p_values(self):
U = self._compute_z_values() ** 2
@@ -359,13 +359,13 @@ def summary(self):
Contains columns coef, exp(coef), se(coef), z, p, lower, upper"""

df = pd.DataFrame(index=self.hazards_.columns)
- df['coef'] = self.hazards_.ix['coef'].values
- df['exp(coef)'] = exp(self.hazards_.ix['coef'].values)
- df['se(coef)'] = self._compute_standard_errors().ix['se'].values
+ df['coef'] = self.hazards_.loc['coef'].values
+ df['exp(coef)'] = exp(self.hazards_.loc['coef'].values)
+ df['se(coef)'] = self._compute_standard_errors().loc['se'].values
df['z'] = self._compute_z_values()
df['p'] = self._compute_p_values()
- df['lower %.2f' % self.alpha] = self.confidence_intervals_.ix['lower-bound'].values
- df['upper %.2f' % self.alpha] = self.confidence_intervals_.ix['upper-bound'].values
+ df['lower %.2f' % self.alpha] = self.confidence_intervals_.loc['lower-bound'].values
+ df['upper %.2f' % self.alpha] = self.confidence_intervals_.loc['upper-bound'].values
return df

def print_summary(self):
@@ -526,7 +526,7 @@ def _compute_baseline_hazards(self, df, T, E):
baseline_hazards_ = pd.DataFrame(index=self.durations.unique())
for stratum in df.index.unique():
baseline_hazards_ = baseline_hazards_.merge(
- self._compute_baseline_hazard(data=df.ix[[stratum]], durations=T.ix[[stratum]], event_observed=E.ix[[stratum]], name=stratum),
+ self._compute_baseline_hazard(data=df.loc[[stratum]], durations=T.loc[[stratum]], event_observed=E.loc[[stratum]], name=stratum),
left_index=True,
right_index=True,
how='left')
8 changes: 4 additions & 4 deletions lifelines/fitters/exponential_fitter.py
@@ -97,7 +97,7 @@ def _compute_standard_errors(self):
index=['se'], columns=['lambda_'])

def _compute_confidence_bounds_of_parameters(self):
- se = self._compute_standard_errors().ix['se']
+ se = self._compute_standard_errors().loc['se']
alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
return pd.DataFrame([
np.array([self.lambda_]) + alpha2 * se,
@@ -116,9 +116,9 @@ def summary(self):
lower_upper_bounds = self._compute_confidence_bounds_of_parameters()
df = pd.DataFrame(index=['lambda_'])
df['coef'] = [self.lambda_]
- df['se(coef)'] = self._compute_standard_errors().ix['se']
- df['lower %.2f' % self.alpha] = lower_upper_bounds.ix['lower-bound']
- df['upper %.2f' % self.alpha] = lower_upper_bounds.ix['upper-bound']
+ df['se(coef)'] = self._compute_standard_errors().loc['se']
+ df['lower %.2f' % self.alpha] = lower_upper_bounds.loc['lower-bound']
+ df['upper %.2f' % self.alpha] = lower_upper_bounds.loc['upper-bound']
return df

def print_summary(self):
8 changes: 4 additions & 4 deletions lifelines/fitters/weibull_fitter.py
@@ -191,7 +191,7 @@ def _compute_standard_errors(self):
index=['se'], columns=['lambda_', 'rho_'])

def _compute_confidence_bounds_of_parameters(self):
- se = self._compute_standard_errors().ix['se']
+ se = self._compute_standard_errors().loc['se']
alpha2 = inv_normal_cdf((1. + self.alpha) / 2.)
return pd.DataFrame([
np.array([self.lambda_, self.rho_]) + alpha2 * se,
@@ -210,9 +210,9 @@ def summary(self):
lower_upper_bounds = self._compute_confidence_bounds_of_parameters()
df = pd.DataFrame(index=['lambda_', 'rho_'])
df['coef'] = [self.lambda_, self.rho_]
- df['se(coef)'] = self._compute_standard_errors().ix['se']
- df['lower %.2f' % self.alpha] = lower_upper_bounds.ix['lower-bound']
- df['upper %.2f' % self.alpha] = lower_upper_bounds.ix['upper-bound']
+ df['se(coef)'] = self._compute_standard_errors().loc['se']
+ df['lower %.2f' % self.alpha] = lower_upper_bounds.loc['lower-bound']
+ df['upper %.2f' % self.alpha] = lower_upper_bounds.loc['upper-bound']
return df

def print_summary(self):
4 changes: 2 additions & 2 deletions lifelines/plotting.py
@@ -276,7 +276,7 @@ def _plot_loglogs(ix=None, iloc=None, show_censors=False, censor_styles=None, **kwargs):
'mew': 1
}
cs.update(censor_styles)
- times = dataframe_slicer(cls.event_table.ix[(cls.event_table['censored'] > 0)]).index.values.astype(float)
+ times = dataframe_slicer(cls.event_table.loc[(cls.event_table['censored'] > 0)]).index.values.astype(float)
v = cls.predict(times)
# don't log times, as Pandas will take care of all log-scaling later.
ax.plot(times, loglog(v), linestyle='None',
@@ -362,7 +362,7 @@ def plot(ix=None, iloc=None, show_censors=False,
'mew': 1
}
cs.update(censor_styles)
- times = dataframe_slicer(cls.event_table.ix[(cls.event_table['censored'] > 0)]).index.values.astype(float)
+ times = dataframe_slicer(cls.event_table.loc[(cls.event_table['censored'] > 0)]).index.values.astype(float)
v = cls.predict(times)
ax.plot(times, v, linestyle='None',
color=colour, **cs)
4 changes: 2 additions & 2 deletions lifelines/statistics.py
@@ -152,8 +152,8 @@ def pairwise_logrank_test(event_durations, groups, event_observed=None,
g1, g2 = unique_groups[[i1, i2]]
ix1, ix2 = (groups == g1), (groups == g2)
test_name = str(g1) + " vs. " + str(g2)
- result = logrank_test(event_durations.ix[ix1], event_durations.ix[ix2],
-                       event_observed.ix[ix1], event_observed.ix[ix2],
+ result = logrank_test(event_durations.loc[ix1], event_durations.loc[ix2],
+                       event_observed.loc[ix1], event_observed.loc[ix2],
alpha=alpha, t_0=t_0, use_bonferroni=bonferroni,
test_name=test_name, **kwargs)
R[i1, i2], R[i2, i1] = result, result
10 changes: 5 additions & 5 deletions lifelines/utils/__init__.py
@@ -37,7 +37,7 @@ def qth_survival_times(q, survival_functions):
assert (q <= 1).all() and (0 <= q).all(), 'q must be between 0 and 1'
survival_functions = pd.DataFrame(survival_functions)
if survival_functions.shape[1] == 1 and q.shape == (1,):
- return survival_functions.apply(lambda s: qth_survival_time(q[0], s)).ix[0]
+ return survival_functions.apply(lambda s: qth_survival_time(q[0], s)).iloc[0]
else:
return pd.DataFrame({_q: survival_functions.apply(lambda s: qth_survival_time(_q, s)) for _q in q})

@@ -135,9 +135,9 @@ def group_survival_table_from_events(groups, durations, event_observed, birth_ti
data = data.join(survival_table_from_events(T, C, B, columns=columns), how='outer')

data = data.fillna(0)
- # hmmm pandas its too bad I can't do data.ix[:limit] and leave out the if.
+ # hmmm pandas its too bad I can't do data.loc[:limit] and leave out the if.
if int(limit) != -1:
-     data = data.ix[:limit]
+     data = data.loc[:limit]

return unique_groups, data.filter(like='removed:'), data.filter(like='observed:'), data.filter(like='censored:')

@@ -474,8 +474,8 @@ def k_fold_cross_validation(fitters, df, duration_col, event_col=None,
for i in range(1, k + 1):

ix = assignments == i
- training_data = df.ix[~ix]
- testing_data = df.ix[ix]
+ training_data = df.loc[~ix]
+ testing_data = df.loc[ix]

T_actual = testing_data[duration_col].values
E_actual = testing_data[event_col].values
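In k_fold_cross_validation, ix is a boolean array, so .loc[ix] and .loc[~ix] perform boolean-mask row selection rather than label lookup. A sketch of the train/test split, with hypothetical data:

import numpy as np
import pandas as pd

df = pd.DataFrame({'T': [5, 7, 9, 11], 'E': [1, 0, 1, 1]})
assignments = np.array([1, 2, 1, 2])   # fold labels, as assigned in the loop above

ix = assignments == 1
training_data = df.loc[~ix]   # rows where the mask is False
testing_data = df.loc[ix]     # rows where the mask is True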
42 changes: 21 additions & 21 deletions tests/test_estimation.py
@@ -148,14 +148,14 @@ def test_predict_method_returns_exact_value_if_given_an_observed_time(self):
kmf = KaplanMeierFitter()
kmf.fit(T)
time = 1
- assert abs(kmf.predict(time) - kmf.survival_function_.ix[time].values) < 10e-8
+ assert abs(kmf.predict(time) - kmf.survival_function_.iloc[time].values) < 10e-8

def test_predict_method_returns_gives_values_prior_to_the_value_in_the_survival_function(self):
T = [1, 2, 3]
kmf = KaplanMeierFitter()
kmf.fit(T)
- assert abs(kmf.predict(0.5) - kmf.survival_function_.ix[0].values) < 10e-8
- assert abs(kmf.predict(1.9999) - kmf.survival_function_.ix[1].values) < 10e-8
+ assert abs(kmf.predict(0.5) - kmf.survival_function_.iloc[0].values) < 10e-8
+ assert abs(kmf.predict(1.9999) - kmf.survival_function_.iloc[1].values) < 10e-8

def test_custom_timeline_can_be_list_or_array(self, positive_sample_lifetimes, univariate_fitters):
T, C = positive_sample_lifetimes
@@ -397,8 +397,8 @@ def test_kmf_left_censorship_plots(self, block):

kmf = KaplanMeierFitter()
lcd_dataset = load_lcd()
- alluvial_fan = lcd_dataset.ix[lcd_dataset['group'] == 'alluvial_fan']
- basin_trough = lcd_dataset.ix[lcd_dataset['group'] == 'basin_trough']
+ alluvial_fan = lcd_dataset.loc[lcd_dataset['group'] == 'alluvial_fan']
+ basin_trough = lcd_dataset.loc[lcd_dataset['group'] == 'basin_trough']
kmf.fit(alluvial_fan['T'], alluvial_fan['C'], left_censorship=True, label='alluvial_fan')
ax = kmf.plot()

@@ -413,19 +413,19 @@ def test_kmf_survival_curve_output_against_R(self):
kmf = KaplanMeierFitter()

expected = np.array([[0.909, 0.779]]).T
- kmf.fit(df.ix[ix]['time'], df.ix[ix]['event'], timeline=[25, 53])
+ kmf.fit(df.loc[ix]['time'], df.loc[ix]['event'], timeline=[25, 53])
npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)

expected = np.array([[0.833, 0.667, 0.5, 0.333]]).T
- kmf.fit(df.ix[~ix]['time'], df.ix[~ix]['event'], timeline=[9, 19, 32, 34])
+ kmf.fit(df.loc[~ix]['time'], df.loc[~ix]['event'], timeline=[9, 19, 32, 34])
npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)

def test_kmf_confidence_intervals_output_against_R(self):
# this uses conf.type = 'log-log'
df = load_g3()
ix = df['group'] != 'RIT'
kmf = KaplanMeierFitter()
- kmf.fit(df.ix[ix]['time'], df.ix[ix]['event'], timeline=[9, 19, 32, 34])
+ kmf.fit(df.loc[ix]['time'], df.loc[ix]['event'], timeline=[9, 19, 32, 34])

expected_lower_bound = np.array([0.2731, 0.1946, 0.1109, 0.0461])
npt.assert_array_almost_equal(kmf.confidence_interval_['KM_estimate_lower_0.95'].values,
@@ -472,9 +472,9 @@ def test_censor_nelson_aalen(self, sample_lifetimes):
naf.fit(T, C)
npt.assert_almost_equal(naf.cumulative_hazard_.values, self.nelson_aalen(T, C))

- def test_ix_slicing(self, waltons_dataset):
+ def test_loc_slicing(self, waltons_dataset):
naf = NelsonAalenFitter().fit(waltons_dataset['T'])
- assert naf.cumulative_hazard_.ix[0:10].shape[0] == 4
+ assert naf.cumulative_hazard_.loc[0:10].shape[0] == 4

def test_iloc_slicing(self, waltons_dataset):
naf = NelsonAalenFitter().fit(waltons_dataset['T'])
@@ -569,7 +569,7 @@ def test_duration_vector_can_be_normalized(self, regression_models, rossi):
assert_frame_equal(hazards, hazards_norm)

def test_prediction_methods_respect_index(self, regression_models, rossi):
- X = rossi.ix[:3].sort_index(ascending=False)
+ X = rossi.iloc[:4].sort_index(ascending=False)
expected_index = pd.Index(np.array([3, 2, 1, 0]))

for fitter in regression_models:
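Note the off-by-one in the hunk above: on rossi's default integer index, rossi.ix[:3] resolved as a label slice and returned four rows (labels 0 through 3, inclusive), so the faithful positional rewrite is rossi.iloc[:4], whose endpoint is exclusive. Schematically, assuming a default RangeIndex (the column values here are hypothetical):

import pandas as pd

df = pd.DataFrame({'week': [20, 17, 25, 52]})   # default integer index 0..3

len(df.loc[:3])    # 4 -- label slice, endpoint included (what .ix did here)
len(df.iloc[:4])   # 4 -- position slice, endpoint excluded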
@@ -684,7 +684,7 @@ def test_efron_newtons_method(self, data_nus):
def test_fit_method(self, data_nus):
cf = CoxPHFitter()
cf.fit(data_nus, duration_col='t', event_col='E')
- assert np.abs(cf.hazards_.ix[0][0] - -0.0335) < 0.0001
+ assert np.abs(cf.hazards_.iloc[0][0] - -0.0335) < 0.0001

def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
cf = CoxPHFitter()
@@ -924,8 +924,8 @@ def test_hazard_works_as_intended_with_strata_against_R_output(self, rossi):
"""
cp = CoxPHFitter()
cp.fit(rossi, 'week', 'arrest', strata=['race', 'paro', 'mar', 'wexp'])
- npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 0)].ix[[14, 35, 37, 43, 52]].values, [0.076600555, 0.169748261, 0.272088807, 0.396562717, 0.396562717], decimal=2)
- npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 1)].ix[[27, 43, 48, 52]].values, [0.095499001, 0.204196905, 0.338393113, 0.338393113], decimal=2)
+ npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 0)].loc[[14, 35, 37, 43, 52]].values, [0.076600555, 0.169748261, 0.272088807, 0.396562717, 0.396562717], decimal=2)
+ npt.assert_almost_equal(cp.baseline_cumulative_hazard_[(0, 0, 0, 1)].loc[[27, 43, 48, 52]].values, [0.095499001, 0.204196905, 0.338393113, 0.338393113], decimal=2)

def test_baseline_survival_is_the_same_indp_of_location(self, regression_dataset):
df = regression_dataset.copy()
@@ -963,8 +963,8 @@ def test_survival_prediction_is_the_same_indp_of_location(self, regression_dataset):
cp2.fit(df_demeaned, event_col='E', duration_col='T')

assert_frame_equal(
- cp1.predict_survival_function(df.ix[[0]][['var1', 'var2', 'var3']]),
- cp2.predict_survival_function(df_demeaned.ix[[0]][['var1', 'var2', 'var3']])
+ cp1.predict_survival_function(df.iloc[[0]][['var1', 'var2', 'var3']]),
+ cp2.predict_survival_function(df_demeaned.iloc[[0]][['var1', 'var2', 'var3']])
)

def test_baseline_survival_is_the_same_indp_of_scale(self, regression_dataset):
Expand All @@ -991,8 +991,8 @@ def test_survival_prediction_is_the_same_indp_of_scale(self, regression_dataset)
cp2.fit(df_scaled, event_col='E', duration_col='T')

assert_frame_equal(
- cp1.predict_survival_function(df.ix[[0]][['var1', 'var2', 'var3']]),
- cp2.predict_survival_function(df_scaled.ix[[0]][['var1', 'var2', 'var3']])
+ cp1.predict_survival_function(df.iloc[[0]][['var1', 'var2', 'var3']]),
+ cp2.predict_survival_function(df_scaled.iloc[[0]][['var1', 'var2', 'var3']])
)

def test_predict_log_hazard_relative_to_mean(self, rossi):
@@ -1156,7 +1156,7 @@ def test_aalen_additive_fit_no_censor(self, block):
for i in range(d + 1):
ax = plt.subplot(d + 1, 1, i + 1)
col = cumulative_hazards.columns[i]
- ax = cumulative_hazards[col].ix[:15].plot(legend=False, ax=ax)
+ ax = cumulative_hazards[col].loc[:15].plot(legend=False, ax=ax)
ax = aaf.plot(ix=slice(0, 15), ax=ax, columns=[col], legend=False)
plt.show(block=block)
return
@@ -1186,7 +1186,7 @@ def test_aalen_additive_fit_with_censor(self, block):
for i in range(d + 1):
ax = plt.subplot(d + 1, 1, i + 1)
col = cumulative_hazards.columns[i]
- ax = cumulative_hazards[col].ix[:15].plot(legend=False, ax=ax)
+ ax = cumulative_hazards[col].loc[:15].plot(legend=False, ax=ax)
ax = aaf.plot(ix=slice(0, 15), ax=ax, columns=[col], legend=False)
plt.show(block=block)
return
@@ -1215,7 +1215,7 @@ def test_crossval_for_aalen_add(self, data_pred2, data_pred1):
def test_predict_cumulative_hazard_inputs(self, data_pred1):
aaf = AalenAdditiveFitter()
aaf.fit(data_pred1, duration_col='t', event_col='E',)
- x = data_pred1.ix[:5].drop(['t', 'E'], axis=1)
+ x = data_pred1.iloc[:5].drop(['t', 'E'], axis=1)
y_df = aaf.predict_cumulative_hazard(x)
y_np = aaf.predict_cumulative_hazard(x.values)
assert_frame_equal(y_df, y_np)
8 changes: 4 additions & 4 deletions tests/test_statistics.py
@@ -27,8 +27,8 @@ def test_unequal_intensity_with_random_data():
def test_logrank_test_output_against_R_1():
df = load_g3()
ix = (df['group'] == 'RIT')
- d1, e1 = df.ix[ix]['time'], df.ix[ix]['event']
- d2, e2 = df.ix[~ix]['time'], df.ix[~ix]['event']
+ d1, e1 = df.loc[ix]['time'], df.loc[ix]['event']
+ d2, e2 = df.loc[~ix]['time'], df.loc[~ix]['event']

expected = 0.0138
result = stats.logrank_test(d1, d2, event_observed_A=e1, event_observed_B=e2)
@@ -115,8 +115,8 @@ def test_unequal_intensity_with_negative_data():
def test_waltons_dataset():
df = load_waltons()
ix = df['group'] == 'miR-137'
- waltonT1 = df.ix[ix]['T']
- waltonT2 = df.ix[~ix]['T']
+ waltonT1 = df.loc[ix]['T']
+ waltonT2 = df.loc[~ix]['T']
result = stats.logrank_test(waltonT1, waltonT2)
assert result.is_significant

(Diff for 1 remaining changed file did not load.)