
Commit

MNT Replaces numpy alias with builtin types (scikit-learn#17687)

* MNT Replaces numpy alias with builtin types

* STY Lint error
thomasjpfan committed Jun 24, 2020
1 parent e5b99ea commit 7cc0177
Showing 101 changed files with 249 additions and 249 deletions.
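
For context on why this sweep is behavior-preserving: np.int, np.float, and np.bool were plain aliases of the Python builtins int, float, and bool (NumPy later deprecated them in 1.20 and removed them in 1.24). A minimal sketch of the equivalence, assuming a NumPy version that still provides the distinct sized scalar types np.int_, np.float64, and np.bool_:

    import numpy as np

    # astype(int) selects NumPy's default integer dtype, exactly as
    # astype(np.int) did while the alias existed: np.int *was* the
    # builtin int.
    a = np.linspace(100, 1000000, 5).astype(int)
    print(a.dtype)  # platform default integer, e.g. int64

    # The builtins map to the same dtypes the removed aliases produced;
    # the sized scalar types (np.int_, np.float64, np.bool_) still exist.
    assert np.dtype(int) == np.dtype(np.int_)
    assert np.dtype(float) == np.dtype(np.float64)
    assert np.dtype(bool) == np.dtype(np.bool_)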
2 changes: 1 addition & 1 deletion benchmarks/bench_covertype.py
@@ -76,7 +76,7 @@ def load_data(dtype=np.float32, order='C', random_state=13):
     data = fetch_covtype(download_if_missing=True, shuffle=True,
                          random_state=random_state)
     X = check_array(data['data'], dtype=dtype, order=order)
-    y = (data['target'] != 1).astype(np.int)
+    y = (data['target'] != 1).astype(int)

     # Create train-test split (as [Joachims, 2006])
     print("Creating train-test split...")
2 changes: 1 addition & 1 deletion benchmarks/bench_hist_gradient_boosting.py
@@ -61,7 +61,7 @@ def get_estimator_and_data():
 X, y, Estimator = get_estimator_and_data()
 if args.missing_fraction:
     mask = np.random.binomial(1, args.missing_fraction, size=X.shape).astype(
-        np.bool)
+        bool)
     X[mask] = np.nan

 if args.random_sample_weights:
4 changes: 2 additions & 2 deletions benchmarks/bench_lasso.py
@@ -64,7 +64,7 @@ def compute_bench(alpha, n_samples, n_features, precompute):
     alpha = 0.01  # regularization parameter

     n_features = 10
-    list_n_samples = np.linspace(100, 1000000, 5).astype(np.int)
+    list_n_samples = np.linspace(100, 1000000, 5).astype(int)
     lasso_results, lars_lasso_results = compute_bench(alpha, list_n_samples,
                                                       [n_features], precompute=True)

@@ -82,7 +82,7 @@ def compute_bench(alpha, n_samples, n_features, precompute):
     plt.axis('tight')

     n_samples = 2000
-    list_n_features = np.linspace(500, 3000, 5).astype(np.int)
+    list_n_features = np.linspace(500, 3000, 5).astype(int)
     lasso_results, lars_lasso_results = compute_bench(alpha, [n_samples],
                                                       list_n_features, precompute=False)
     plt.subplot(212)
6 changes: 3 additions & 3 deletions benchmarks/bench_plot_fastkmeans.py
@@ -93,9 +93,9 @@ def compute_bench_2(chunks):
     from mpl_toolkits.mplot3d import axes3d  # register the 3d projection
     import matplotlib.pyplot as plt

-    samples_range = np.linspace(50, 150, 5).astype(np.int)
-    features_range = np.linspace(150, 50000, 5).astype(np.int)
-    chunks = np.linspace(500, 10000, 15).astype(np.int)
+    samples_range = np.linspace(50, 150, 5).astype(int)
+    features_range = np.linspace(150, 50000, 5).astype(int)
+    chunks = np.linspace(500, 10000, 15).astype(int)

     results = compute_bench(samples_range, features_range)
     results_2 = compute_bench_2(chunks)
2 changes: 1 addition & 1 deletion benchmarks/bench_plot_hierarchical.py
@@ -43,7 +43,7 @@ def compute_bench(samples_range, features_range):
 if __name__ == '__main__':
     import matplotlib.pyplot as plt

-    samples_range = np.linspace(1000, 15000, 8).astype(np.int)
+    samples_range = np.linspace(1000, 15000, 8).astype(int)
     features_range = np.array([2, 10, 20, 50])

     results = compute_bench(samples_range, features_range)
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_lasso_path.py
@@ -84,8 +84,8 @@ def compute_bench(samples_range, features_range):
     from mpl_toolkits.mplot3d import axes3d  # register the 3d projection
     import matplotlib.pyplot as plt

-    samples_range = np.linspace(10, 2000, 5).astype(np.int)
-    features_range = np.linspace(10, 2000, 5).astype(np.int)
+    samples_range = np.linspace(10, 2000, 5).astype(int)
+    features_range = np.linspace(10, 2000, 5).astype(int)
     results = compute_bench(samples_range, features_range)

     max_time = max(max(t) for t in results.values())
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_omp_lars.py
@@ -99,8 +99,8 @@ def compute_bench(samples_range, features_range):


 if __name__ == '__main__':
-    samples_range = np.linspace(1000, 5000, 5).astype(np.int)
-    features_range = np.linspace(1000, 5000, 5).astype(np.int)
+    samples_range = np.linspace(1000, 5000, 5).astype(int)
+    features_range = np.linspace(1000, 5000, 5).astype(int)
     results = compute_bench(samples_range, features_range)
     max_time = max(np.max(t) for t in results.values())
14 changes: 7 additions & 7 deletions benchmarks/bench_plot_randomized_svd.py
@@ -102,7 +102,7 @@

 # Determine when to switch to batch computation for matrix norms,
 # in case the reconstructed (dense) matrix is too large
-MAX_MEMORY = np.int(2e9)
+MAX_MEMORY = int(2e9)

 # The following datasets can be downloaded manually from:
 # CIFAR 10: https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
@@ -154,17 +154,17 @@ def get_data(dataset_name):
         del X1
         del X2
     elif dataset_name == 'low rank matrix':
-        X = make_low_rank_matrix(n_samples=500, n_features=np.int(1e4),
+        X = make_low_rank_matrix(n_samples=500, n_features=int(1e4),
                                  effective_rank=100, tail_strength=.5,
                                  random_state=random_state)
     elif dataset_name == 'uncorrelated matrix':
         X, _ = make_sparse_uncorrelated(n_samples=500, n_features=10000,
                                         random_state=random_state)
     elif dataset_name == 'big sparse matrix':
-        sparsity = np.int(1e6)
-        size = np.int(1e6)
-        small_size = np.int(1e4)
-        data = np.random.normal(0, 1, np.int(sparsity/10))
+        sparsity = int(1e6)
+        size = int(1e6)
+        small_size = int(1e4)
+        data = np.random.normal(0, 1, int(sparsity/10))
         data = np.repeat(data, 10)
         row = np.random.uniform(0, small_size, sparsity)
         col = np.random.uniform(0, small_size, sparsity)
@@ -356,7 +356,7 @@ def bench_b(power_list):
         X_spectral_norm = norm_diff(X, norm=2, msg=False)
         X_fro_norm = norm_diff(X, norm='fro', msg=False)

-        for n_comp in [np.int(rank/2), rank, rank*2]:
+        for n_comp in [int(rank/2), rank, rank*2]:
             label = "rank=%d, n_comp=%d" % (rank, n_comp)
             print(label)
             for pi in power_list:
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_svd.py
@@ -57,8 +57,8 @@ def compute_bench(samples_range, features_range, n_iter=3, rank=50):
     from mpl_toolkits.mplot3d import axes3d  # register the 3d projection
     import matplotlib.pyplot as plt

-    samples_range = np.linspace(2, 1000, 4).astype(np.int)
-    features_range = np.linspace(2, 1000, 4).astype(np.int)
+    samples_range = np.linspace(2, 1000, 4).astype(int)
+    features_range = np.linspace(2, 1000, 4).astype(int)
     results = compute_bench(samples_range, features_range)

     label = 'scikit-learn singular value decomposition benchmark results'
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_ward.py
@@ -35,9 +35,9 @@
 plt.imshow(np.log(ratio), aspect='auto', origin="lower")
 plt.colorbar()
 plt.contour(ratio, levels=[1, ], colors='k')
-plt.yticks(range(len(n_features)), n_features.astype(np.int))
+plt.yticks(range(len(n_features)), n_features.astype(int))
 plt.ylabel('N features')
-plt.xticks(range(len(n_samples)), n_samples.astype(np.int))
+plt.xticks(range(len(n_samples)), n_samples.astype(int))
 plt.xlabel('N samples')
 plt.title("Scikit's time, in units of scipy time (log)")
 plt.show()
2 changes: 1 addition & 1 deletion benchmarks/bench_sample_without_replacement.py
@@ -139,7 +139,7 @@ def bench_sample(sampling, n_population, n_samples):
     ###########################################################################
     time = {}
     n_samples = np.linspace(start=0, stop=opts.n_population,
-                            num=opts.n_steps).astype(np.int)
+                            num=opts.n_steps).astype(int)

     ratio = n_samples / opts.n_population
2 changes: 1 addition & 1 deletion benchmarks/bench_sgd_regression.py
@@ -22,7 +22,7 @@
 print(__doc__)

 if __name__ == "__main__":
-    list_n_samples = np.linspace(100, 10000, 5).astype(np.int)
+    list_n_samples = np.linspace(100, 10000, 5).astype(int)
     list_n_features = [10, 100, 1000]
     n_test = 1000
     max_iter = 1000
4 changes: 2 additions & 2 deletions examples/applications/plot_prediction_latency.py
@@ -41,7 +41,7 @@ def _not_in_sphinx():
 def atomic_benchmark_estimator(estimator, X_test, verbose=False):
     """Measure runtime prediction of each instance."""
     n_instances = X_test.shape[0]
-    runtimes = np.zeros(n_instances, dtype=np.float)
+    runtimes = np.zeros(n_instances, dtype=float)
     for i in range(n_instances):
         instance = X_test[[i], :]
         start = time.time()
@@ -56,7 +56,7 @@ def atomic_benchmark_estimator(estimator, X_test, verbose=False):
 def bulk_benchmark_estimator(estimator, X_test, n_bulk_repeats, verbose):
     """Measure runtime prediction of the whole input."""
     n_instances = X_test.shape[0]
-    runtimes = np.zeros(n_bulk_repeats, dtype=np.float)
+    runtimes = np.zeros(n_bulk_repeats, dtype=float)
     for i in range(n_bulk_repeats):
         start = time.time()
         estimator.predict(X_test)
2 changes: 1 addition & 1 deletion examples/applications/plot_tomography_l1_reconstruction.py
@@ -104,7 +104,7 @@ def generate_synthetic_data():
     mask_outer = (x - l / 2.) ** 2 + (y - l / 2.) ** 2 < (l / 2.) ** 2
     mask = np.zeros((l, l))
     points = l * rs.rand(2, n_pts)
-    mask[(points[0]).astype(np.int), (points[1]).astype(np.int)] = 1
+    mask[(points[0]).astype(int), (points[1]).astype(int)] = 1
     mask = ndimage.gaussian_filter(mask, sigma=l / n_pts)
     res = np.logical_and(mask > mask.mean(), mask_outer)
     return np.logical_xor(res, ndimage.binary_erosion(res))
4 changes: 2 additions & 2 deletions examples/cluster/plot_adjusted_for_chance_measures.py
@@ -68,7 +68,7 @@ def ami_score(U, V):
 # 2 independent random clusterings with equal cluster number

 n_samples = 100
-n_clusters_range = np.linspace(2, n_samples, 10).astype(np.int)
+n_clusters_range = np.linspace(2, n_samples, 10).astype(int)

 plt.figure(1)

@@ -97,7 +97,7 @@ def ami_score(U, V):
 # with fixed number of clusters

 n_samples = 1000
-n_clusters_range = np.linspace(2, 100, 10).astype(np.int)
+n_clusters_range = np.linspace(2, 100, 10).astype(int)
 n_classes = 10

 plt.figure(2)
2 changes: 1 addition & 1 deletion examples/cluster/plot_cluster_comparison.py
@@ -167,7 +167,7 @@

     t1 = time.time()
     if hasattr(algorithm, 'labels_'):
-        y_pred = algorithm.labels_.astype(np.int)
+        y_pred = algorithm.labels_.astype(int)
     else:
         y_pred = algorithm.predict(X)

4 changes: 2 additions & 2 deletions examples/cluster/plot_cluster_iris.py
@@ -52,7 +52,7 @@
     labels = est.labels_

     ax.scatter(X[:, 3], X[:, 0], X[:, 2],
-               c=labels.astype(np.float), edgecolor='k')
+               c=labels.astype(float), edgecolor='k')

     ax.w_xaxis.set_ticklabels([])
     ax.w_yaxis.set_ticklabels([])
@@ -77,7 +77,7 @@
               horizontalalignment='center',
               bbox=dict(alpha=.2, edgecolor='w', facecolor='w'))
 # Reorder the labels to have colors matching the cluster results
-y = np.choose(y, [1, 2, 0]).astype(np.float)
+y = np.choose(y, [1, 2, 0]).astype(float)
 ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor='k')

 ax.w_xaxis.set_ticklabels([])
@@ -41,7 +41,7 @@
 roi_size = 15
 snr = 5.
 np.random.seed(0)
-mask = np.ones([size, size], dtype=np.bool)
+mask = np.ones([size, size], dtype=bool)

 coef = np.zeros((size, size))
 coef[0:roi_size, 0:roi_size] = -1.
2 changes: 1 addition & 1 deletion examples/cluster/plot_linkage_comparison.py
@@ -123,7 +123,7 @@

     t1 = time.time()
     if hasattr(algorithm, 'labels_'):
-        y_pred = algorithm.labels_.astype(np.int)
+        y_pred = algorithm.labels_.astype(int)
     else:
         y_pred = algorithm.predict(X)

2 changes: 1 addition & 1 deletion examples/cluster/plot_ward_structured_vs_unstructured.py
@@ -58,7 +58,7 @@
 ax.view_init(7, -80)
 for l in np.unique(label):
     ax.scatter(X[label == l, 0], X[label == l, 1], X[label == l, 2],
-               color=plt.cm.jet(np.float(l) / np.max(label + 1)),
+               color=plt.cm.jet(float(l) / np.max(label + 1)),
               s=20, edgecolor='k')
 plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time)

2 changes: 1 addition & 1 deletion examples/covariance/plot_robust_vs_empirical_covariance.py
@@ -67,7 +67,7 @@

 range_n_outliers = np.concatenate(
     (np.linspace(0, n_samples / 8, 5),
-     np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1])).astype(np.int)
+     np.linspace(n_samples / 8, n_samples / 2, 5)[1:-1])).astype(int)

 # definition of arrays to store results
 err_loc_mcd = np.zeros((range_n_outliers.size, repeat))
2 changes: 1 addition & 1 deletion examples/decomposition/plot_pca_iris.py
@@ -49,7 +49,7 @@
               horizontalalignment='center',
               bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
 # Reorder the labels to have colors matching the cluster results
-y = np.choose(y, [1, 2, 0]).astype(np.float)
+y = np.choose(y, [1, 2, 0]).astype(float)
 ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral,
            edgecolor='k')

2 changes: 1 addition & 1 deletion examples/exercises/plot_iris_exercise.py
@@ -27,7 +27,7 @@
 np.random.seed(0)
 order = np.random.permutation(n_sample)
 X = X[order]
-y = y[order].astype(np.float)
+y = y[order].astype(float)

 X_train = X[:int(.9 * n_sample)]
 y_train = y[:int(.9 * n_sample)]
2 changes: 1 addition & 1 deletion examples/impute/plot_missing_values.py
@@ -64,7 +64,7 @@ def add_missing_values(X_full, y_full):
     missing_rate = 0.75
     n_missing_samples = int(n_samples * missing_rate)

-    missing_samples = np.zeros(n_samples, dtype=np.bool)
+    missing_samples = np.zeros(n_samples, dtype=bool)
     missing_samples[: n_missing_samples] = True

     rng.shuffle(missing_samples)
2 changes: 1 addition & 1 deletion examples/linear_model/plot_logistic.py
@@ -29,7 +29,7 @@
 n_samples = 100
 np.random.seed(0)
 X = np.random.normal(size=n_samples)
-y = (X > 0).astype(np.float)
+y = (X > 0).astype(float)
 X[X > 0] *= 4
 X += .3 * np.random.normal(size=n_samples)

2 changes: 1 addition & 1 deletion examples/linear_model/plot_logistic_l1_l2_sparsity.py
@@ -33,7 +33,7 @@
 X = StandardScaler().fit_transform(X)

 # classify small against large digits
-y = (y > 4).astype(np.int)
+y = (y > 4).astype(int)

 l1_ratio = 0.5  # L1 weight in the Elastic-Net regularization

@@ -73,7 +73,7 @@ def load_mtpl2(n_samples=100000):
     """
     # freMTPL2freq dataset from https://www.openml.org/d/41214
     df_freq = fetch_openml(data_id=41214, as_frame=True)['data']
-    df_freq['IDpol'] = df_freq['IDpol'].astype(np.int)
+    df_freq['IDpol'] = df_freq['IDpol'].astype(int)
     df_freq.set_index('IDpol', inplace=True)

     # freMTPL2sev dataset from https://www.openml.org/d/41215
2 changes: 1 addition & 1 deletion examples/manifold/plot_mds.py
@@ -25,7 +25,7 @@
 EPSILON = np.finfo(np.float32).eps
 n_samples = 20
 seed = np.random.RandomState(seed=3)
-X_true = seed.randint(0, 20, 2 * n_samples).astype(np.float)
+X_true = seed.randint(0, 20, 2 * n_samples).astype(float)
 X_true = X_true.reshape((n_samples, 2))
 # Center the data
 X_true -= X_true.mean()
2 changes: 1 addition & 1 deletion examples/miscellaneous/plot_kernel_ridge_regression.py
@@ -119,7 +119,7 @@
 X = 5 * rng.rand(10000, 1)
 y = np.sin(X).ravel()
 y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))
-sizes = np.logspace(1, 4, 7).astype(np.int)
+sizes = np.logspace(1, 4, 7).astype(int)
 for name, estimator in {"KRR": KernelRidge(kernel='rbf', alpha=0.1,
                                            gamma=10),
                         "SVR": SVR(kernel='rbf', C=1e1, gamma=10)}.items():
2 changes: 1 addition & 1 deletion examples/neighbors/approximate_nearest_neighbors.py
@@ -162,7 +162,7 @@ def _transform(self, X):
         n_neighbors = self.n_neighbors + 1

         indices = np.empty((n_samples_transform, n_neighbors),
-                           dtype=np.int)
+                           dtype=int)
         distances = np.empty((n_samples_transform, n_neighbors))

         if X is None:
4 changes: 2 additions & 2 deletions sklearn/cluster/_agglomerative.py
@@ -113,7 +113,7 @@ def _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters,

     # Convert edge list into standard hierarchical clustering format
     single_linkage_tree = _hierarchical._single_linkage_label(mst_array)
-    children_ = single_linkage_tree[:, :2].astype(np.int)
+    children_ = single_linkage_tree[:, :2].astype(int)

     # Compute parents
     parent = np.arange(n_nodes, dtype=np.intp)
@@ -490,7 +490,7 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',
         out = _hierarchical.single_linkage_label(mst)
     else:
         out = hierarchy.linkage(X, method=linkage, metric=affinity)
-    children_ = out[:, :2].astype(np.int, copy=False)
+    children_ = out[:, :2].astype(int, copy=False)

     if return_distance:
         distances = out[:, 2]
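
An aside on the copy=False variant in the hunk above (not part of the commit): astype(int, copy=False) skips the copy only when the array already has the requested dtype, and that behavior is unchanged by swapping the alias for the builtin. A quick sketch:

    import numpy as np

    out = np.arange(6, dtype=float).reshape(3, 2)

    # dtype differs (float -> int), so a converted copy is returned
    children = out.astype(int, copy=False)
    assert children is not out

    # dtype already matches, so the same array comes back uncopied
    assert children.astype(int, copy=False) is children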
4 changes: 2 additions & 2 deletions sklearn/cluster/_mean_shift.py
@@ -419,7 +419,7 @@ def fit(self, X, y=None):
                                      key=lambda tup: (tup[1], tup[0]),
                                      reverse=True)
         sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
-        unique = np.ones(len(sorted_centers), dtype=np.bool)
+        unique = np.ones(len(sorted_centers), dtype=bool)
         nbrs = NearestNeighbors(radius=bandwidth,
                                 n_jobs=self.n_jobs).fit(sorted_centers)
         for i, center in enumerate(sorted_centers):
@@ -433,7 +433,7 @@ def fit(self, X, y=None):
         # ASSIGN LABELS: a point belongs to the cluster that it is closest to
         nbrs = NearestNeighbors(n_neighbors=1,
                                 n_jobs=self.n_jobs).fit(cluster_centers)
-        labels = np.zeros(n_samples, dtype=np.int)
+        labels = np.zeros(n_samples, dtype=int)
         distances, idxs = nbrs.kneighbors(X)
         if self.cluster_all:
             labels = idxs.flatten()
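
The change is mechanical across all 101 files. A minimal sketch of how such a sweep could be scripted (an assumption for illustration; the commit does not say how it was produced), rewriting the bare aliases while leaving sized types such as np.int32, np.float64, and np.bool_ untouched:

    import pathlib
    import re

    # The trailing \b keeps np.int32, np.intp, np.float64, np.bool_ etc.
    # intact: digits and underscores are word characters, so no word
    # boundary follows them. Note this naive sweep would also rewrite
    # matches inside strings and comments.
    ALIAS = re.compile(r'\bnp\.(int|float|bool)\b')

    for path in pathlib.Path('.').rglob('*.py'):
        src = path.read_text()
        fixed = ALIAS.sub(lambda m: m.group(1), src)  # np.int -> int, ...
        if fixed != src:
            path.write_text(fixed)
            print('rewrote', path)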
(Diff truncated: remaining changed files not shown.)