diff --git a/configs/skl_config.json b/configs/skl_config.json index 29c93be4e..dc8c9968c 100755 --- a/configs/skl_config.json +++ b/configs/skl_config.json @@ -65,7 +65,6 @@ "tol": [0.0] }, { - "lib": ["daal4py"], "algorithm": "pca", "dataset": [ { @@ -97,9 +96,18 @@ "training": { "n_samples": 30000 } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 4000, + "training": { + "n_samples": 6000 + } } ], - "svd-solver": ["correlation"], + "svd-solver": ["full"], "n-components": [10] }, { diff --git a/sklearn_bench/dbscan.py b/sklearn_bench/dbscan.py index d019e8ca8..bf0c71554 100644 --- a/sklearn_bench/dbscan.py +++ b/sklearn_bench/dbscan.py @@ -19,7 +19,6 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.cluster import DBSCAN from sklearn.metrics.cluster import davies_bouldin_score parser = argparse.ArgumentParser(description='scikit-learn DBSCAN benchmark') @@ -30,6 +29,8 @@ 'neighborhood to consider a point a core point') params = bench.parse_args(parser, n_jobs_supported=True) +from sklearn.cluster import DBSCAN + # Load generated data X, _, _, _ = bench.load_data(params, add_dtype=True) diff --git a/sklearn_bench/df_clsf.py b/sklearn_bench/df_clsf.py index 405472034..62199539c 100644 --- a/sklearn_bench/df_clsf.py +++ b/sklearn_bench/df_clsf.py @@ -21,7 +21,6 @@ import bench import numpy as np -from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score parser = argparse.ArgumentParser(description='scikit-learn random forest ' @@ -47,6 +46,8 @@ params = bench.parse_args(parser) +from sklearn.ensemble import RandomForestClassifier + # Load and convert data X_train, X_test, y_train, y_test = bench.load_data(params) diff --git a/sklearn_bench/df_regr.py b/sklearn_bench/df_regr.py index 7c9c335aa..749f8aad0 100644 --- a/sklearn_bench/df_regr.py +++ b/sklearn_bench/df_regr.py @@ -19,7 +19,6 @@ import argparse sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.ensemble import RandomForestRegressor parser = argparse.ArgumentParser(description='scikit-learn random forest ' 'regression benchmark') @@ -45,6 +44,8 @@ params = bench.parse_args(parser) +from sklearn.ensemble import RandomForestRegressor + # Load and convert data X_train, X_test, y_train, y_test = bench.load_data(params) diff --git a/sklearn_bench/distances.py b/sklearn_bench/distances.py index 0344efceb..b73d49255 100644 --- a/sklearn_bench/distances.py +++ b/sklearn_bench/distances.py @@ -19,7 +19,6 @@ import argparse sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.metrics.pairwise import pairwise_distances parser = argparse.ArgumentParser(description='scikit-learn pairwise distances ' 'benchmark') @@ -28,6 +27,8 @@ help='Metric to test for pairwise distances') params = bench.parse_args(parser) +from sklearn.metrics.pairwise import pairwise_distances + # Load data X, _, _, _ = bench.load_data(params, generated_data=['X_train'], add_dtype=True) diff --git a/sklearn_bench/elasticnet.py b/sklearn_bench/elasticnet.py index 88bd56ed0..f0ac87973 100755 --- a/sklearn_bench/elasticnet.py +++ b/sklearn_bench/elasticnet.py @@ -20,8 +20,6 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.linear_model import ElasticNet - parser = argparse.ArgumentParser(description='scikit-learn elastic-net regression ' 'benchmark') parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=False, @@ -37,6 +35,8 @@ help='Tolerance for solver.') params = bench.parse_args(parser) +from sklearn.linear_model import ElasticNet + # Load data X_train, X_test, y_train, y_test = bench.load_data(params) diff --git a/sklearn_bench/kmeans.py b/sklearn_bench/kmeans.py index e0c974c9a..a6e1a5f1e 100644 --- a/sklearn_bench/kmeans.py +++ b/sklearn_bench/kmeans.py @@ -20,7 +20,6 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench import numpy as np -from sklearn.cluster import KMeans from sklearn.metrics.cluster import davies_bouldin_score parser = argparse.ArgumentParser(description='scikit-learn K-means benchmark') @@ -33,6 +32,8 @@ parser.add_argument('--n-clusters', type=int, help='Number of clusters') params = bench.parse_args(parser) +from sklearn.cluster import KMeans + # Load and convert generated data X_train, X_test, _, _ = bench.load_data(params) diff --git a/sklearn_bench/knn_clsf.py b/sklearn_bench/knn_clsf.py index bc7779816..4019a891f 100755 --- a/sklearn_bench/knn_clsf.py +++ b/sklearn_bench/knn_clsf.py @@ -20,7 +20,6 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench import numpy as np -from sklearn.neighbors import KNeighborsClassifier from sklearn.metrics import accuracy_score parser = argparse.ArgumentParser( @@ -40,6 +39,8 @@ help='Distance metric to use') params = bench.parse_args(parser) +from sklearn.neighbors import KNeighborsClassifier + # Load generated data X_train, X_test, y_train, y_test = bench.load_data(params) params.n_classes = len(np.unique(y_train)) diff --git a/sklearn_bench/lasso.py b/sklearn_bench/lasso.py index d7c6e03c4..7b2792909 100755 --- a/sklearn_bench/lasso.py +++ b/sklearn_bench/lasso.py @@ -19,7 +19,6 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.linear_model import Lasso parser = argparse.ArgumentParser(description='scikit-learn lasso regression ' 'benchmark') @@ -34,6 +33,8 @@ help='Tolerance for solver.') params = bench.parse_args(parser) +from sklearn.linear_model import Lasso + # Load data X_train, X_test, y_train, y_test = bench.load_data(params) diff --git a/sklearn_bench/linear.py b/sklearn_bench/linear.py index 45199743e..e059d96ef 100644 --- a/sklearn_bench/linear.py +++ b/sklearn_bench/linear.py @@ -20,7 +20,6 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.linear_model import LinearRegression parser = argparse.ArgumentParser(description='scikit-learn linear regression ' 'benchmark') @@ -29,6 +28,8 @@ help="Don't fit intercept (assume data already centered)") params = bench.parse_args(parser) +from sklearn.linear_model import LinearRegression + # Load data X_train, X_test, y_train, y_test = bench.load_data( params, generated_data=['X_train', 'y_train']) diff --git a/sklearn_bench/log_reg.py b/sklearn_bench/log_reg.py index 2c580bea3..073fa549f 100644 --- a/sklearn_bench/log_reg.py +++ b/sklearn_bench/log_reg.py @@ -20,8 +20,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench import numpy as np -from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score + parser = argparse.ArgumentParser(description='scikit-learn logistic ' 'regression benchmark') parser.add_argument('--no-fit-intercept', dest='fit_intercept', @@ -45,6 +45,8 @@ 'is 1e-10.') params = bench.parse_args(parser, loop_types=('fit', 'predict')) +from sklearn.linear_model import LogisticRegression + # Load generated data X_train, X_test, y_train, y_test = bench.load_data(params) diff --git a/sklearn_bench/pca.py b/sklearn_bench/pca.py index 9cd2ed79b..6d015e3d9 100644 --- a/sklearn_bench/pca.py +++ b/sklearn_bench/pca.py @@ -19,7 +19,6 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.decomposition import PCA parser = argparse.ArgumentParser(description='scikit-learn PCA benchmark') parser.add_argument('--svd-solver', type=str, choices=['full'], @@ -30,6 +29,8 @@ help='Perform whitening') params = bench.parse_args(parser) +from sklearn.decomposition import PCA + # Load random data X_train, X_test, _, _ = bench.load_data(params, generated_data=['X_train']) diff --git a/sklearn_bench/ridge.py b/sklearn_bench/ridge.py index 9866e5574..dce3b7c53 100644 --- a/sklearn_bench/ridge.py +++ b/sklearn_bench/ridge.py @@ -19,7 +19,6 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.linear_model import Ridge parser = argparse.ArgumentParser(description='scikit-learn ridge regression ' 'benchmark') @@ -32,6 +31,8 @@ help='Regularization strength') params = bench.parse_args(parser) +from sklearn.linear_model import Ridge + # Load data X_train, X_test, y_train, y_test = bench.load_data(params, generated_data=['X_train', 'y_train']) diff --git a/sklearn_bench/svm.py b/sklearn_bench/svm.py index 14ecd19b3..a8672cefb 100644 --- a/sklearn_bench/svm.py +++ b/sklearn_bench/svm.py @@ -21,7 +21,6 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench import numpy as np -from sklearn.svm import SVC from sklearn.metrics import accuracy_score parser = argparse.ArgumentParser(description='scikit-learn SVM benchmark') @@ -43,6 +42,8 @@ dest='shrinking', help="Don't use shrinking heuristic") params = bench.parse_args(parser, loop_types=('fit', 'predict')) +from sklearn.svm import SVC + # Load data X_train, X_test, y_train, y_test = bench.load_data(params) diff --git a/sklearn_bench/train_test_split.py b/sklearn_bench/train_test_split.py index c7c6f8e28..4f61d7bac 100644 --- a/sklearn_bench/train_test_split.py +++ b/sklearn_bench/train_test_split.py @@ -20,7 +20,6 @@ import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import bench -from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser( description='scikit-learn train_test_split benchmark') @@ -40,6 +39,8 @@ '(only for IDP scikit-learn)') params = bench.parse_args(parser) +from sklearn.model_selection import train_test_split + # Load generated data X, y, _, _ = bench.load_data(params)