Skip to content

PCA Config Update and Imports Fix #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits
Jan 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions configs/skl_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
"tol": [0.0]
},
{
"lib": ["daal4py"],
"algorithm": "pca",
"dataset": [
{
Expand Down Expand Up @@ -97,9 +96,18 @@
"training": {
"n_samples": 30000
}
},
{
"source": "synthetic",
"type": "classification",
"n_classes": 2,
"n_features": 4000,
"training": {
"n_samples": 6000
}
}
],
"svd-solver": ["correlation"],
"svd-solver": ["full"],
"n-components": [10]
},
{
Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.cluster import DBSCAN
from sklearn.metrics.cluster import davies_bouldin_score

parser = argparse.ArgumentParser(description='scikit-learn DBSCAN benchmark')
Expand All @@ -30,6 +29,8 @@
'neighborhood to consider a point a core point')
params = bench.parse_args(parser, n_jobs_supported=True)

from sklearn.cluster import DBSCAN

# Load generated data
X, _, _, _ = bench.load_data(params, add_dtype=True)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/df_clsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import bench

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

parser = argparse.ArgumentParser(description='scikit-learn random forest '
Expand All @@ -47,6 +46,8 @@

params = bench.parse_args(parser)

from sklearn.ensemble import RandomForestClassifier

# Load and convert data
X_train, X_test, y_train, y_test = bench.load_data(params)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/df_regr.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import argparse
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.ensemble import RandomForestRegressor

parser = argparse.ArgumentParser(description='scikit-learn random forest '
'regression benchmark')
Expand All @@ -45,6 +44,8 @@

params = bench.parse_args(parser)

from sklearn.ensemble import RandomForestRegressor

# Load and convert data
X_train, X_test, y_train, y_test = bench.load_data(params)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import argparse
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.metrics.pairwise import pairwise_distances

parser = argparse.ArgumentParser(description='scikit-learn pairwise distances '
'benchmark')
Expand All @@ -28,6 +27,8 @@
help='Metric to test for pairwise distances')
params = bench.parse_args(parser)

from sklearn.metrics.pairwise import pairwise_distances

# Load data
X, _, _, _ = bench.load_data(params, generated_data=['X_train'], add_dtype=True)

Expand Down
4 changes: 2 additions & 2 deletions sklearn_bench/elasticnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench

from sklearn.linear_model import ElasticNet

parser = argparse.ArgumentParser(description='scikit-learn elastic-net regression '
'benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept', default=False,
Expand All @@ -37,6 +35,8 @@
help='Tolerance for solver.')
params = bench.parse_args(parser)

from sklearn.linear_model import ElasticNet

# Load data
X_train, X_test, y_train, y_test = bench.load_data(params)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import davies_bouldin_score

parser = argparse.ArgumentParser(description='scikit-learn K-means benchmark')
Expand All @@ -33,6 +32,8 @@
parser.add_argument('--n-clusters', type=int, help='Number of clusters')
params = bench.parse_args(parser)

from sklearn.cluster import KMeans

# Load and convert generated data
X_train, X_test, _, _ = bench.load_data(params)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/knn_clsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

parser = argparse.ArgumentParser(
Expand All @@ -40,6 +39,8 @@
help='Distance metric to use')
params = bench.parse_args(parser)

from sklearn.neighbors import KNeighborsClassifier

# Load generated data
X_train, X_test, y_train, y_test = bench.load_data(params)
params.n_classes = len(np.unique(y_train))
Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/lasso.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.linear_model import Lasso

parser = argparse.ArgumentParser(description='scikit-learn lasso regression '
'benchmark')
Expand All @@ -34,6 +33,8 @@
help='Tolerance for solver.')
params = bench.parse_args(parser)

from sklearn.linear_model import Lasso

# Load data
X_train, X_test, y_train, y_test = bench.load_data(params)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.linear_model import LinearRegression

parser = argparse.ArgumentParser(description='scikit-learn linear regression '
'benchmark')
Expand All @@ -29,6 +28,8 @@
help="Don't fit intercept (assume data already centered)")
params = bench.parse_args(parser)

from sklearn.linear_model import LinearRegression

# Load data
X_train, X_test, y_train, y_test = bench.load_data(
params, generated_data=['X_train', 'y_train'])
Expand Down
4 changes: 3 additions & 1 deletion sklearn_bench/log_reg.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

parser = argparse.ArgumentParser(description='scikit-learn logistic '
'regression benchmark')
parser.add_argument('--no-fit-intercept', dest='fit_intercept',
Expand All @@ -45,6 +45,8 @@
'is 1e-10.')
params = bench.parse_args(parser, loop_types=('fit', 'predict'))

from sklearn.linear_model import LogisticRegression

# Load generated data
X_train, X_test, y_train, y_test = bench.load_data(params)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.decomposition import PCA

parser = argparse.ArgumentParser(description='scikit-learn PCA benchmark')
parser.add_argument('--svd-solver', type=str, choices=['full'],
Expand All @@ -30,6 +29,8 @@
help='Perform whitening')
params = bench.parse_args(parser)

from sklearn.decomposition import PCA

# Load random data
X_train, X_test, _, _ = bench.load_data(params, generated_data=['X_train'])

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.linear_model import Ridge

parser = argparse.ArgumentParser(description='scikit-learn ridge regression '
'benchmark')
Expand All @@ -32,6 +31,8 @@
help='Regularization strength')
params = bench.parse_args(parser)

from sklearn.linear_model import Ridge

# Load data
X_train, X_test, y_train, y_test = bench.load_data(params,
generated_data=['X_train', 'y_train'])
Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

parser = argparse.ArgumentParser(description='scikit-learn SVM benchmark')
Expand All @@ -43,6 +42,8 @@
dest='shrinking', help="Don't use shrinking heuristic")
params = bench.parse_args(parser, loop_types=('fit', 'predict'))

from sklearn.svm import SVC

# Load data
X_train, X_test, y_train, y_test = bench.load_data(params)

Expand Down
3 changes: 2 additions & 1 deletion sklearn_bench/train_test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import bench
from sklearn.model_selection import train_test_split

parser = argparse.ArgumentParser(
description='scikit-learn train_test_split benchmark')
Expand All @@ -40,6 +39,8 @@
'(only for IDP scikit-learn)')
params = bench.parse_args(parser)

from sklearn.model_selection import train_test_split

# Load generated data
X, y, _, _ = bench.load_data(params)

Expand Down