Skip to content

Commit

Permalink
Merge ca26160 into c92fea5
Browse files Browse the repository at this point in the history
  • Loading branch information
adithyabsk committed Jun 25, 2018
2 parents c92fea5 + ca26160 commit 4aa9117
Show file tree
Hide file tree
Showing 17 changed files with 329 additions and 32 deletions.
4 changes: 2 additions & 2 deletions docs_sources/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ How many minutes TPOT has to optimize the pipeline.
If not None, this setting will override the <em>generations</em> parameter and allow TPOT to run until <em>max_time_mins</em> minutes elapse.
</blockquote>

<strong>max_eval_time_mins</strong>: integer, optional (default=5)
<strong>max_eval_time_mins</strong>: float, optional (default=5)
<blockquote>
How many minutes TPOT has to evaluate a single pipeline.
<br /><br />
Expand Down Expand Up @@ -588,7 +588,7 @@ How many minutes TPOT has to optimize the pipeline.
If not None, this setting will override the <em>generations</em> parameter and allow TPOT to run until <em>max_time_mins</em> minutes elapse.
</blockquote>

<strong>max_eval_time_mins</strong>: integer, optional (default=5)
<strong>max_eval_time_mins</strong>: float, optional (default=5)
<blockquote>
How many minutes TPOT has to evaluate a single pipeline.
<br /><br />
Expand Down
2 changes: 1 addition & 1 deletion docs_sources/using.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ If provided, this setting will override the "generations" parameter and allow TP
<tr>
<td>-maxeval</td>
<td>MAX_EVAL_MINS</td>
<td>Any positive integer</td>
<td>Any positive float</td>
<td>How many minutes TPOT has to evaluate a single pipeline.
<br /><br />
Setting this parameter to higher values will allow TPOT to consider more complex pipelines but will also allow TPOT to run longer.</td>
Expand Down
16 changes: 10 additions & 6 deletions tests/export_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,9 +505,14 @@ def test_indent():

def test_pipeline_score_save():
"""Assert that the TPOTClassifier can generate a scored pipeline export correctly."""
tpot_obj = TPOTClassifier(random_state=39)
tpot_obj = TPOTClassifier()
tpot_obj._pbar = tqdm(total=1, disable=True)
pipeline = tpot_obj._toolbox.individual()
pipeline_string = (
'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
'DecisionTreeClassifier__min_samples_leaf=5, DecisionTreeClassifier__min_samples_split=5)'
)
pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
expected_code = """import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectPercentile, f_classif
Expand All @@ -521,16 +526,15 @@ def test_pipeline_score_save():
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'].values, random_state=42)
# Score on the training set was:0.929813743
# Average CV score on the training set was:0.929813743
exported_pipeline = make_pipeline(
SelectPercentile(score_func=f_classif, percentile=65),
DecisionTreeClassifier(criterion="gini", max_depth=7, min_samples_leaf=4, min_samples_split=18)
SelectPercentile(score_func=f_classif, percentile=20),
DecisionTreeClassifier(criterion="gini", max_depth=8, min_samples_leaf=5, min_samples_split=5)
)
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""

assert_equal(expected_code, export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, pipeline_score=0.929813743))


Expand Down
80 changes: 80 additions & 0 deletions tests/feature_transformers_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from sklearn.datasets import load_iris
from tpot.builtins import CategoricalSelector, ContinuousSelector
from nose.tools import assert_equal, assert_raises

iris_data = load_iris().data

def test_CategoricalSelector():
    """Check the column count CategoricalSelector yields with default settings."""
    # Only the first 16 iris rows are used, as in the other selector tests.
    sample = iris_data[0:16, :]
    transformed = CategoricalSelector().transform(sample)
    n_columns = transformed.shape[1]

    assert_equal(n_columns, 2)


def test_CategoricalSelector_2():
    """Check the column count CategoricalSelector yields with threshold=5."""
    sample = iris_data[0:16, :]
    selector = CategoricalSelector(threshold=5)
    transformed = selector.transform(sample)
    n_columns = transformed.shape[1]

    assert_equal(n_columns, 1)


def test_CategoricalSelector_3():
    """Check the column count CategoricalSelector yields with threshold=20."""
    sample = iris_data[0:16, :]
    selector = CategoricalSelector(threshold=20)
    transformed = selector.transform(sample)
    n_columns = transformed.shape[1]

    assert_equal(n_columns, 7)


def test_CategoricalSelector_4():
    """Check that CategoricalSelector raises ValueError without categorical features."""
    selector = CategoricalSelector()

    # The full iris matrix is passed here, unlike the 16-row slice used elsewhere.
    with assert_raises(ValueError):
        selector.transform(iris_data)


def test_CategoricalSelector_fit():
    """Check that CategoricalSelector.fit() returns an object equal to the selector."""
    selector = CategoricalSelector()
    fitted = selector.fit(iris_data)

    assert fitted == selector


def test_ContinuousSelector():
    """Check the column count ContinuousSelector yields with svd_solver='randomized'."""
    sample = iris_data[0:16, :]
    selector = ContinuousSelector(svd_solver='randomized')
    transformed = selector.transform(sample)
    n_columns = transformed.shape[1]

    assert_equal(n_columns, 2)


def test_ContinuousSelector_2():
    """Check the column count ContinuousSelector yields with threshold=5."""
    sample = iris_data[0:16, :]
    selector = ContinuousSelector(threshold=5, svd_solver='randomized')
    transformed = selector.transform(sample)
    n_columns = transformed.shape[1]

    assert_equal(n_columns, 3)


def test_ContinuousSelector_3():
    """Check the column count ContinuousSelector yields with threshold=10 and svd_solver='full'."""
    sample = iris_data[0:16, :]
    selector = ContinuousSelector(threshold=10, svd_solver='full')
    transformed = selector.transform(sample)
    n_columns = transformed.shape[1]

    assert_equal(n_columns, 2)


def test_ContinuousSelector_4():
    """Check that ContinuousSelector raises ValueError on the first 10 iris rows.

    Presumably no column in so small a sample qualifies as continuous under
    the default threshold; confirm against the ContinuousSelector implementation.
    """
    selector = ContinuousSelector()
    sample = iris_data[0:10, :]

    with assert_raises(ValueError):
        selector.transform(sample)


def test_ContinuousSelector_fit():
    """Check that ContinuousSelector.fit() returns an object equal to the selector."""
    selector = ContinuousSelector()
    fitted = selector.fit(iris_data)

    assert fitted == selector
30 changes: 27 additions & 3 deletions tests/one_hot_encoder_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from sklearn.model_selection import cross_val_score, KFold
from nose.tools import assert_equal

from tpot.builtins.one_hot_encoder import OneHotEncoder, _auto_select_categorical_features
from tpot.builtins import OneHotEncoder, auto_select_categorical_features, _transform_selected


iris_data = load_iris().data
Expand Down Expand Up @@ -144,8 +144,9 @@ def fit_then_transform_dense(expected, input,

def test_auto_detect_categorical():
    """Assert that automatic selection of categorical features works as expected with a threshold of 10."""
    # Use the public helper only; the earlier call through the old private
    # name was a dead store that this assignment immediately overwrote.
    selected = auto_select_categorical_features(iris_data[0:16, :], threshold=10)
    expected = [False, False, True, True]

    assert_equal(selected, expected)


Expand Down Expand Up @@ -295,10 +296,33 @@ def test_transform():
assert np.sum(output) == 3


def test_transform_selected():
    """Check that _transform_selected returns the original X when the selection is an empty list."""
    encoder = OneHotEncoder(categorical_features=[])
    transform_fn = encoder._fit_transform
    selection = encoder.categorical_features

    result = _transform_selected(dense1, transform_fn, selection, copy=True)

    assert np.allclose(result, dense1)


def test_transform_selected_2():
    """Check that _transform_selected returns the original X when the selection is all False."""
    encoder = OneHotEncoder(categorical_features=[False, False, False])
    transform_fn = encoder._fit_transform
    selection = encoder.categorical_features

    result = _transform_selected(dense1, transform_fn, selection, copy=True)

    assert np.allclose(result, dense1)


def test_k_fold_cv():
"""Test OneHotEncoder with categorical_features='auto'."""
boston = load_boston()

clf = make_pipeline(
OneHotEncoder(
categorical_features='auto',
Expand Down
8 changes: 8 additions & 0 deletions tests/zero_count_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,11 @@ def test_ZeroCount():

assert np.allclose(zero_col, X_transformed[:, 0])
assert np.allclose(non_zero, X_transformed[:, 1])


def test_ZeroCount_fit():
    """Check that ZeroCount.fit() returns an object equal to the transformer."""
    transformer = ZeroCount()
    fitted = transformer.fit(X)

    assert fitted == transformer
8 changes: 7 additions & 1 deletion tpot/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def __init__(self, generations=100, population_size=100, offspring_size=None,
How many minutes TPOT has to optimize the pipeline.
If provided, this setting will override the "generations" parameter and allow
TPOT to run until it runs out of time.
max_eval_time_mins: int, optional (default: 5)
max_eval_time_mins: float, optional (default: 5)
How many minutes TPOT has to evaluate a single pipeline.
Setting this parameter to higher values will allow TPOT to explore more
complex pipelines, but will also allow TPOT to run longer.
Expand Down Expand Up @@ -863,6 +863,12 @@ def predict_proba(self, features):
else:
if not (hasattr(self.fitted_pipeline_, 'predict_proba')):
raise RuntimeError('The fitted pipeline does not have the predict_proba() function.')

features = features.astype(np.float64)

if np.any(np.isnan(features)):
features = self._impute_values(features)

return self.fitted_pipeline_.predict_proba(features.astype(np.float64))

def set_params(self, **params):
Expand Down
3 changes: 2 additions & 1 deletion tpot/builtins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@
from .zero_count import ZeroCount
from .combine_dfs import CombineDFs
from .stacking_estimator import StackingEstimator
from .one_hot_encoder import OneHotEncoder
from .one_hot_encoder import OneHotEncoder, auto_select_categorical_features, _transform_selected
from .feature_transformers import CategoricalSelector, ContinuousSelector
Loading

0 comments on commit 4aa9117

Please sign in to comment.