Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refine __init__ function in TPOT #740

Merged
merged 6 commits into from
Aug 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
37 changes: 21 additions & 16 deletions tests/export_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,16 @@
training_features, testing_features, training_target, testing_target = \
train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42)

tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()

def test_export_random_ind():
"""Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
tpot_obj = TPOTClassifier(random_state=39)
tpot_obj._fit_init()
tpot_obj._pbar = tqdm(total=1, disable=True)
pipeline = tpot_obj._toolbox.individual()
expected_code = """import numpy as np
Expand Down Expand Up @@ -86,7 +92,6 @@ def test_export_random_ind():

def test_export():
"""Assert that TPOT's export function throws a RuntimeError when no optimized pipeline exists."""
tpot_obj = TPOTClassifier()
assert_raises(RuntimeError, tpot_obj.export, "test_export.py")
pipeline_string = (
'KNeighborsClassifier(CombineDFs('
Expand All @@ -106,7 +111,8 @@ def test_export():

def test_generate_pipeline_code():
"""Assert that generate_pipeline_code() returns the correct code given a specific pipeline."""
tpot_obj = TPOTClassifier()

tpot_obj._fit_init()
pipeline = [
'KNeighborsClassifier',
[
Expand Down Expand Up @@ -148,7 +154,7 @@ def test_generate_pipeline_code():

def test_generate_pipeline_code_2():
"""Assert that generate_pipeline_code() returns the correct code given a specific pipeline with two CombineDFs."""
tpot_obj = TPOTClassifier()

pipeline = [
'KNeighborsClassifier',
[
Expand Down Expand Up @@ -200,7 +206,7 @@ def test_generate_pipeline_code_2():

def test_generate_import_code():
"""Assert that generate_import_code() returns the correct set of dependencies for a given pipeline."""
tpot_obj = TPOTClassifier()

pipeline = creator.Individual.from_string('GaussianNB(RobustScaler(input_matrix))', tpot_obj._pset)

expected_code = """import numpy as np
Expand All @@ -215,7 +221,7 @@ def test_generate_import_code():

def test_generate_import_code_2():
"""Assert that generate_import_code() returns the correct set of dependencies and dependencies are importable."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier(CombineDFs('
'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
Expand All @@ -241,7 +247,6 @@ def test_generate_import_code_2():

def test_operators():
"""Assert that the TPOT operators match the output of their sklearn counterparts."""
tpot_obj = TPOTClassifier(random_state=42)
for op in tpot_obj.operators:
check_export.description = ("Assert that the TPOT {} operator exports "
"as expected".format(op.__name__))
Expand All @@ -263,7 +268,7 @@ def check_export(op, tpot_obj):

def test_export_pipeline():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed pipeline."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier(CombineDFs('
'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
Expand Down Expand Up @@ -305,7 +310,7 @@ def test_export_pipeline():

def test_export_pipeline_2():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline (only one classifier)."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier('
'input_matrix, '
Expand Down Expand Up @@ -336,7 +341,7 @@ def test_export_pipeline_2():

def test_export_pipeline_3():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with a preprocessor."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
Expand Down Expand Up @@ -370,7 +375,7 @@ def test_export_pipeline_3():

def test_export_pipeline_4():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with input_matrix in CombineDFs."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier(CombineDFs('
'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
Expand Down Expand Up @@ -413,14 +418,13 @@ def test_export_pipeline_4():

def test_export_pipeline_5():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with SelectFromModel."""
tpot_obj = TPOTRegressor()
pipeline_string = (
'DecisionTreeRegressor(SelectFromModel(input_matrix, '
'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
)
pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
pipeline = creator.Individual.from_string(pipeline_string, tpot_obj_reg._pset)
expected_code = """import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
Expand All @@ -443,7 +447,7 @@ def test_export_pipeline_5():
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)
assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset)


def test_operator_export():
Expand All @@ -465,22 +469,22 @@ def test_operator_export_2():

def test_get_by_name():
"""Assert that the Operator class returns operators by name appropriately."""
tpot_obj = TPOTClassifier()

assert get_by_name("SelectPercentile", tpot_obj.operators).__class__ == TPOTSelectPercentile.__class__
assert get_by_name("SelectFromModel", tpot_obj.operators).__class__ == TPOTSelectFromModel.__class__


def test_get_by_name_2():
"""Assert that get_by_name raises TypeError with an incorrect operator name."""
tpot_obj = TPOTClassifier()

assert_raises(TypeError, get_by_name, "RandomForestRegressor", tpot_obj.operators)
# use correct name
ret_op_class = get_by_name("RandomForestClassifier", tpot_obj.operators)


def test_get_by_name_3():
"""Assert that get_by_name raises ValueError with duplicate operators in operator dictionary."""
tpot_obj = TPOTClassifier()

# no duplicate
ret_op_class = get_by_name("SelectPercentile", tpot_obj.operators)
# add a copy of TPOTSelectPercentile into operator list
Expand All @@ -506,6 +510,7 @@ def test_indent():
def test_pipeline_score_save():
"""Assert that the TPOTClassifier can generate a scored pipeline export correctly."""
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
tpot_obj._pbar = tqdm(total=1, disable=True)
pipeline_string = (
'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
Expand Down
7 changes: 5 additions & 2 deletions tests/stats_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@

def test_dict_initialization():
"""Asserts that gp_deap.initialize_stats_dict initializes individual statistics correctly"""
tpot = TPOTClassifier()
tb = tpot._toolbox
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
tb = tpot_obj._toolbox

test_ind = tb.individual()
initialize_stats_dict(test_ind)
Expand All @@ -48,6 +49,7 @@ def test_dict_initialization():
def test_mate_operator_stats_update():
"""Assert that self._mate_operator updates stats as expected."""
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
ind1 = creator.Individual.from_string(
'KNeighborsClassifier('
'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
Expand Down Expand Up @@ -99,6 +101,7 @@ def test_mate_operator_stats_update():
def test_mut_operator_stats_update():
"""Asserts that self._random_mutation_operator updates stats as expected."""
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
ind = creator.Individual.from_string(
'KNeighborsClassifier('
'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
Expand Down