Skip to content

Commit

Permalink
fix all unit tests and fix an issue for warm start
Browse files Browse the repository at this point in the history
  • Loading branch information
weixuanfu committed Aug 9, 2018
1 parent a11dbd9 commit 7c7e939
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 65 deletions.
37 changes: 21 additions & 16 deletions tests/export_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,16 @@
training_features, testing_features, training_target, testing_target = \
train_test_split(mnist_data.data.astype(np.float64), mnist_data.target.astype(np.float64), random_state=42)

tpot_obj = TPOTClassifier()
tpot_obj._fit_init()

tpot_obj_reg = TPOTRegressor()
tpot_obj_reg._fit_init()

def test_export_random_ind():
"""Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
tpot_obj = TPOTClassifier(random_state=39)
tpot_obj._fit_init()
tpot_obj._pbar = tqdm(total=1, disable=True)
pipeline = tpot_obj._toolbox.individual()
expected_code = """import numpy as np
Expand Down Expand Up @@ -86,7 +92,6 @@ def test_export_random_ind():

def test_export():
"""Assert that TPOT's export function throws a RuntimeError when no optimized pipeline exists."""

assert_raises(RuntimeError, tpot_obj.export, "test_export.py")
pipeline_string = (
'KNeighborsClassifier(CombineDFs('
Expand All @@ -106,7 +111,8 @@ def test_export():

def test_generate_pipeline_code():
"""Assert that generate_pipeline_code() returns the correct code given a specific pipeline."""
tpot_obj = TPOTClassifier()

tpot_obj._fit_init()
pipeline = [
'KNeighborsClassifier',
[
Expand Down Expand Up @@ -148,7 +154,7 @@ def test_generate_pipeline_code():

def test_generate_pipeline_code_2():
"""Assert that generate_pipeline_code() returns the correct code given a specific pipeline with two CombineDFs."""
tpot_obj = TPOTClassifier()

pipeline = [
'KNeighborsClassifier',
[
Expand Down Expand Up @@ -200,7 +206,7 @@ def test_generate_pipeline_code_2():

def test_generate_import_code():
"""Assert that generate_import_code() returns the correct set of dependencies for a given pipeline."""
tpot_obj = TPOTClassifier()

pipeline = creator.Individual.from_string('GaussianNB(RobustScaler(input_matrix))', tpot_obj._pset)

expected_code = """import numpy as np
Expand All @@ -215,7 +221,7 @@ def test_generate_import_code():

def test_generate_import_code_2():
"""Assert that generate_import_code() returns the correct set of dependencies and that the dependencies are importable."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier(CombineDFs('
'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
Expand All @@ -241,7 +247,6 @@ def test_generate_import_code_2():

def test_operators():
"""Assert that the TPOT operators match the output of their sklearn counterparts."""
tpot_obj = TPOTClassifier(random_state=42)
for op in tpot_obj.operators:
check_export.description = ("Assert that the TPOT {} operator exports "
"as expected".format(op.__name__))
Expand All @@ -263,7 +268,7 @@ def check_export(op, tpot_obj):

def test_export_pipeline():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed pipeline."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier(CombineDFs('
'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
Expand Down Expand Up @@ -305,7 +310,7 @@ def test_export_pipeline():

def test_export_pipeline_2():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline (only one classifier)."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier('
'input_matrix, '
Expand Down Expand Up @@ -336,7 +341,7 @@ def test_export_pipeline_2():

def test_export_pipeline_3():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with a preprocessor."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
'DecisionTreeClassifier__criterion=gini, DecisionTreeClassifier__max_depth=8,'
Expand Down Expand Up @@ -370,7 +375,7 @@ def test_export_pipeline_3():

def test_export_pipeline_4():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with input_matrix in CombineDFs."""
tpot_obj = TPOTClassifier()

pipeline_string = (
'KNeighborsClassifier(CombineDFs('
'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
Expand Down Expand Up @@ -413,14 +418,13 @@ def test_export_pipeline_4():

def test_export_pipeline_5():
"""Assert that exported_pipeline() generated a compile source file as expected given a fixed simple pipeline with SelectFromModel."""
tpot_obj = TPOTRegressor()
pipeline_string = (
'DecisionTreeRegressor(SelectFromModel(input_matrix, '
'SelectFromModel__ExtraTreesRegressor__max_features=0.05, SelectFromModel__ExtraTreesRegressor__n_estimators=100, '
'SelectFromModel__threshold=0.05), DecisionTreeRegressor__max_depth=8,'
'DecisionTreeRegressor__min_samples_leaf=5, DecisionTreeRegressor__min_samples_split=5)'
)
pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
pipeline = creator.Individual.from_string(pipeline_string, tpot_obj_reg._pset)
expected_code = """import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
Expand All @@ -443,7 +447,7 @@ def test_export_pipeline_5():
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
assert expected_code == export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset)
assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset)


def test_operator_export():
Expand All @@ -465,22 +469,22 @@ def test_operator_export_2():

def test_get_by_name():
"""Assert that the Operator class returns operators by name appropriately."""
tpot_obj = TPOTClassifier()

assert get_by_name("SelectPercentile", tpot_obj.operators).__class__ == TPOTSelectPercentile.__class__
assert get_by_name("SelectFromModel", tpot_obj.operators).__class__ == TPOTSelectFromModel.__class__


def test_get_by_name_2():
"""Assert that get_by_name raises TypeError with an incorrect operator name."""
tpot_obj = TPOTClassifier()

assert_raises(TypeError, get_by_name, "RandomForestRegressor", tpot_obj.operators)
# use correct name
ret_op_class = get_by_name("RandomForestClassifier", tpot_obj.operators)


def test_get_by_name_3():
"""Assert that get_by_name raises ValueError with duplicate operators in operator dictionary."""
tpot_obj = TPOTClassifier()

# no duplicate
ret_op_class = get_by_name("SelectPercentile", tpot_obj.operators)
# add a copy of TPOTSelectPercentile into operator list
Expand All @@ -506,6 +510,7 @@ def test_indent():
def test_pipeline_score_save():
"""Assert that the TPOTClassifier can generate a scored pipeline export correctly."""
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
tpot_obj._pbar = tqdm(total=1, disable=True)
pipeline_string = (
'DecisionTreeClassifier(SelectPercentile(input_matrix, SelectPercentile__percentile=20),'
Expand Down
7 changes: 5 additions & 2 deletions tests/stats_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@

def test_dict_initialization():
"""Asserts that gp_deap.initialize_stats_dict initializes individual statistics correctly"""
tpot = TPOTClassifier()
tb = tpot._toolbox
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
tb = tpot_obj._toolbox

test_ind = tb.individual()
initialize_stats_dict(test_ind)
Expand All @@ -48,6 +49,7 @@ def test_dict_initialization():
def test_mate_operator_stats_update():
"""Assert that self._mate_operator updates stats as expected."""
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
ind1 = creator.Individual.from_string(
'KNeighborsClassifier('
'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
Expand Down Expand Up @@ -99,6 +101,7 @@ def test_mate_operator_stats_update():
def test_mut_operator_stats_update():
"""Asserts that self._random_mutation_operator updates stats as expected."""
tpot_obj = TPOTClassifier()
tpot_obj._fit_init()
ind = creator.Individual.from_string(
'KNeighborsClassifier('
'BernoulliNB(input_matrix, BernoulliNB__alpha=10.0, BernoulliNB__fit_prior=False),'
Expand Down

0 comments on commit 7c7e939

Please sign in to comment.