## Prepare train_data and test_data

In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from hypergbm import make_experiment

In [3]:
# Load the breast-cancer dataset as a feature frame plus a target series.
X, y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)

# Keep 70% of the rows for training; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=335,
)

# Join the training features and the target column side by side in one frame.
train_data = pd.concat([X_train, y_train], axis=1)

# Set the maximum trial number

Set the maximum number of trials with `max_trials`; the default is 10.

In [4]:
# Limit the search to five trials via max_trials; the experiment works on
# its own copy of train_data.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    max_trials=5,
)
estimator = experiment.run()
print(estimator)

Pipeline(steps=[('data_clean',
                 DataCleanStep(cv=True,
                               data_cleaner_args={'correct_object_dtype': True,
                                                  'drop_columns': None,
                                                  'drop_constant_columns': True,
                                                  'drop_duplicated_columns': False,
                                                  'drop_idness_columns': True,
                                                  'drop_label_nan_rows': True,
                                                  'int_convert_to': 'float',
                                                  'nan_chars': None,
                                                  'reduce_mem_usage': False,
                                                  'reserve_columns': None},
                               name='data_clean')),
                ('estimator',
                 GreedyEnsemble(weight=[0.95, 0.05, 0.0, 0.0, 0.0], score

# Set cv policy

Use `num_folds` to change the number of cross-validation folds, or set `cv=False` to disable cross-validation.

In [5]:
# Turn cross-validation on explicitly and request five folds.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    cv=True,
    num_folds=5,
)
estimator = experiment.run()

# Select searcher

Select searcher with `searcher`, possible values:
* one of 'evolution', 'mcts', 'random' 
* or one of EvolutionSearcher, MCTSSearcher, RandomSearcher, or a subclass of the hypernets Searcher

In [6]:
# String values accepted for `searcher`: 'evolution', 'mcts', 'random'.
# This run selects 'mcts'.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    searcher='mcts',
)
estimator = experiment.run()

# Set early_stopping policy

Use these settings to set search early stopping policy:
* early_stopping_rounds: int, optional, zero or None to disable it, default is 10.
* early_stopping_time_limit: int, optional, zero or None to disable it, default is 3600 seconds.
* early_stopping_reward: float, optional, zero or None to disable it, default is None.

In [7]:
# Early-stopping thresholds for the search: 10 rounds, a time limit of
# 1800 seconds, and a reward of 0.96.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    early_stopping_rounds=10,
    early_stopping_time_limit=1800,
    early_stopping_reward=0.96,
)
estimator = experiment.run()

# Enable TrialStore

Use `trial_store` to set the trial_store location, keep None to disable it.

In [8]:
# Enable the trial store by pointing trial_store at a directory; the
# search is still capped at five trials.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    max_trials=5,
    trial_store='/tmp/trial_store',
)
estimator = experiment.run()

In [9]:
# Shell escape: list everything under the trial_store directory used by the
# experiment in the previous cell.
!find /tmp/trial_store

/tmp/trial_store
/tmp/trial_store/9207154dea51e7eb80424205f7437164
/tmp/trial_store/9207154dea51e7eb80424205f7437164/ee08b2934d47e8a68597c134569b766c
/tmp/trial_store/9207154dea51e7eb80424205f7437164/ee08b2934d47e8a68597c134569b766c/1,0,0,0,3,4,0,5,0.pkl
/tmp/trial_store/9207154dea51e7eb80424205f7437164/ee08b2934d47e8a68597c134569b766c/1,3,2,3,1,0,2,5,2.pkl
/tmp/trial_store/9207154dea51e7eb80424205f7437164/7d235d739432a9a9efb07afb1f061359
/tmp/trial_store/9207154dea51e7eb80424205f7437164/7d235d739432a9a9efb07afb1f061359/0,2,1,2,410,0,2,3,2.pkl
/tmp/trial_store/9207154dea51e7eb80424205f7437164/7d235d739432a9a9efb07afb1f061359/0,1,0,0,305,0,0,1,4.pkl
/tmp/trial_store/9207154dea51e7eb80424205f7437164/7d235d739432a9a9efb07afb1f061359/0,3,2,0,455,3,1,2,2.pkl


# Set ensemble_size

Use `ensemble_size` to set the number of estimators in the model ensemble; set it to `0` to disable ensembling.

In [10]:
# ensemble_size=0 disables the model ensemble for this run.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    max_trials=5,
    ensemble_size=0,
)
estimator = experiment.run()

# Set parallelism

Use `n_jobs` to set the number of parallel threads or processes.

In [11]:
# Ask for four parallel workers through n_jobs.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    max_trials=5,
    n_jobs=4,
)
estimator = experiment.run()

# Show the `estimators` attribute of the object held by the pipeline's last step.
print(estimator.steps[-1][-1].estimators)

[HyperGBMEstimator(task=binary, reward_metric=precision, cv=True,
data_pipeline: DataFrameMapper(df_out=True,
                df_out_dtype_transforms=[(ColumnSelector(include:['object']),
                                          'int')],
                features=[(ColumnSelector(include:['object', 'category', 'bool']),
                           Pipeline(steps=[('categorical_imputer_0',
                                            SafeSimpleImputer(strategy='constant')),
                                           ('categorical_label_encoder_0',
                                            MultiLabelEncoder())])),
                          (ColumnSelector(include:number, exclude:timedelta),
                           Pipeline(steps=[('numeric_imputer_0',
                                            FloatOutputImputer(strategy='constant')),
                                           ('numeric_pass_through_0',
                                            PassThroughEstimator())]))],
        

# Set random_state

Use `random_state` to set the random seed for the experiment.


In [12]:
# Pass a fixed random_state (8888) to the experiment.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    max_trials=5,
    random_state=8888,
)
estimator = experiment.run()

# Set log_level 

Use `log_level` to set the logging level in the experiment, possible values:
- logging.CRITICAL
- logging.FATAL
- logging.ERROR
- logging.WARNING
- logging.WARN
- logging.INFO
- logging.DEBUG

In [13]:
# Set the experiment's logging level through log_level (here the string 'info').
experiment = make_experiment(
    train_data.copy(),
    target='target',
    reward_metric='precision',
    max_trials=5,
    log_level='info',
)
estimator = experiment.run()