## Prepare train_data and eval_data

In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from hypergbm import make_experiment
from hypernets.tabular.metrics import metric_to_scoring

In [2]:
# Load the breast-cancer dataset as a features frame and a target series.
X, y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)

# Keep 70% of the rows for training; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=335,
)

# Put features and label side by side so each frame is self-contained.
train_data = pd.concat([X_train, y_train], axis=1)
eval_data = pd.concat([X_test, y_test], axis=1)

# Define your own SearchSpace with GeneralSearchSpaceGenerator

In [3]:
from hypergbm.search_space import GeneralSearchSpaceGenerator

## 1.Set n_estimators

In [4]:
# Search space with n_estimators fixed to 500
# (presumably applied to every enabled estimator - confirm in the HyperGBM docs).
my_space1 = GeneralSearchSpaceGenerator(n_estimators=500)

## 2.Choose gbm model

In [5]:
# Search space with only the LightGBM flag switched on; the XGBoost,
# CatBoost and HistGB flags are explicitly switched off.
my_space2 = GeneralSearchSpaceGenerator(
    enable_lightgbm=True,
    enable_xgb=False,
    enable_catboost=False,
    enable_histgb=False,
)

## 3.Enable GPU devices

In [6]:
# Search space with LightGBM disabled and GPU-related init kwargs handed to
# XGBoost and CatBoost.
# NOTE(review): 'gpu_id' / 'devices' of '1' presumably select the GPU with
# index 1 - adjust to the devices available on your machine.
my_space3 = GeneralSearchSpaceGenerator(
    enable_lightgbm=False,
    xgb_init_kwargs={
        'tree_method': 'gpu_hist',
        'gpu_id': '1',
    },
    catboost_init_kwargs={
        'task_type': 'GPU',
        'devices': '1',
    },
)

## 4.Advanced setting

In [7]:
from hypernets.core.search_space import Choice,Int
# Search space with LightGBM disabled and a hand-written set of XGBoost init
# kwargs. Plain values are passed as given; the Int(...) and Choice(...)
# entries are presumably ranges/options explored by the searcher - confirm in
# the Hypernets search-space docs.
my_space4 = GeneralSearchSpaceGenerator(
    enable_lightgbm=False,
    xgb_init_kwargs={
        'booster': 'dart',
        'max_depth': Int(3, 15),
        'n_estimators': Choice([100, 150, 200, 250, 300]),
        'learning_rate': 0.1,
        'min_child_weight': 5,
        'gamma': 0.5,
        'reg_alpha': 10,
        'reg_lambda': 0.01,
    },
)

In [8]:
# Build one experiment per custom search space. Every call receives its own
# copy of train_data, and 'target' names the label column.
experiment1 = make_experiment(
    train_data.copy(),
    target='target',
    search_space=my_space1,
)
experiment2 = make_experiment(
    train_data.copy(),
    target='target',
    search_space=my_space2,
)
experiment3 = make_experiment(
    train_data.copy(),
    target='target',
    search_space=my_space3,
)
experiment4 = make_experiment(
    train_data.copy(),
    target='target',
    search_space=my_space4,
)

In [9]:
# Run the experiment built with my_space1; the value returned by run() is
# displayed as the cell output.
experiment1.run()

Pipeline(steps=[('data_clean',
                 DataCleanStep(cv=True,
                               data_cleaner_args={'correct_object_dtype': True,
                                                  'drop_columns': None,
                                                  'drop_constant_columns': True,
                                                  'drop_duplicated_columns': False,
                                                  'drop_idness_columns': True,
                                                  'drop_label_nan_rows': True,
                                                  'int_convert_to': 'float',
                                                  'nan_chars': None,
                                                  'reduce_mem_usage': False,
                                                  'reserve_columns': None},
                               name='data_clean')),
                ('est...
                 GreedyEnsemble(weight=[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

In [10]:
# Run the experiment built with my_space2; the value returned by run() is
# displayed as the cell output.
experiment2.run()

Pipeline(steps=[('data_clean',
                 DataCleanStep(cv=True,
                               data_cleaner_args={'correct_object_dtype': True,
                                                  'drop_columns': None,
                                                  'drop_constant_columns': True,
                                                  'drop_duplicated_columns': False,
                                                  'drop_idness_columns': True,
                                                  'drop_label_nan_rows': True,
                                                  'int_convert_to': 'float',
                                                  'nan_chars': None,
                                                  'reduce_mem_usage': False,
                                                  'reserve_columns': None},
                               name='data_clean')),
                ('est...
                 GreedyEnsemble(weight=[0.6, 0.1, 0.25, 0.0, 0.0, 0.0, 0.0, 0.

In [11]:
# uncomment next line if your GPUs are ready
# experiment3.run() 

In [12]:
# Run the experiment built with my_space4; the value returned by run() is
# displayed as the cell output.
experiment4.run()

Pipeline(steps=[('data_clean',
                 DataCleanStep(cv=True,
                               data_cleaner_args={'correct_object_dtype': True,
                                                  'drop_columns': None,
                                                  'drop_constant_columns': True,
                                                  'drop_duplicated_columns': False,
                                                  'drop_idness_columns': True,
                                                  'drop_label_nan_rows': True,
                                                  'int_convert_to': 'float',
                                                  'nan_chars': None,
                                                  'reduce_mem_usage': False,
                                                  'reserve_columns': None},
                               name='data_clean')),
                ('est...
                 GreedyEnsemble(weight=[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0