# Objectives example

This example shows how to use the `NumOfFeatures` objective to reduce model complexity.


## Import modules and prepare data

In [1]:
from hypernets.core.random_state import set_random_state
set_random_state(1234)

from sklearn.preprocessing import LabelEncoder

from hypernets.utils import logging as hyn_logging
from hypernets.examples.plain_model import PlainModel, PlainSearchSpace
from hypernets.experiment import make_experiment

from hypernets.tabular import get_tool_box
from hypernets.tabular.datasets import dsutils
from hypernets.tabular.sklearn_ex import MultiLabelEncoder


# Set the hypernets log level to WARN to keep the cell output short.
hyn_logging.set_level(hyn_logging.WARN)

# Work on the first 10000 rows of the bank dataset only.
df = dsutils.load_bank().head(10000)
# Replace the target column 'y' with its label-encoded values.
df['y'] = LabelEncoder().fit_transform(df['y'])
# Get the toolbox for this DataFrame and use it for a seeded split
# (test_size=0.2, random_state=9527) into df_train / df_test.
tb = get_tool_box(df)
df_train, df_test = tb.train_test_split(df, test_size=0.2, random_state=9527)

  from .autonotebook import tqdm as notebook_tqdm


## NumberOfFeatures objective example

In [2]:
import numpy as np

class CatPlainModel(PlainModel):
    """``PlainModel`` subclass whose ``transformer`` is ``MultiLabelEncoder``.

    The ``transformer`` attribute is set to the ``MultiLabelEncoder`` class
    itself (not an instance). How the base class consumes it is not visible
    in this notebook.
    """

    def __init__(self, searcher, dispatcher=None, callbacks=None, reward_metric=None, task=None,
                 discriminator=None):
        """Initialize the base model, then select the transformer class.

        ``discriminator`` is accepted as a keyword argument but is not
        forwarded to the base initializer.
        """
        super().__init__(
            searcher,
            dispatcher=dispatcher,
            callbacks=callbacks,
            reward_metric=reward_metric,
            task=task,
        )
        self.transformer = MultiLabelEncoder

    def export_trial_configuration(self, trial):
        """Do nothing and return ``None``; ``trial`` is ignored."""
        return None


# Multi-objective experiment: logloss as the reward metric plus the 'nf' objective.
experiment = make_experiment(
    CatPlainModel,
    df_train,
    eval_data=df_test.copy(),
    callbacks=[],
    search_callbacks=[],
    target='y',
    searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
    reward_metric='logloss',
    objectives=['nf'],  # use NumberOfFeatures as objective
    drift_detection_threshold=0.4,
    drift_detection_min_features=3,
    drift_detection_remove_size=0.5,
    search_space=PlainSearchSpace(enable_dt=True, enable_lr=False, enable_nn=True),
)

# Run the search with at most 30 trials.
estimators = experiment.run(max_trials=30)

In [3]:
# Trial history as a DataFrame; show only the non-dominated trials.
df_trials = experiment.hyper_model_.history.to_df()
df_trials.loc[
    df_trials['non_dominated'].eq(True),
    ['trial_no', 'succeeded', 'non_dominated', 'reward_logloss', 'reward_nf', 'model_index'],
]

Unnamed: 0,trial_no,succeeded,non_dominated,reward_logloss,reward_nf,model_index
2,3,True,True,0.267146,0.125,0.0
11,14,True,True,0.254731,0.6875,1.0
18,23,True,True,0.284658,0.0,2.0


## Prediction performance objective example

In [4]:
# Multi-objective experiment: logloss as the reward metric plus the 'pred_perf' objective.
experiment = make_experiment(
    CatPlainModel,
    df_train,
    eval_data=df_test.copy(),
    callbacks=[],
    search_callbacks=[],
    target='y',
    searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
    reward_metric='logloss',
    objectives=['pred_perf'],  # use PredictionPerformanceObjective as objective
    drift_detection_threshold=0.4,
    drift_detection_min_features=3,
    drift_detection_remove_size=0.5,
    search_space=PlainSearchSpace(enable_dt=True, enable_lr=False, enable_nn=True),
)
# Run the search with at most 30 trials.
estimators = experiment.run(max_trials=30)

In [5]:
# Trial history as a DataFrame; show only the non-dominated trials.
df_trials = experiment.hyper_model_.history.to_df()
df_trials.loc[
    df_trials['non_dominated'].eq(True),
    ['trial_no', 'succeeded', 'non_dominated', 'reward_logloss', 'reward_pred_perf', 'model_index'],
]

Unnamed: 0,trial_no,succeeded,non_dominated,reward_logloss,reward_pred_perf,model_index
0,1,True,True,0.320388,0.0,0.0
10,12,True,True,0.309584,0.011593,1.0
11,14,True,True,0.313908,0.003825,2.0
19,24,True,True,0.291687,0.015628,3.0
