# MOEA/D Searcher example

This example shows how to use MOEADSearcher for multi-objective optimization.

## 1. Import modules and prepare data

In [1]:
# Fix the hypernets random state (seed 1234) before the remaining modules are imported.
from hypernets.core.random_state import set_random_state
set_random_state(1234)

# NOTE(review): PlainModel, PlainSearchSpace, RNSGAIISearcher and MultiLabelEncoder
# are not referenced by any cell visible in this notebook — confirm whether they are needed.
from hypernets.utils import logging as hyn_logging
from hypernets.examples.plain_model import PlainModel, PlainSearchSpace
from hypernets.searchers.nsga_searcher import RNSGAIISearcher

from hypergbm import make_experiment

from hypernets.tabular import get_tool_box
from hypernets.tabular.datasets import dsutils
from hypernets.tabular.sklearn_ex import MultiLabelEncoder


# Set the hypernets log level to WARN to keep the cell outputs short.
hyn_logging.set_level(hyn_logging.WARN)

# Keep only the first 1000 rows of the bank dataset, then split them with the
# toolbox selected for this frame (test_size=0.2, fixed random_state).
# `df_train`, `df_test` and `tb` are reused by the cells below.
df = dsutils.load_bank().head(1000)
tb = get_tool_box(df)
df_train, df_test = tb.train_test_split(df, test_size=0.2, random_state=9527)

  from .autonotebook import tqdm as notebook_tqdm


## 2. Run an experiment with MOEADSearcher

In [2]:
# Build a multi-objective experiment. The recorded searcher repr lists two
# objectives for this setup: `logloss` (from reward_metric) and `nf` (NumOfFeatures).
experiment = make_experiment(
    df_train,
    target='y',
    eval_data=df_test.copy(),
    # search setup
    searcher='moead',  # available MOO searcher: moead, nsga2, rnsga2
    reward_metric='logloss',
    objectives=['nf'],
    early_stopping_rounds=30,
    random_state=1234,
    # no drift detection and no extra callbacks for this example
    drift_detection=False,
    callbacks=[],
    search_callbacks=[],
)

# Run the search with at most 30 trials; the returned estimators are used in section 6.
estimators = experiment.run(max_trials=30)

# Keep the hyper model for the following cells and display the searcher it used.
hyper_model = experiment.hyper_model_
hyper_model.searcher

MOEADSearcher(objectives=[PredictionObjective(name=logloss, scorer=make_scorer(log_loss, needs_proba=True), direction=min), NumOfFeatures(name=nf, sample_size=1000, direction=min)], n_neighbors=2, recombination=SinglePointCrossOver(random_state=RandomState(MT19937)), mutation=SinglePointMutation(random_state=RandomState(MT19937), proba=0.3), population_size=6)

In [3]:
# Sanity check: the search history should contain at least one trial.
trial_count = len(hyper_model.history.trials)
trial_count > 0

True

In [4]:
from hypernets.core.trial import DominateBasedTrialHistory

# super(DominateBasedTrialHistory, obj) skips DominateBasedTrialHistory.to_df and
# dispatches to the next to_df in the history object's MRO, i.e. the base-class table.
base_history_view = super(DominateBasedTrialHistory, hyper_model.history)
base_history_view.to_df(include_params=False)

Unnamed: 0,trial_no,succeeded,reward,elapsed,scores
0,1,True,"[0.35221276771413107, 0.3125]",0.418392,
1,2,True,"[0.5695202678255886, 0.0]",2.553146,
2,3,True,"[0.5648588411327944, 0.0]",4.134358,
3,4,True,"[0.563789653698866, 0.0]",3.318031,
4,5,True,"[0.33582202590638843, 0.5]",0.319535,
5,6,True,"[0.6243262551262284, 0.0]",0.396044,
6,7,True,"[0.5612684342508204, 0.0]",2.65104,
7,8,True,"[0.35221276771413107, 0.3125]",0.357057,
8,9,True,"[0.34442090598487324, 0.0625]",2.582907,
9,10,True,"[0.3594343809466066, 0.5]",0.351461,


## 3. Summarize trials

In [5]:
# NOTE(review): the recorded run of this cell ended with
# "IndexError: tuple index out of range" — confirm it runs against the installed hypernets version.
# drop() returns a new frame, so the table from to_df() is not modified.
df_trials = hyper_model.history.to_df().drop(columns=['scores', 'reward'])

# Show only the trials flagged as non-dominated.
is_non_dominated = df_trials['non_dominated'] == True
df_trials[is_non_dominated]

IndexError: tuple index out of range

## 4. Plot Pareto front

We can pick a model from the Pareto plot according to the decision maker's preferences; the numbers in the figure indicate the indices of the pipeline models.


In [None]:
# Plot the best trials kept in the search history and show the figure.
pareto_fig, pareto_ax = hyper_model.history.plot_best_trials()
pareto_fig.show()

## 5. Plot population

In [None]:
# Plot the searcher's population and show the figure.
population_fig, population_ax = hyper_model.searcher.plot_population()
population_fig.show()

## 6. Evaluate the selected model

In [None]:
# Report how many pipeline models the experiment returned.
print(f"Number of pipeline: {len(estimators)} ")

# Split the held-out frame into features and target; work on a copy so that
# df_test keeps its 'y' column.
X_test = df_test.copy()
y_test = X_test.pop('y')

pipeline_model = estimators[0]  # use the first returned pipeline model

# Class predictions and probabilities are both passed to the scorer.
preds = pipeline_model.predict(X_test)
proba = pipeline_model.predict_proba(X_test)

score_names = ['auc', 'accuracy', 'f1', 'recall', 'precision']
tb.metrics.calc_score(y_test, preds, proba, metrics=score_names, pos_label="yes")