In [4]:
import numpy as np

from hypertoy.toyknn import KnnModel
from hypertoy.estimator import ComplexKnn
from hypertoy.pipeline import Pipeline

from hypernets.searchers import RandomSearcher
from hypernets.core.ops import ModuleChoice, HyperInput
from hypernets.core.search_space import Choice, Real, Int, Bool
from hypernets.core.search_space import HyperSpace
from hypernets.tabular import column_selector

from sklearn_.transformers import SimpleImputer, SafeOrdinalEncoder

In [2]:
# with a simpler search space
def search_space_v1():
    """Build a small search space: one categorical pipeline feeding a ComplexKnn.

    The graph constructed inside the returned space is:

        HyperInput('input1')
            -> Pipeline[SimpleImputer(constant), SafeOrdinalEncoder(int32)]
               applied to the columns picked by
               ``column_selector.column_object_category_bool``
            -> ComplexKnn

    Searched kNN hyperparameters (each a ``Choice``): ``n_neighbors``,
    ``weights``, ``algorithm``, ``leaf_size`` and ``p``.
    Fixed kNN settings: ``metric='minkowski'``, ``metric_params=None``,
    ``n_jobs=None``.

    Returns:
        HyperSpace: the space with ``input1`` registered as its input.
    """
    space = HyperSpace()
    with space.as_default():
        input_node = HyperInput(name='input1')

        # Preprocessing steps, created in the same order as they run.
        imputer = SimpleImputer(
            missing_values=np.nan,
            strategy='constant',
            name=f'categorical_imputer_{0}',
        )
        encoder = SafeOrdinalEncoder(
            name=f'categorical_label_encoder_{0}',
            dtype='int32',
        )
        categorical_branch = Pipeline(
            [imputer, encoder],
            columns=column_selector.column_object_category_bool,
            name=f'categorical_pipeline_simple_{0}',
        )
        preprocessed = categorical_branch(input_node)

        # Choice(...) entries are sampled by the searcher; plain values are fixed.
        knn_params = dict(
            n_neighbors=Choice([1, 3, 5]),
            weights=Choice(['uniform', 'distance']),
            algorithm=Choice(['auto', 'ball_tree', 'kd_tree', 'brute']),
            leaf_size=Choice([10, 20, 30]),
            p=Choice([1, 2]),
            metric='minkowski',
            metric_params=None,
            n_jobs=None,
        )

        # Calling the estimator module on the pipeline output links the two.
        estimator = ComplexKnn(fit_kwargs={}, **knn_params)
        estimator(preprocessed)

        space.set_inputs(input_node)
    return space

In [14]:
# Random search over the space produced by search_space_v1, with optimize_direction='max'.
searcher = RandomSearcher(search_space_v1, optimize_direction='max')
# Draw a single sample from the searcher; the bare name below displays it as the cell output.
test_space = searcher.sample()
test_space

key,value
signature,81cc4082bf13e3a83ed3ef66075b9c64
vectors,"[2, 1, 2, 0, 0]"
0-Module_ComplexKnn_1.n_neighbors,5
,
1-Module_ComplexKnn_1.weights,distance
,
2-Module_ComplexKnn_1.algorithm,kd_tree
,
3-Module_ComplexKnn_1.leaf_size,10
,


In [11]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the digits dataset as pandas objects (as_frame=True), returned as (features, target).
X, y = load_digits(return_X_y=True, as_frame=True)
# Hold out 10% of the rows; random_state=10 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=10)

In [12]:
# Wrap the searcher in a KnnModel declared as a multiclass task.
sampled_model = KnnModel(searcher, task='multiclass')
# Run the search on the training split, passing the held-out split as the evaluation data.
sampled_model.search(X_train, y_train, X_eval=X_test, y_eval=y_test)

06-25 14:25:37 I hypernets.u.common.py 124 - 10 class detected, inferred as a [multiclass classification] task
06-25 14:25:37 I hypernets.c.meta_learner.py 22 - Initialize Meta Learner: dataset_id:4c49cd6494d6004b32d4a46cc11b473a
06-25 14:25:37 I hypernets.d.in_process_dispatcher.py 77 - Trial 1 done, reward: 0.9888888888888889, best_trial_no:1, best_reward:0.9888888888888889

06-25 14:25:37 I hypernets.d.in_process_dispatcher.py 77 - Trial 2 done, reward: 0.9777777777777777, best_trial_no:1, best_reward:0.9888888888888889

06-25 14:25:37 I hypernets.d.in_process_dispatcher.py 77 - Trial 3 done, reward: 0.9722222222222222, best_trial_no:1, best_reward:0.9888888888888889

06-25 14:25:37 I hypernets.d.in_process_dispatcher.py 77 - Trial 4 done, reward: 0.9833333333333333, best_trial_no:1, best_reward:0.9888888888888889

06-25 14:25:37 I hypernets.d.in_process_dispatcher.py 77 - Trial 5 done, reward: 0.9888888888888889, best_trial_no:1, best_reward:0.9888888888888889

06-25 14:25:37 I hyp

In [13]:
# Reload the estimator stored in the best trial's model file.
best_classifier = sampled_model.load_estimator(sampled_model.get_best_trial().model_file)
# NOTE(review): this scores on the training split (X_train), not the held-out X_test — confirm that is intended.
best_classifier.knn_model.score(X_train, y_train)

0.9913419913419913

In [19]:
# Display the data_pipeline attribute of the reloaded best estimator.
best_classifier.data_pipeline

Pipeline(steps=[('categorical_imputer_0', SimpleImputer(strategy='constant')),
                ('categorical_label_encoder_0',
                 SafeOrdinalEncoder(dtype='int32'))])