In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score

# Seed NumPy's legacy global random number generator.
# NOTE(review): presumably meant to make estimators built without an explicit
# random_state repeatable — confirm; whether the tuning libraries used later
# draw from this global generator is not visible from this cell.
np.random.seed(42)

In [2]:
# Load the Iris bundle once, then expose its `data` and `target` arrays under
# the module-level names `x` and `y` that objective() reads.
iris = load_iris()
x = iris.data
y = iris.target

In [None]:
from HPSearchSpace import SearchSpace
# Shared search-space object built from 'example.yaml'. The Hyperopt, Optuna
# and FLAML cells below each ask it for a space in their own format
# (get_hyperopt_space / get_optuna_space / get_flaml_space).
# NOTE(review): the path is relative, so this presumably expects example.yaml
# in the notebook's working directory — confirm.
search_space = SearchSpace('example.yaml')

In [4]:
def objective(config):
    """Score one sampled configuration; lower is better.

    Reads ``config['estimators']`` for the estimator class
    (``'estimator_class'``) and its constructor keyword arguments
    (``'params'``), instantiates the estimator, and evaluates it with
    5-fold ``cross_val_score`` on the module-level ``x`` and ``y``.

    Returns:
        The negated mean cross-validation score, so that minimising the
        return value maximises the score.
    """
    spec = config['estimators']
    model = spec['estimator_class'](**spec['params'])
    fold_scores = cross_val_score(model, x, y, cv=5)
    return -fold_scores.mean()

- Hyperopt

In [5]:
from hyperopt import fmin, Trials, tpe
# space = hp.choice(
#     "classifier", [
#         {
#             "model": SVC,
#             "params": {
#                 "C": hp.loguniform("svm_C", 1e-10, 1),
#                 "kernel": "linear"
#             }
#         },
#         {
#             "model": RandomForestClassifier,
#             "params": {
#                 "n_estimators": scope.int(hp.quniform("rf_n_estimators", 10, 1000, 10)),
#                 "max_depth": scope.int(hp.quniform("rf_max_depth", 2, 32, 1))
#             }
#         },
#         {
#             "model": GradientBoostingClassifier,
#             "params": {
#                 "n_estimators": scope.int(hp.quniform("gb_n_estimators", 10, 1000, 10)),
#                 "max_depth": scope.int(hp.quniform("gb_max_depth", 2, 32, 1))
#             }
#         },
#         {
#             "model": KNeighborsClassifier,
#             "params": {
#                 "n_neighbors": scope.int(hp.quniform("knn_n_neighbors", 2, 10, 1))
#             }
#         }
#     ]
# )

# def objective_hp(params):
#     model = params["model"]
#     params = params["params"]
#     model = model(**params)
#     score = cross_val_score(model, x, y, n_jobs=-1, cv=3)
#     return -score.mean()

# Ask the shared search space for its Hyperopt representation and minimise
# objective() (the negated mean CV score) with TPE for 100 evaluations.
hp_space = search_space.get_hyperopt_space()
hp_trials = Trials()  # records every evaluation; inspected in the next cell
hp_best = fmin(
    objective,
    hp_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=hp_trials,
    # fmin() creates its own unseeded generator when rstate is omitted, so the
    # np.random.seed() call at the top of the notebook does not make the
    # search repeatable. Pass an explicit seeded generator instead.
    rstate=np.random.default_rng(42),
)

100%|██████████| 100/100 [01:37<00:00,  1.02trial/s, best loss: -0.9866666666666667]


In [20]:
# Best Hyperopt trial: its result record (loss/status) followed by the raw
# sampled values; categorical choices are reported as indices.
hp_best_trial = hp_trials.best_trial
print(hp_best_trial["result"], hp_best_trial["misc"]["vals"])

{'loss': -0.9866666666666667, 'status': 'ok'} {'estimators': [np.int64(0)], 'estimators_sklearn.ensemble.GradientBoostingClassifier_max_depth': [], 'estimators_sklearn.ensemble.GradientBoostingClassifier_n_estimators': [], 'estimators_sklearn.ensemble.RandomForestClassifier_max_depth': [], 'estimators_sklearn.ensemble.RandomForestClassifier_n_estimators': [], 'estimators_sklearn.neighbors.KNeighborsClassifier_n_neighbors': [], 'estimators_sklearn.svm.SVC_C': [np.float64(1.3264537099115026)], 'estimators_sklearn.svm.SVC_kernel': [np.int64(0)]}


- Optuna

In [6]:
import optuna

# def objective_optuna(trial_):
#     classifier_name = trial_.suggest_categorical("classifier", [
#         "SVC", "RandomForest", "KNeighbors", "GradientBoosting"
#     ])
#     if classifier_name == "SVC":
#         svc_c = trial_.suggest_float("C", 1e-10, 1, log=True)
#         model = SVC(C=svc_c, kernel="linear")
#     elif classifier_name == "RandomForest":
#         rf_max_depth = trial_.suggest_int("rf_max_depth", 2, 32)
#         rf_n_estimators = trial_.suggest_int("rf_n_estimators", 10, 1000)
#         model = RandomForestClassifier(
#             n_estimators=rf_n_estimators, max_depth=rf_max_depth
#         )
#     elif classifier_name == "KNeighbors":
#         knn_n_neighbors = trial_.suggest_int("knn_n_neighbors", 2, 10)
#         model = KNeighborsClassifier(n_neighbors=knn_n_neighbors)
#     elif classifier_name == "GradientBoosting":
#         gb_max_depth = trial_.suggest_int("gb_max_depth", 2, 32)
#         gb_n_estimators = trial_.suggest_int("gb_n_estimators", 10, 1000)
#         model = GradientBoostingClassifier(
#             n_estimators=gb_n_estimators, max_depth=gb_max_depth
#         )
#     else:
#         raise ValueError("Invalid classifier name")

#     score = cross_val_score(model, x, y, n_jobs=-1, cv=3)
#     return score.mean()

def objective_optuna(trial_):
    """Optuna objective: sample a configuration for ``trial_`` and score it.

    ``search_space.get_optuna_space(trial_)`` produces the configuration,
    which is handed unchanged to ``objective``; that function's return value
    becomes the trial value.
    """
    return objective(search_space.get_optuna_space(trial_))

# Optuna samplers carry their own random state, so np.random.seed() alone does
# not make the search repeatable. TPESampler is what create_study() uses by
# default for a single-objective study; constructing it explicitly only pins
# its seed.
study = optuna.create_study(
    direction="minimize",  # objective() returns the negated mean CV score
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective_optuna, n_trials=100)

[I 2025-02-05 18:48:57,378] A new study created in memory with name: no-name-f4945cc9-94a9-440e-be6c-a50bcf6dcef6
[I 2025-02-05 18:48:59,786] Trial 0 finished with value: -0.9666666666666668 and parameters: {'estimators': 'sklearn.ensemble.GradientBoostingClassifier', 'estimators_sklearn.ensemble.GradientBoostingClassifier_n_estimators': 360, 'estimators_sklearn.ensemble.GradientBoostingClassifier_max_depth': 18}. Best is trial 0 with value: -0.9666666666666668.
[I 2025-02-05 18:48:59,803] Trial 1 finished with value: -0.9800000000000001 and parameters: {'estimators': 'sklearn.neighbors.KNeighborsClassifier', 'estimators_sklearn.neighbors.KNeighborsClassifier_n_neighbors': 7}. Best is trial 1 with value: -0.9800000000000001.
[I 2025-02-05 18:49:00,897] Trial 2 finished with value: -0.96 and parameters: {'estimators': 'sklearn.ensemble.RandomForestClassifier', 'estimators_sklearn.ensemble.RandomForestClassifier_n_estimators': 246, 'estimators_sklearn.ensemble.RandomForestClassifier_max_

In [19]:
# Best Optuna trial: objective value followed by the sampled parameters.
optuna_best_trial = study.best_trial
print(optuna_best_trial.value, optuna_best_trial.params)

-0.9866666666666667 {'estimators': 'sklearn.svm.SVC', 'estimators_sklearn.svm.SVC_C': 0.5926824836417408, 'estimators_sklearn.svm.SVC_kernel': 'linear'}


- FLAML

In [15]:
from flaml.tune import tune

# Same objective and shared search space, this time driven by FLAML's tuner:
# 100 sampled configurations, returned value minimised, Ray disabled.
result = tune.run(
    objective,
    config=search_space.get_flaml_space(),
    num_samples=100,
    mode="min",
    use_ray=False,
)

[flaml.tune.tune: 02-05 19:01:44] {582} INFO - Using search algorithm BlendSearch.


[I 2025-02-05 19:01:44,943] A new study created in memory with name: optuna


[flaml.tune.tune: 02-05 19:01:44] {884} INFO - trial 1 config: {'estimators': {'params': {'n_estimators': 106, 'max_depth': 5}, 'estimator_name': 'sklearn.ensemble.GradientBoostingClassifier', 'estimator_class': <class 'sklearn.ensemble._gb.GradientBoostingClassifier'>}}
[flaml.tune.tune: 02-05 19:01:45] {884} INFO - trial 2 config: {'estimators': {'params': {'C': 9.662068742486023e-06, 'kernel': 'linear'}, 'estimator_name': 'sklearn.svm.SVC', 'estimator_class': <class 'sklearn.svm._classes.SVC'>}}
[flaml.tune.tune: 02-05 19:01:46] {884} INFO - trial 3 config: {'estimators': {'params': {'n_estimators': 97, 'max_depth': 22}, 'estimator_name': 'sklearn.ensemble.GradientBoostingClassifier', 'estimator_class': <class 'sklearn.ensemble._gb.GradientBoostingClassifier'>}}
[flaml.tune.tune: 02-05 19:01:46] {884} INFO - trial 4 config: {'estimators': {'params': {'C': 0.00013343220665499586, 'kernel': 'linear'}, 'estimator_name': 'sklearn.svm.SVC', 'estimator_class': <class 'sklearn.svm._classes

In [18]:
# Print the result record of the best FLAML trial (per the captured output it
# holds the '_metric' value and the 'config' that produced it).
print(result.best_result)

{'_metric': np.float64(-0.9666666666666668), 'training_iteration': 0, 'config': {'estimators': {'estimator_name': 'sklearn.ensemble.GradientBoostingClassifier', 'estimator_class': <class 'sklearn.ensemble._gb.GradientBoostingClassifier'>, 'params': {'n_estimators': 88, 'max_depth': 18}}}, 'config/estimators': {'estimator_name': 'sklearn.ensemble.GradientBoostingClassifier', 'estimator_class': <class 'sklearn.ensemble._gb.GradientBoostingClassifier'>, 'params': {'n_estimators': 88, 'max_depth': 18}}, 'experiment_tag': 'exp', 'time_total_s': 0.7611761093139648}
