Методы (фреймворки) автоматического поиска гиперпараметров

# Optuna
https://github.com/optuna/

https://datagy.io/python-optuna/

In [5]:
# !pip install optuna

In [6]:
import optuna

import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm

In [7]:
def objective(trial):
    """Score one Optuna trial on the iris dataset.

    The trial first picks a classifier family ("SVC" or "RandomForest") and
    then the single hyperparameter tuned for that family:

    * ``svc_c`` -- SVC regularisation strength, sampled on a log scale
      from [1e-10, 1e10];
    * ``rf_max_depth`` -- random-forest tree depth, an integer sampled on a
      log scale from [2, 32] (the forest always has 10 estimators).

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        Mean accuracy over a 3-fold cross-validation of the chosen model.
    """
    dataset = sklearn.datasets.load_iris()
    features, labels = dataset.data, dataset.target

    family = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
    if family != "SVC":
        # Any non-SVC choice falls through to the random forest.
        depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        model = sklearn.ensemble.RandomForestClassifier(
            n_estimators=10,
            max_depth=depth,
        )
    else:
        regularisation = trial.suggest_float("svc_c", 1e-10, 1e10, log=True)
        model = sklearn.svm.SVC(gamma="auto", C=regularisation)

    fold_scores = sklearn.model_selection.cross_val_score(
        model, features, labels, n_jobs=-1, cv=3
    )
    return fold_scores.mean()

In [8]:
from optuna.trial import TrialState

# Create a study that maximizes the value returned by `objective`
# (mean cross-validation accuracy) and evaluate 100 trials.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

# Split the recorded trials by their final state. `objective` never reports
# intermediate values or requests pruning, so the pruned list is expected to
# stay empty here.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# Summary of how many trials ran and how they ended.
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# Report the best trial: its objective value and the sampled hyperparameters.
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2025-11-27 03:04:59,340] A new study created in memory with name: no-name-05c291a3-0d65-4e5d-97de-040d7819e98e
[I 2025-11-27 03:04:59,545] Trial 0 finished with value: 0.7466666666666667 and parameters: {'classifier': 'SVC', 'svc_c': 0.024733287131787896}. Best is trial 0 with value: 0.7466666666666667.
[I 2025-11-27 03:04:59,733] Trial 1 finished with value: 0.94 and parameters: {'classifier': 'SVC', 'svc_c': 0.10246647169411519}. Best is trial 1 with value: 0.94.
[I 2025-11-27 03:04:59,893] Trial 2 finished with value: 0.9533333333333333 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 7}. Best is trial 2 with value: 0.9533333333333333.
[I 2025-11-27 03:04:59,931] Trial 3 finished with value: 0.96 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 2}. Best is trial 3 with value: 0.96.
[I 2025-11-27 03:04:59,948] Trial 4 finished with value: 0.32 and parameters: {'classifier': 'SVC', 'svc_c': 1.011714134127127e-08}. Best is trial 3 with value: 0.96.
[... log output for the remaining trials (5-99) truncated ...]

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  0
  Number of complete trials:  100
Best trial:
  Value:  0.9733333333333333
  Params: 
    classifier: RandomForest
    rf_max_depth: 9


In [9]:
# Hyperparameters of the best trial (shown as the cell's output).
study.best_params

{'classifier': 'RandomForest', 'rf_max_depth': 9}

In [10]:
import optuna.visualization as vis

# Plot the hyperparameter importances computed from the finished study.
vis.plot_param_importances(study)

# SMAC

https://github.com/automl/SMAC3

In [11]:
# !pip install ConfigSpace smac

^C


https://automl.github.io/SMAC3/main/examples/1_basics/2_svm_cv.html#sphx-glr-examples-1-basics-2-svm-cv-py

In [None]:
import numpy as np
from ConfigSpace import Categorical, Configuration, ConfigurationSpace, Float, Integer
from ConfigSpace.conditions import InCondition
from sklearn import datasets, svm
from sklearn.model_selection import cross_val_score

from smac import HyperparameterOptimizationFacade, Scenario


# We load the iris-dataset (a widely used benchmark)
iris = datasets.load_iris()


class SVM:
    """Search space and target function for tuning ``sklearn.svm.SVC`` on iris."""

    @property
    def configspace(self) -> ConfigurationSpace:
        """Build the conditional hyperparameter space for the SVC.

        A new ``ConfigurationSpace`` (seed 0) is constructed on every access.

        Returns:
            The space holding seven hyperparameters and the four conditions
            that activate ``degree``, ``coef0``, ``gamma`` and ``gamma_value``
            only for the parent values where they are used.
        """
        space = ConfigurationSpace(seed=0)

        # Hyperparameters, in the order in which they are registered below.
        kernel_hp = Categorical("kernel", ["linear", "poly", "rbf", "sigmoid"], default="poly")
        c_hp = Float("C", (0.001, 1000.0), default=1.0, log=True)
        shrinking_hp = Categorical("shrinking", [True, False], default=True)
        degree_hp = Integer("degree", (1, 5), default=3)
        coef0_hp = Float("coef0", (0.0, 10.0), default=0.0)
        gamma_hp = Categorical("gamma", ["auto", "value"], default="auto")
        gamma_value_hp = Float("gamma_value", (0.0001, 8.0), default=1.0, log=True)

        # Each child is only active when its parent takes one of `values`.
        conditions = [
            InCondition(child=degree_hp, parent=kernel_hp, values=["poly"]),
            InCondition(child=coef0_hp, parent=kernel_hp, values=["poly", "sigmoid"]),
            InCondition(child=gamma_hp, parent=kernel_hp, values=["rbf", "poly", "sigmoid"]),
            InCondition(child=gamma_value_hp, parent=gamma_hp, values=["value"]),
        ]

        # Hyperparameters are registered first, then the conditions that refer to them.
        space.add(
            [kernel_hp, c_hp, shrinking_hp, degree_hp, coef0_hp, gamma_hp, gamma_value_hp]
        )
        space.add(conditions)

        return space

    def train(self, config: Configuration, seed: int = 0) -> float:
        """Evaluate one configuration with 5-fold cross-validation on iris.

        Args:
            config: Sampled configuration; its entries are passed to
                ``svm.SVC`` as keyword arguments.
            seed: Forwarded to the classifier as ``random_state``.

        Returns:
            The cost ``1 - mean(cross-validation scores)``.
        """
        svc_kwargs = dict(config)

        if "gamma" in config:
            # The space encodes gamma as a switch ("auto" / "value") plus a
            # separate numeric entry; collapse both into SVC's single `gamma`.
            if svc_kwargs["gamma"] == "value":
                svc_kwargs["gamma"] = svc_kwargs["gamma_value"]
            else:
                svc_kwargs["gamma"] = "auto"
            svc_kwargs.pop("gamma_value", None)

        model = svm.SVC(**svc_kwargs, random_state=seed)
        fold_scores = cross_val_score(model, iris.data, iris.target, cv=5)

        return 1 - np.mean(fold_scores)

In [None]:
# Wrapper object that supplies both the search space (`configspace`) and the
# target function (`train`) used below.
classifier = SVM()

# Next, we create an object holding general information about the run.
scenario = Scenario(
    classifier.configspace,
    n_trials=250,  # We want to run max 250 trials (combination of config and seed)
)

# We want to run the facade's default initial design, but we want to change the number
# of initial configs to 5.
initial_design = HyperparameterOptimizationFacade.get_initial_design(scenario, n_configs=5)

# Now we set up the SMAC facade that will search for the best hyperparameters;
# the optimization itself is started by a later call to `smac.optimize()`.
smac = HyperparameterOptimizationFacade(
    scenario,
    classifier.train,
    initial_design=initial_design,
    overwrite=True,  # If the run exists, we overwrite it; alternatively, we can continue from last state
)

[INFO][abstract_initial_design.py:82] Using `n_configs` and ignoring `n_configs_per_hyperparameter`.
[INFO][abstract_initial_design.py:147] Using 5 initial design configurations and 0 additional configurations.


In [None]:
# Run the optimization; the returned incumbent is the configuration SMAC
# ended up preferring.
incumbent = smac.optimize()

# Get cost of default configuration
# (cost as defined in SVM.train: 1 - mean cross-validation score).
default_cost = smac.validate(classifier.configspace.get_default_configuration())
print(f"Default cost: {default_cost}")

# Let's calculate the cost of the incumbent for comparison with the default.
incumbent_cost = smac.validate(incumbent)
print(f"Incumbent cost: {incumbent_cost}")

[INFO][abstract_intensifier.py:516] Added config 16b2dc as new incumbent because there are no incumbents yet.
[INFO][abstract_intensifier.py:595] Added config 22dc7a and rejected config 16b2dc as incumbent because it is not better than the incumbents on 2 instances:
[INFO][smbo.py:320] Finished 50 trials.
[INFO][smbo.py:320] Finished 100 trials.
[INFO][smbo.py:320] Finished 150 trials.
[INFO][smbo.py:320] Finished 200 trials.
[INFO][smbo.py:320] Finished 250 trials.
[INFO][smbo.py:328] Configuration budget is exhausted:
[INFO][smbo.py:329] --- Remaining wallclock time: inf
[INFO][smbo.py:330] --- Remaining cpu time: inf
[INFO][smbo.py:331] --- Remaining trials: 0
Default cost: 0.03333333333333344
Incumbent cost: 0.013333333333333308


In [None]:
# Display the incumbent configuration (shown as the cell's output).
incumbent

Configuration(values={
  'C': 1.2760639488344,
  'kernel': 'linear',
  'shrinking': False,
})