In [24]:
import time

from scipy.stats import randint, uniform
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split

In [25]:
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [26]:
param_distributions = {
    "n_estimators": randint(50, 300),
    "learning_rate": uniform(0.01, 0.3),
    "max_depth": randint(2, 10),
    "min_samples_split": randint(2, 20),
    "subsample": uniform(0.6, 0.4),
}

In [27]:
random_search = RandomizedSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_distributions=param_distributions,
    n_iter=243,
    cv=5,
    scoring="accuracy",
    random_state=42,
    n_jobs=-1,
    return_train_score=True,
)

random_search_t0 = time.time()
random_search.fit(X_train, y_train)
random_search_t1 = time.time()

In [28]:
print("Meilleurs paramètres : ", random_search.best_params_)
print("Meilleurs score : ", random_search.best_score_)
print("Temps d'éxécution : ", random_search_t1 - random_search_t0)

Meilleurs paramètres :  {'learning_rate': np.float64(0.2287021504122962), 'max_depth': 2, 'min_samples_split': 6, 'n_estimators': 283, 'subsample': np.float64(0.7433862914177091)}
Meilleurs score :  0.9802197802197803
Temps d'éxécution :  63.832239389419556


In [29]:
param_grid = {
    "n_estimators": randint(50, 300).rvs(3),
    "learning_rate": uniform(0.01, 0.3).rvs(3),
    "max_depth": randint(2, 10).rvs(3),
    "min_samples_split": randint(2, 20).rvs(3),
    "subsample": uniform(0.6, 0.4).rvs(3),
}

In [30]:
grid_search = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
    return_train_score=True,
)

grid_search_t0 = time.time()
grid_search.fit(X_train, y_train)
grid_search_t1 = time.time()

In [31]:
print("Meilleurs paramètres : ", grid_search.best_params_)
print("Meilleurs score : ", grid_search.best_score_)
print("Temps d'éxécution : ", grid_search_t1 - grid_search_t0)

Meilleurs paramètres :  {'learning_rate': np.float64(0.25324864315217116), 'max_depth': np.int64(3), 'min_samples_split': np.int64(14), 'n_estimators': np.int64(166), 'subsample': np.float64(0.8281463737247352)}
Meilleurs score :  0.9780219780219781
Temps d'éxécution :  44.01078915596008
