In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestClassifier

# One seed shared by the split, the forest and the search, so that a fresh
# kernel run reproduces the same numbers.
SEED = 1

# Breast-cancer data as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Hold back 20% of the samples for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
)

# Candidate values the randomized search draws from.
param_dist = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [2, 4, 6, 8],
    'min_samples_split': [2, 4, 6, 8],
    'min_samples_leaf': [1, 2, 4, 6],
    'bootstrap': [True, False],
}

# Base estimator whose hyperparameters are being tuned.
clf = RandomForestClassifier(random_state=SEED)

# Search settings: 20 sampled candidates, cv=5, n_jobs=-1 for parallel fits.
search_options = {
    'param_distributions': param_dist,
    'n_iter': 20,
    'cv': 5,
    'n_jobs': -1,
    'random_state': SEED,
}
random_search = RandomizedSearchCV(clf, **search_options)

# Run the search on the training portion only; the test split stays unseen.
random_search.fit(X_train, y_train)

# Collect the headline numbers first, then report them.
best_cv_score = random_search.best_score_
test_score = random_search.score(X_test, y_test)

print("Best hyperparameters: ", random_search.best_params_)
print("Best mean cross-validation score: {:.2f}".format(best_cv_score))
print("Test set score: {:.2f}".format(test_score))


Best hyperparameters:  {'n_estimators': 100, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_depth': 4, 'bootstrap': False}
Best mean cross-validation score: 0.95
Test set score: 0.96
