In [1]:
%pip install optuna joblib scikit-learn
import optuna
import joblib
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

Note: you may need to restart the kernel to use updated packages.


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 1. Load Data
iris = load_iris()
X, y = iris.data, iris.target
# We still split because we want a final "Holdout" set that Optuna NEVER sees
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
# 2. Define the Objective Function (The "Exam")
def objective(trial):
    # A. Suggest Hyperparameters
    # "trial" is the object that picks the numbers
    n_estimators = trial.suggest_int("n_estimators", 10, 500)
    max_depth = trial.suggest_int("max_depth", 2, 20)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 20)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
    max_features = trial.suggest_categorical("max_features", ["sqrt", "log2", None])
    
    # B. Build the Pipeline with these specific numbers
    # Note: We set n_jobs=-1 to use all CPU cores for speed
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=42, 
        n_jobs=-1 
    )
    
    pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", model)
    ])
    
    # C. Evaluate (Cross Validation)
    # We use 3-fold CV. It returns a list of 3 scores. We take the average.
    scores = cross_val_score(pipeline, X_train, y_train, cv=3, scoring="accuracy")
    accuracy = scores.mean()
    
    # D. Return the score to Optuna
    return accuracy

# 3. Create the Study
# direction="maximize" because we want higher accuracy
study = optuna.create_study(direction="maximize")

# 4. Run the Optimization
study.optimize(objective, n_trials=200)

# 5. Review the Results
print(f"\nBest Trial Accuracy: {study.best_value:.4f}")
print("Best Parameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")



[32m[I 2026-02-09 20:52:13,133][0m A new study created in memory with name: no-name-db7349a7-759c-4d3b-9462-2f5ddfcd10e5[0m


[32m[I 2026-02-09 20:52:13,959][0m Trial 0 finished with value: 0.9416666666666668 and parameters: {'n_estimators': 335, 'max_depth': 7, 'min_samples_split': 9, 'min_samples_leaf': 9, 'max_features': None}. Best is trial 0 with value: 0.9416666666666668.[0m
[32m[I 2026-02-09 20:52:15,034][0m Trial 1 finished with value: 0.9416666666666668 and parameters: {'n_estimators': 467, 'max_depth': 14, 'min_samples_split': 13, 'min_samples_leaf': 10, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9416666666666668.[0m
[32m[I 2026-02-09 20:52:15,871][0m Trial 2 finished with value: 0.9416666666666668 and parameters: {'n_estimators': 346, 'max_depth': 17, 'min_samples_split': 8, 'min_samples_leaf': 8, 'max_features': None}. Best is trial 0 with value: 0.9416666666666668.[0m
[32m[I 2026-02-09 20:52:16,479][0m Trial 3 finished with value: 0.9416666666666668 and parameters: {'n_estimators': 253, 'max_depth': 10, 'min_samples_split': 20, 'min_samples_leaf': 2, 'max_features': 'log2'}


Best Trial Accuracy: 0.9667
Best Parameters:
  n_estimators: 44
  max_depth: 18
  min_samples_split: 17
  min_samples_leaf: 3
  max_features: sqrt


In [4]:
%pip install plotly nbformat


Note: you may need to restart the kernel to use updated packages.


In [6]:
from optuna.visualization import plot_slice
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_optimization_history

# Generate the plot
fig = plot_optimization_history(study)
fig.show()
fig = plot_param_importances(study)
fig.show() 
fig = plot_slice(study)
fig.show() 