In [1]:
import optuna

In [2]:
optuna.__version__

'4.1.0'

# Hyperparameter Tuning with Optuna

ML params       A 3 B 8 C 17

A 1 2 3 4
B 7 8 11
C 12 14 17

In [3]:
import optuna
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [4]:
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [6]:
def objective(trial):
    # Define hyperparametrs to tune
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 5, 30)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)

    # Create and train the model with the sampled hyperparameters

    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split)
    model.fit(X_train, y_train)

    # Make predictions and evaluate the model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy

In [7]:
study = optuna.create_study(direction = 'maximize')

[I 2025-01-14 13:42:48,202] A new study created in memory with name: no-name-ad42aacf-42bb-4aa1-8645-bd587725d5b6


In [8]:
study.optimize(objective, n_trials=100)

[I 2025-01-14 13:42:48,300] Trial 0 finished with value: 0.88 and parameters: {'n_estimators': 81, 'max_depth': 24, 'min_samples_split': 3}. Best is trial 0 with value: 0.88.
[I 2025-01-14 13:42:48,435] Trial 1 finished with value: 0.8766666666666667 and parameters: {'n_estimators': 125, 'max_depth': 25, 'min_samples_split': 12}. Best is trial 0 with value: 0.88.
[I 2025-01-14 13:42:48,505] Trial 2 finished with value: 0.8733333333333333 and parameters: {'n_estimators': 65, 'max_depth': 29, 'min_samples_split': 15}. Best is trial 0 with value: 0.88.
[I 2025-01-14 13:42:48,625] Trial 3 finished with value: 0.8766666666666667 and parameters: {'n_estimators': 108, 'max_depth': 29, 'min_samples_split': 3}. Best is trial 0 with value: 0.88.
[I 2025-01-14 13:42:48,812] Trial 4 finished with value: 0.86 and parameters: {'n_estimators': 179, 'max_depth': 9, 'min_samples_split': 15}. Best is trial 0 with value: 0.88.
[I 2025-01-14 13:42:48,968] Trial 5 finished with value: 0.8766666666666667 an

In [9]:
study.best_params

{'n_estimators': 150, 'max_depth': 12, 'min_samples_split': 3}

In [10]:
study.best_value

0.9066666666666666

In [15]:
optuna.visualization.plot_param_importances(study)

In [16]:
optuna.visualization.plot_slice(study)

In [17]:
optuna.visualization.plot_parallel_coordinate(study)

In [18]:
optuna.visualization.plot_optimization_history(study)

In [19]:
best_params = study.best_params

In [20]:
best_params

{'n_estimators': 150, 'max_depth': 12, 'min_samples_split': 3}

### Use this to train new model

In [22]:
best_model = RandomForestClassifier(**best_params)

In [23]:
best_model.fit(X_train, y_train)

In [24]:
import joblib
joblib.dump(best_model, "optimized_model.pkl")

['optimized_model.pkl']

In [25]:
loaded_model = joblib.load('optimized_model.pkl')