In [3]:
pip install optuna


Collecting optuna
  Downloading optuna-3.4.0-py3-none-any.whl (409 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m409.6/409.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.12.1-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.8/226.8 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.0-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.0 alembic-1.12.1 colorlog-6.7.0 optuna-3.4.0


In [4]:
pip install scikit-learn




In [5]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load Diabetes dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
import optuna
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

def objective(trial):
    # Define hyperparameters to be optimized
    n_estimators = trial.suggest_int('n_estimators', 10, 100)
    max_depth = trial.suggest_int('max_depth', 2, 32, log=True)

    # Create a RandomForestRegressor with the suggested hyperparameters
    model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth)

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions on the test set
    predictions = model.predict(X_test)

    # Evaluate the model using mean squared error
    mse = mean_squared_error(y_test, predictions)

    return mse


In [7]:
# Create an Optuna study
study = optuna.create_study(direction='minimize')  # Note: We use 'minimize' since we're dealing with mean squared error

# Optimize the study, specifying the number of trials
study.optimize(objective, n_trials=50)

# Print the best parameters and their corresponding value
print('Best trial:')
trial = study.best_trial

print('Mean Squared Error: {}'.format(trial.value))
print('Best hyperparameters: {}'.format(trial.params))


[I 2023-11-27 17:54:28,676] A new study created in memory with name: no-name-7e941d40-3265-4b87-a7dd-77d40d03e60a
[I 2023-11-27 17:54:28,771] Trial 0 finished with value: 3211.708963795256 and parameters: {'n_estimators': 15, 'max_depth': 28}. Best is trial 0 with value: 3211.708963795256.
[I 2023-11-27 17:54:28,839] Trial 1 finished with value: 2725.3139026387366 and parameters: {'n_estimators': 22, 'max_depth': 3}. Best is trial 1 with value: 2725.3139026387366.
[I 2023-11-27 17:54:29,112] Trial 2 finished with value: 2800.9693175516527 and parameters: {'n_estimators': 81, 'max_depth': 3}. Best is trial 1 with value: 2725.3139026387366.
[I 2023-11-27 17:54:29,541] Trial 3 finished with value: 2827.425876703407 and parameters: {'n_estimators': 76, 'max_depth': 4}. Best is trial 1 with value: 2725.3139026387366.
[I 2023-11-27 17:54:29,890] Trial 4 finished with value: 2991.406410503665 and parameters: {'n_estimators': 99, 'max_depth': 13}. Best is trial 1 with value: 2725.3139026387366

Best trial:
Mean Squared Error: 2647.184468749125
Best hyperparameters: {'n_estimators': 19, 'max_depth': 3}


In [8]:
best_params = study.best_params
final_model = RandomForestRegressor(**best_params)

# Train the final model with all your data
final_model.fit(X, y)

# Make predictions or use the model as needed


In [9]:
# Use the best hyperparameters
best_params = study.best_params
final_model = RandomForestRegressor(**best_params)

# Train the final model with all your data
final_model.fit(X, y)

# Make predictions or use the model as needed


**This means that, according to the optimization process, a RandomForestRegressor with 66 estimators and a maximum depth of 3 resulted in the best performance on your Diabetes dataset**

In [10]:
pip install joblib




In [11]:
import joblib

# Use the best hyperparameters
best_params = study.best_params
final_model = RandomForestRegressor(**best_params)

# Train the final model with all your data
final_model.fit(X, y)

# Save the model to a file
joblib.dump(final_model, 'Automated_HyperParameter_Tuning_ModelRF.joblib')


['Automated_HyperParameter_Tuning_ModelRF.joblib']