In [None]:
# Bayesian Optimization with Random Forest

In [7]:
!pip install bayesian-optimization scikit-learn pandas

Collecting bayesian-optimization
  Downloading bayesian_optimization-1.5.1-py3-none-any.whl.metadata (16 kB)
Collecting colorama<0.5.0,>=0.4.6 (from bayesian-optimization)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading bayesian_optimization-1.5.1-py3-none-any.whl (28 kB)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama, bayesian-optimization
Successfully installed bayesian-optimization-1.5.1 colorama-0.4.6


In [8]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from bayes_opt import BayesianOptimization

In [9]:
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv', sep=';')
X = data.drop('quality', axis=1)
y = data['quality']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
def objective(n_estimators, max_depth, min_samples_split, max_features):
    model = RandomForestRegressor(n_estimators=int(n_estimators),
                                  max_depth=int(max_depth),
                                  min_samples_split=int(min_samples_split),
                                  max_features=min(max_features, 0.999),  # Fraction, must be <= 1.0
                                  random_state=42)

    return -1.0 * cross_val_score(model, X_train, y_train, cv=3, scoring="neg_mean_squared_error").mean()

In [11]:
# Bounds for hyperparameters
param_bounds = {
    'n_estimators': (10, 250),
    'max_depth': (1, 50),
    'min_samples_split': (2, 25),
    'max_features': (0.1, 0.999),
}

In [12]:
optimizer = BayesianOptimization(f=objective, pbounds=param_bounds, random_state=42)
optimizer.maximize(init_points=5, n_iter=15)

|   iter    |  target   | max_depth | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------------------
| [39m1        [39m | [39m0.3948   [39m | [39m19.35    [39m | [39m0.9547   [39m | [39m18.84    [39m | [39m153.7    [39m |
| [35m2        [39m | [35m0.3985   [39m | [35m8.645    [39m | [35m0.2402   [39m | [35m3.336    [39m | [35m217.9    [39m |
| [39m3        [39m | [39m0.3797   [39m | [39m30.45    [39m | [39m0.7366   [39m | [39m2.473    [39m | [39m242.8    [39m |
| [39m4        [39m | [39m0.3808   [39m | [39m41.79    [39m | [39m0.2909   [39m | [39m6.182    [39m | [39m54.02    [39m |
| [39m5        [39m | [39m0.3886   [39m | [39m15.91    [39m | [39m0.5718   [39m | [39m11.93    [39m | [39m79.89    [39m |
| [39m6        [39m | [39m0.3925   [39m | [39m8.632    [39m | [39m0.3537   [39m | [39m4.404    [39m | [39m217.6    [39m |
| [35m7        [39m | [35m0.4288   [39m | [

In [13]:
best_params = optimizer.max['params']
best_params

{'max_depth': 1.0,
 'max_features': 0.1,
 'min_samples_split': 2.0,
 'n_estimators': 210.79678776201987}

In [14]:
final_model = RandomForestRegressor(n_estimators=int(best_params['n_estimators']),
                                   max_depth=int(best_params['max_depth']),
                                   min_samples_split=int(best_params['min_samples_split']),
                                   max_features=best_params['max_features'],
                                   random_state=42)
final_model.fit(X_train, y_train)
score = final_model.score(X_test, y_test)
print(f"Test R^2 Score: {score}")

Test R^2 Score: 0.11323487205843263


In [16]:
best_params_formatted = {
    'n_estimators': int(best_params['n_estimators']),
    'max_depth': int(best_params['max_depth']),
    'min_samples_split': int(best_params['min_samples_split']),
    'max_features': best_params['max_features']
}
best_params_formatted

{'n_estimators': 210,
 'max_depth': 1,
 'min_samples_split': 2,
 'max_features': 0.1}

In [17]:
optimized_rf = RandomForestRegressor(**best_params_formatted, random_state=42)

In [18]:
optimized_rf.fit(X_train, y_train)

In [19]:
score = optimized_rf.score(X_test, y_test)
print(f"Test R^2 Score with Optimized Hyperparameters: {score}")

Test R^2 Score with Optimized Hyperparameters: 0.11323487205843263
