In [3]:
from sklearn.datasets import make_regression

# Generate regression toy data
n_samples = 1000
n_features = 5

X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=42)

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(train_ratio * n_samples)

X_train = X[:train_size]
y_train = y[:train_size]

# Test data
X_test = X[train_size:]
y_test = y[train_size:]

In [4]:
from sklearn.model_selection import cross_val_score


def bo_params_generic(model, params, X_train, y_train):
    # Create the model instance with the specified parameters
    regressor = model(**params)
    
    # Assuming you have X_train, y_train defined for regression
    scores = cross_val_score(regressor, X_train, y_train, cv=10, scoring='neg_root_mean_squared_error')
    return -scores.mean()

In [10]:
dt_bo = BayesianOptimization(f=lambda n_estimators, max_depth, min_samples_leaf, min_weight_fraction_leaf,
                                    max_features, max_leaf_nodes: bo_params_generic(model, {
                                        'n_estimators': int(round(n_estimators)),
                                        'max_depth': int(round(max_depth)),
                                        'min_samples_leaf': round(min_samples_leaf),
                                        'min_weight_fraction_leaf': min_weight_fraction_leaf,
                                        'max_features': max_features,
                                        'max_leaf_nodes': int(round(max_leaf_nodes))
                                    }, X_train, y_train),
                             pbounds=params_ranges)
results = dt_bo.maximize(n_iter=5, init_points=20)

|   iter    |  target   | max_depth | max_fe... | max_le... | min_sa... | min_we... | n_esti... |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m46.61    [0m | [0m4.134    [0m | [0m0.46     [0m | [0m22.08    [0m | [0m9.013    [0m | [0m0.2209   [0m | [0m41.34    [0m |
| [0m2        [0m | [0m36.43    [0m | [0m12.73    [0m | [0m0.1274   [0m | [0m62.41    [0m | [0m1.791    [0m | [0m0.04717  [0m | [0m29.28    [0m |
| [0m3        [0m | [0m46.17    [0m | [0m15.91    [0m | [0m0.3348   [0m | [0m43.93    [0m | [0m8.521    [0m | [0m0.1384   [0m | [0m30.4     [0m |
| [95m4        [0m | [95m48.93    [0m | [95m6.856    [0m | [95m0.3888   [0m | [95m32.59    [0m | [95m9.715    [0m | [95m0.1876   [0m | [95m41.59    [0m |
| [0m5        [0m | [0m46.0     [0m | [0m1.771    [0m | [0m0.453    [0m | [0m85.14    [0m | [0m4.254    [0m | [0m0.1906   [0m | [0m39

In [5]:
all_models = {}

# Random Forest

**Feature selection**

In [15]:
from sklearn.ensemble import RandomForestRegressor

# Assuming you have X_train and y_train defined for training data

# Initialize the Random Forest model
rf_model = RandomForestRegressor()

# Fit the Random Forest model to the training data
rf_model.fit(X_train, y_train)

# Get feature importances
importances = rf_model.feature_importances_

# Create a list of feature names or indices paired with their importances
feature_importances = list(zip(range(X_train.shape[1]), importances))

# Sort the features based on importance in descending order
feature_importances.sort(key=lambda x: x[1], reverse=True)

# Print the ranked feature importances
print("Feature Importances:")
for feature_index, importance in feature_importances:
    print(f"Feature {feature_index}: {importance}")


Feature Importances:
Feature 2: 0.5484287743797435
Feature 3: 0.17394567943882905
Feature 4: 0.13497297114048568
Feature 1: 0.08208743352080566
Feature 0: 0.06056514152013607


**Hyperparameter optimalisatie**

In [13]:
import subprocess
import sys

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

install("bayesian-optimization")
from sklearn.ensemble import RandomForestRegressor
from bayes_opt import BayesianOptimization

params_ranges = {
    'n_estimators': (10, 100),
    'max_depth': (1, 20),
    'min_samples_leaf': (1, 10),
    'min_weight_fraction_leaf': (0.0, 0.5),
    'max_features': (0.1, 1),
    'max_leaf_nodes': (10, 100)
}

# Example usage with Random Forest
model = RandomForestRegressor
dt_bo = BayesianOptimization(f=lambda n_estimators, max_depth, min_samples_leaf, min_weight_fraction_leaf,
                                    max_features, max_leaf_nodes: bo_params_generic(model, {
                                        'n_estimators': int(round(n_estimators)),
                                        'max_depth': int(round(max_depth)),
                                        'min_samples_leaf': round(min_samples_leaf),
                                        'min_weight_fraction_leaf': min_weight_fraction_leaf,
                                        'max_features': max_features,
                                        'max_leaf_nodes': int(round(max_leaf_nodes))
                                    }, X_train, y_train),
                             pbounds=params_ranges)
results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model = model(
    n_estimators=int(round(params['n_estimators'])),
    max_depth=int(round(params['max_depth'])),
    min_samples_leaf=round(params['min_samples_leaf']),
    min_weight_fraction_leaf=params['min_weight_fraction_leaf'],
    max_features=params['max_features'],
    max_leaf_nodes=int(round(params['max_leaf_nodes']))
)

# Fit the model
best_model.fit(X_train, y_train)

|   iter    |  target   | max_depth | max_fe... | max_le... | min_sa... | min_we... | n_esti... |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m56.57    [0m | [0m6.18     [0m | [0m0.1356   [0m | [0m96.53    [0m | [0m1.01     [0m | [0m0.417    [0m | [0m20.46    [0m |
| [0m2        [0m | [0m44.13    [0m | [0m7.106    [0m | [0m0.7864   [0m | [0m77.59    [0m | [0m7.03     [0m | [0m0.2144   [0m | [0m52.05    [0m |
| [0m3        [0m | [0m49.95    [0m | [0m14.63    [0m | [0m0.7363   [0m | [0m33.25    [0m | [0m3.595    [0m | [0m0.3243   [0m | [0m10.47    [0m |
| [0m4        [0m | [0m53.18    [0m | [0m11.58    [0m | [0m0.4948   [0m | [0m30.3     [0m | [0m1.18     [0m | [0m0.4085   [0m | [0m45.14    [0m |
| [0m5        [0m | [0m56.2     [0m | [0m17.34    [0m | [0m0.2173   [0m | [0m58.35    [0m | [0m9.396    [0m | [0m0.4473   [0m | [0m95.6     

In [7]:
# Append model to list
all_models['Random forest'] = best_model