In [2]:
from sklearn.datasets import make_regression

# Generate regression toy data
n_samples = 1000
n_features = 5

X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=42)

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(train_ratio * n_samples)

X_train = X[:train_size]
y_train = y[:train_size]

# Test data
X_test = X[train_size:]
y_test = y[train_size:]

In [5]:
from sklearn.model_selection import cross_val_score


def bo_params_generic(model, params, X_train, y_train):
    # Create the model instance with the specified parameters
    regressor = model(**params)
    
    # Assuming you have X_train, y_train defined for regression
    scores = cross_val_score(regressor, X_train, y_train, cv=10, scoring='neg_root_mean_squared_error')
    return -scores.mean()

In [9]:
all_models = {}

# Random Forest

**Feature selection**

**Hyperparameter optimalisatie**

In [6]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from bayes_opt import BayesianOptimization


params_ranges = {
    'splitter': (0, 1),  # Numeric mapping of string values: 'best' -> 0, 'random' -> 1
    'max_depth': (1, 20),
    'min_samples_leaf': (1, 10),
    'min_weight_fraction_leaf': (0.0, 0.5),
    'max_features': (0.1, 1),
    'max_leaf_nodes': (10, 100)
}

# Example usage with Decision Trees
model = DecisionTreeRegressor
dt_bo = BayesianOptimization(f=lambda splitter, max_depth, min_samples_leaf, min_weight_fraction_leaf,
                                    max_features, max_leaf_nodes: bo_params_generic(model, {
                                        'splitter': 'best' if splitter < 0.5 else 'random',
                                        'max_depth': round(max_depth),
                                        'min_samples_leaf': round(min_samples_leaf),
                                        'min_weight_fraction_leaf': min_weight_fraction_leaf,
                                        'max_features': max_features,
                                        'max_leaf_nodes': round(max_leaf_nodes)
                                    }, X_train, y_train),
                             pbounds=params_ranges)

results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model = model(
    splitter='best' if params['splitter'] < 0.5 else 'random',
    max_depth=int(round(params['max_depth'])),
    min_samples_leaf=round(params['min_samples_leaf']),
    min_weight_fraction_leaf=params['min_weight_fraction_leaf'],
    max_features=params['max_features'],
    max_leaf_nodes=int(round(params['max_leaf_nodes']))
)

# Fit the model
best_model.fit(X_train, y_train)


|   iter    |  target   | max_depth | max_fe... | max_le... | min_sa... | min_we... | splitter  |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m64.01    [0m | [0m12.57    [0m | [0m0.1446   [0m | [0m77.06    [0m | [0m2.398    [0m | [0m0.2613   [0m | [0m0.9289   [0m |
| [0m2        [0m | [0m58.44    [0m | [0m8.551    [0m | [0m0.6838   [0m | [0m33.33    [0m | [0m3.446    [0m | [0m0.1171   [0m | [0m0.6876   [0m |
| [0m3        [0m | [0m57.91    [0m | [0m3.285    [0m | [0m0.9585   [0m | [0m34.65    [0m | [0m2.952    [0m | [0m0.258    [0m | [0m0.8719   [0m |
| [0m4        [0m | [0m62.84    [0m | [0m19.14    [0m | [0m0.8052   [0m | [0m20.08    [0m | [0m1.248    [0m | [0m0.3745   [0m | [0m0.8301   [0m |
| [0m5        [0m | [0m42.8     [0m | [0m6.383    [0m | [0m0.9121   [0m | [0m85.0     [0m | [0m9.198    [0m | [0m0.01511  [0m | [0m0.8201   

In [None]:
# Append model to list
all_models['Random forest'] = best_model