In [1]:
from sklearn.datasets import make_regression

# Generate regression toy data
n_samples = 1000
n_features = 5

X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=42)

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(train_ratio * n_samples)

X_train = X[:train_size]
y_train = y[:train_size]

# Test data
X_test = X[train_size:]
y_test = y[train_size:]

In [2]:
from sklearn.model_selection import cross_val_score


def bo_params_generic(model, params, X_train, y_train):
    # Create the model instance with the specified parameters
    regressor = model(**params)
    
    # Assuming you have X_train, y_train defined for regression
    scores = cross_val_score(regressor, X_train, y_train, cv=10, scoring='neg_root_mean_squared_error')
    return -scores.mean()

In [9]:
all_models = {}

# Random Forest

**Feature selection**

**Hyperparameter optimalisatie**

In [4]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from bayes_opt import BayesianOptimization


params_ranges = {
    'splitter': (0, 1),  # Numeric mapping of string values: 'best' -> 0, 'random' -> 1
    'max_depth': (1, 20),
    'min_samples_leaf': (1, 10),
    'min_weight_fraction_leaf': (0.0, 0.5),
    'max_features': (0.1, 1),
    'max_leaf_nodes': (10, 100)
}

# Example usage with Decision Trees
model = DecisionTreeRegressor
dt_bo = BayesianOptimization(f=lambda splitter, max_depth, min_samples_leaf, min_weight_fraction_leaf,
                                    max_features, max_leaf_nodes: bo_params_generic(model, {
                                        'splitter': 'best' if splitter < 0.5 else 'random',
                                        'max_depth': round(max_depth),
                                        'min_samples_leaf': round(min_samples_leaf),
                                        'min_weight_fraction_leaf': min_weight_fraction_leaf,
                                        'max_features': max_features,
                                        'max_leaf_nodes': round(max_leaf_nodes)
                                    }, X_train, y_train),
                             pbounds=params_ranges)

results = dt_bo.maximize(n_iter=5, init_points=20)
params = dt_bo.max['params']

# Creating a model with the best hyperparameters
best_model = model(
    splitter='best' if params['splitter'] < 0.5 else 'random',
    max_depth=int(round(params['max_depth'])),
    min_samples_leaf=round(params['min_samples_leaf']),
    min_weight_fraction_leaf=params['min_weight_fraction_leaf'],
    max_features=params['max_features'],
    max_leaf_nodes=int(round(params['max_leaf_nodes']))
)

# Fit the model
best_model.fit(X_train, y_train)


|   iter    |  target   | max_depth | max_fe... | max_le... | min_sa... | min_we... | splitter  |
-------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m52.59    [0m | [0m19.83    [0m | [0m0.8834   [0m | [0m16.19    [0m | [0m6.064    [0m | [0m0.4299   [0m | [0m0.2575   [0m |
| [95m2        [0m | [95m63.49    [0m | [95m5.666    [0m | [95m0.1317   [0m | [95m79.53    [0m | [95m2.119    [0m | [95m0.08336  [0m | [95m0.5748   [0m |
| [0m3        [0m | [0m48.46    [0m | [0m13.45    [0m | [0m0.8559   [0m | [0m28.66    [0m | [0m3.64     [0m | [0m0.1634   [0m | [0m0.3241   [0m |
| [0m4        [0m | [0m51.66    [0m | [0m3.851    [0m | [0m0.645    [0m | [0m73.52    [0m | [0m7.612    [0m | [0m0.1988   [0m | [0m0.1656   [0m |
| [95m5        [0m | [95m64.51    [0m | [95m9.96     [0m | [95m0.9325   [0m | [95m96.31    [0m | [95m9.241    [0m | [95m0.4936   [0m |

In [None]:
# Append model to list
all_models['Random forest'] = best_model