In [12]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.datasets import make_regression
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split


### Create regression synthetic dataset

In [14]:
X, y = make_regression(n_samples=100, n_features=5, noise=1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=29,train_size=0.9,)
print(X_train.shape[0],X_test.shape[0])
X_test, X_val, y_test, y_val = train_test_split(
    X_test, y_test, random_state=29,train_size=0.9
)
# diabetes = load_diabetes()
# X, y = diabetes.data, diabetes.target
# n_features = X.shape[1]

90 10


### Initialize model

In [3]:
# gradient boosted trees tend to do well on problems like this
reg = GradientBoostingRegressor(n_estimators=50, random_state=29)


### Hyperparameter tuning

In [15]:
from skopt.space import Real, Integer
from skopt.utils import use_named_args

n_features = X.shape[1]
# The list of hyper-parameters we want to optimize. For each one we define the
# bounds, the corresponding scikit-learn parameter name, as well as how to
# sample values from that dimension (`'log-uniform'` for the learning rate)
space  = [Integer(1, 5, name='max_depth'),
          Real(10**-5, 10**0, "log-uniform", name='learning_rate'),
          Integer(1, n_features, name='max_features'),
          Integer(2, 100, name='min_samples_split'),
          Integer(1, 100, name='min_samples_leaf')]


# this decorator allows your objective function to receive a the parameters as
# keyword arguments. This is particularly convenient when you want to set
# scikit-learn estimator parameters
{"a":1,"b":2}
objective(a=1,b=2)
@use_named_args(space)
def objective(**params):
    reg.set_params(**params)

    return -np.mean(cross_val_score(reg, X_train, y_train, cv=5, n_jobs=-1,
                                    scoring="neg_mean_absolute_error"))

### Get best model parameters

In [5]:
from skopt import gp_minimize
res_gp = gp_minimize(objective, space, n_calls=50, random_state=0)
"Best score=%.4f" % res_gp.fun

'Best score=48.5300'

In [6]:
print("""Best parameters:
- max_depth=%d
- learning_rate=%.6f
- max_features=%d
- min_samples_split=%d
- min_samples_leaf=%d""" % (res_gp.x[0], res_gp.x[1],
                            res_gp.x[2], res_gp.x[3],
                            res_gp.x[4]))


Best parameters:
- max_depth=4
- learning_rate=0.168623
- max_features=2
- min_samples_split=2
- min_samples_leaf=20


### Feature importance

In [16]:
reg_fitted = GradientBoostingRegressor(max_depth=4,
learning_rate=0.168623,
max_features=2,
min_samples_split=2,
min_samples_leaf=20).fit(X,y)


In [17]:
from sklearn.inspection import permutation_importance
# Assuming model, X_val, and y_val are defined
results = permutation_importance(reg_fitted, X_val, y_val, scoring='accuracy')
importances = results.importances_mean
importances

ValueError: continuous is not supported

In [None]:
print("""Best parameters:
- max_depth=%d
- learning_rate=%.6f
- max_features=%d
- min_samples_split=%d
- min_samples_leaf=%d""" % (res_gp.x[0], res_gp.x[1],
                            res_gp.x[2], res_gp.x[3],
                            res_gp.x[4]))

In [None]:
###Genetic Optimization

In [19]:
import pandas as pd
pd.DataFrame(X)

Unnamed: 0,0,1,2,3,4
0,0.975120,-0.677162,-0.012247,-0.897254,0.075805
1,0.081874,-0.485364,0.758969,-0.772825,-0.236819
2,-1.412304,-0.908024,-0.562288,-1.012831,0.314247
3,-0.645120,0.361636,1.356240,-0.072010,1.003533
4,-0.622700,0.280992,-1.952088,-0.151785,0.588317
...,...,...,...,...,...
95,-1.514847,-0.846794,0.714000,0.473238,-0.072829
96,-0.773010,0.097676,-0.401220,0.224092,0.012592
97,1.031000,0.611676,0.324084,-0.385082,-0.676922
98,-0.234587,0.005113,-1.463515,0.296120,0.261055


In [24]:
pd.DataFrame(y)

Unnamed: 0,0
0,-58.617987
1,-47.475843
2,-258.696834
3,116.096971
4,-123.443012
...,...
95,-104.006385
96,-49.580547
97,98.036917
98,-80.419593


In [28]:
i = 100
YC = []
num_features = 5
chromosomes  = []
for  i in range(i):
    chromosome = [np.random.random(size=num_features)]
    y_chromosome = reg_fitted.predict(chromosome)
    print(f'chromsome={chromosome} y_chromosome={y_chromosome}')
    YC.append(y_chromosome[0])
    chromosomes.append(chromosome)
    

chromsome=[array([0.24972385, 0.1780544 , 0.31337234, 0.2604028 , 0.06816358])] y_chromosome=[77.14614143]
chromsome=[array([0.15568669, 0.53448311, 0.77766452, 0.07148093, 0.27847306])] y_chromosome=[167.02923778]
chromsome=[array([0.64596425, 0.69170855, 0.3055367 , 0.72932294, 0.78221831])] y_chromosome=[256.94381171]
chromsome=[array([0.807408  , 0.52681091, 0.81651171, 0.17434748, 0.21529317])] y_chromosome=[230.95268853]
chromsome=[array([0.15456247, 0.43289599, 0.93980882, 0.17357881, 0.6687629 ])] y_chromosome=[154.42339108]
chromsome=[array([0.52713685, 0.61145693, 0.98724421, 0.43358316, 0.17685887])] y_chromosome=[229.20818599]
chromsome=[array([0.97324271, 0.00453741, 0.45612286, 0.41523508, 0.69806702])] y_chromosome=[193.13461026]
chromsome=[array([0.45088433, 0.03831957, 0.28420165, 0.82304482, 0.60965237])] y_chromosome=[154.32654301]
chromsome=[array([0.0056963 , 0.76004061, 0.16914481, 0.6728487 , 0.61768938])] y_chromosome=[181.29857528]
chromsome=[array([0.660554  ,

In [27]:
np.argmin(YC)

83

In [29]:
chromosomes[np.argmin(YC)]

[array([0.34293628, 0.04698463, 0.16362981, 0.17135532, 0.71536212])]