In [1]:
#直方图算法：把连续值离散化成k个整数，并且构造宽度为k的直方图，遍历数据后，
#将离散化后的值作为索引在直方图中累积统计量。
#Leaf-wise：每次从当前的叶子中，找到分裂增益最大的一个叶子，然后分裂，继续循环。

#lightgbm参数：
#learning_rate:学习率
#n_estimators:树的个数，迭代的次数
#max_depth:树的深度，深度大可能过拟合
#num_leaves:一棵树最大的叶子数量；深度为 max_depth 的树最多有 2^(max_depth) 个叶子，因此 num_leaves 的值应该设置为小于 2^(max_depth)，否则可能会导致过拟合。
#min_child_samples:它的值取决于训练数据的样本个数和num_leaves,可以降低过拟合
#min_child_weight:叶子节点中样本二阶导数（Hessian）之和的最小值，增大该值可以降低过拟合
#reg_alpha:l1正则化
#reg_lambda：l2正则化
#colsample_bylevel, colsample_bytree, colsample_bynode ：分别表示各个层、各棵树、各个节点的列采样率
#bagging_fraction：表示每次迭代所使用的数据分数（即所占百分比，用小数表示）。将此值设置得较低，以提高训练速度。注意：需要同时设置 bagging_freq > 0 才会生效。


In [2]:
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Imputer,MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV
import lightgbm as lgb

# Load the iris dataset once: X is the feature matrix, y the target vector.
# NOTE(review): iris.target holds class labels, yet the cells below fit
# regressors scored with mean squared error -- confirm this is intended.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Rescale every feature column to the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)




In [3]:
def GridSearch(model,params,X,y,scoring='neg_mean_squared_error',cv=5):
    """Run an exhaustive grid search, print the best result and return the search.

    Parameters
    ----------
    model : estimator passed to GridSearchCV.
    params : parameter grid passed to GridSearchCV.
    X, y : training data passed to ``fit``.
    scoring : scoring name forwarded to GridSearchCV
        (default ``'neg_mean_squared_error'``, the value previously hard-coded).
    cv : cross-validation setting forwarded to GridSearchCV (default 5).

    Returns
    -------
    The fitted GridSearchCV object, so callers can inspect ``best_params_``,
    ``best_estimator_`` and the other fitted attributes afterwards.
    """
    search = GridSearchCV(model, params, scoring=scoring, cv=cv)
    search.fit(X, y)
    print('GridSearchCV_best_score:',search.best_score_)
    print('GridSearchCV_best_params：',search.best_params_)
    print('GridSearchCV_best_model：',search.best_estimator_)
    return search

In [4]:
# Baseline LightGBM regressor configuration; the grid search below only
# varies n_estimators on top of these fixed values.
# NOTE(review): LightGBM applies subsample only when subsample_freq > 0
# (its default is 0), so subsample=0.8 is presumably inactive here -- confirm.
params = dict(
    objective='regression',
    n_estimators=30,
    max_depth=5,
    min_child_samples=20,
    reg_lambda=0.1,
    reg_alpha=0.1,
    metric='rmse',
    colsample_bytree=1,
    subsample=0.8,
    num_leaves=30,
    random_state=60,
)
model = lgb.LGBMRegressor(**params)

# Search grid: only the number of trees is tuned. Further hyperparameters
# (min_child_weight, colsample_bytree, max_depth, subsample, reg_lambda,
# reg_alpha, min_child_samples) can be added as extra keys to widen the search.
change_params = dict(n_estimators=range(20, 200, 20))

GridSearch(model, change_params, X, y)

GridSearchCV_best_score: -0.050241248298760095
GridSearchCV_best_params： {'n_estimators': 60}
GridSearchCV_best_model： LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1,
              importance_type='split', learning_rate=0.1, max_depth=5,
              metric='rmse', min_child_samples=20, min_child_weight=0.001,
              min_split_gain=0.0, n_estimators=60, n_jobs=-1, num_leaves=30,
              objective='regression', random_state=60, reg_alpha=0.1,
              reg_lambda=0.1, silent=True, subsample=0.8,
              subsample_for_bin=200000, subsample_freq=0)


In [5]:
def RandomSearch(model,params,X,y,scoring='neg_mean_squared_error',cv=5,n_iter=10,random_state=None):
    """Run a randomized search, print the best result and return the search.

    Unlike an exhaustive grid search, a randomized search does not try every
    parameter combination; it samples candidates, so results vary between runs
    unless ``random_state`` is fixed.

    Parameters
    ----------
    model : estimator passed to RandomizedSearchCV.
    params : parameter distributions/grid passed to RandomizedSearchCV.
    X, y : training data passed to ``fit``.
    scoring : scoring name forwarded to RandomizedSearchCV
        (default ``'neg_mean_squared_error'``, the value previously hard-coded).
    cv : cross-validation setting forwarded to RandomizedSearchCV (default 5).
    n_iter : number of sampled candidates (default 10, matching the
        RandomizedSearchCV default that was used implicitly before).
    random_state : seed forwarded to RandomizedSearchCV; pass an int for
        reproducible sampling (default None keeps the previous behaviour).

    Returns
    -------
    The fitted RandomizedSearchCV object.
    """
    search = RandomizedSearchCV(
        model,
        params,
        n_iter=n_iter,
        random_state=random_state,
        scoring=scoring,
        cv=cv,
    )
    search.fit(X, y)
    print('RandomizedSearchCV_best_score:',search.best_score_)
    print('RandomizedSearchCV_best_params：',search.best_params_)
    print('RandomizedSearchCV_best_model：',search.best_estimator_)
    return search

In [6]:
# Baseline LightGBM regressor configuration for the randomized search; only
# n_estimators is sampled on top of these fixed values.
# NOTE(review): LightGBM applies subsample only when subsample_freq > 0
# (its default is 0), so subsample=0.8 is presumably inactive here -- confirm.
params = dict(
    objective='regression',
    n_estimators=30,
    max_depth=5,
    min_child_samples=20,
    reg_lambda=0.1,
    reg_alpha=0.1,
    metric='rmse',
    colsample_bytree=1,
    subsample=0.8,
    num_leaves=30,
    random_state=60,
)
model = lgb.LGBMRegressor(**params)

# Candidate values: only the number of trees is tuned. Further hyperparameters
# (min_child_weight, colsample_bytree, max_depth, subsample, reg_lambda,
# reg_alpha, min_child_samples) can be added as extra keys to widen the search.
change_params = dict(n_estimators=range(20, 200, 20))

RandomSearch(model, change_params, X, y)



RandomizedSearchCV_best_score: -0.050241248298760095
RandomizedSearchCV_best_params： {'n_estimators': 60}
RandomizedSearchCV_best_model： LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1,
              importance_type='split', learning_rate=0.1, max_depth=5,
              metric='rmse', min_child_samples=20, min_child_weight=0.001,
              min_split_gain=0.0, n_estimators=60, n_jobs=-1, num_leaves=30,
              objective='regression', random_state=60, reg_alpha=0.1,
              reg_lambda=0.1, silent=True, subsample=0.8,
              subsample_for_bin=200000, subsample_freq=0)


In [7]:
#贝叶斯优化
from sklearn.model_selection import train_test_split,cross_val_score	
from bayes_opt import BayesianOptimization
def BayesianSearch(model,params,init_points=5,n_iter=30,random_state=None):
    """Maximize ``model`` with Bayesian optimization and report the best trial.

    Parameters
    ----------
    model : callable objective handed to BayesianOptimization; it is maximized.
    params : dict of parameter bounds handed to BayesianOptimization.
    init_points : number of initial exploration points (default 5, the value
        previously hard-coded).
    n_iter : number of optimization iterations (default 30, the value
        previously hard-coded).
    random_state : seed forwarded to BayesianOptimization; pass an int for a
        reproducible search (default None keeps the previous behaviour).

    Returns
    -------
    int
        Index, within the optimizer's result list, of the trial with the
        highest target (the first one when several trials tie).

    Raises
    ------
    ValueError
        If the optimizer produced no results.
    """
    optimizer = BayesianOptimization(model, params, random_state=random_state)
    optimizer.maximize(init_points=init_points, n_iter=n_iter)

    # Snapshot the results once; kept under its own name so the `params`
    # argument is no longer shadowed.
    results = optimizer.res
    if not results:
        raise ValueError('BayesianOptimization produced no results')

    # max() returns the first maximal index, matching index(max(...)).
    max_index = max(range(len(results)), key=lambda i: results[i]['target'])

    print(results[max_index])
    return max_index
        

In [8]:

def evaluate(min_child_samples, min_child_weight, colsample_bytree, max_depth, subsample, reg_alpha, reg_lambda):
    """Objective for the Bayesian search: mean 5-fold CV score of an LGBMRegressor.

    The optimizer supplies every argument as a float; each one is cast to the
    type used for the corresponding LightGBM hyperparameter before the model
    is built. The model is scored on the module-level ``X`` and ``y`` with
    ``neg_mean_squared_error``, so a larger return value means a smaller error.

    Returns
    -------
    The mean of the five cross-validation scores.
    """
    param = {
        # Fixed settings.
        'objective': 'regression',
        'n_estimators': 50,
        'metric': 'rmse',
        'random_state': 60,
        # Tuned hyperparameters. These must be plain scalars: the previous
        # trailing commas turned several of them into 1-tuples.
        'min_child_samples': int(min_child_samples),
        'min_child_weight': int(min_child_weight),
        'colsample_bytree': float(colsample_bytree),
        'max_depth': int(max_depth),
        'subsample': float(subsample),
        'reg_lambda': float(reg_lambda),
        'reg_alpha': float(reg_alpha),
    }
    scores = cross_val_score(
        lgb.LGBMRegressor(**param),
        X,
        y,
        scoring='neg_mean_squared_error',
        cv=5,
    )
    return scores.mean()


    

In [9]:
# (lower, upper) bounds for each hyperparameter explored by the Bayesian search.
change_params = dict(
    min_child_weight=(3, 15),
    colsample_bytree=(0.4, 1.0),
    max_depth=(5, 15),
    subsample=(0.5, 1.0),
    reg_lambda=(0.1, 1.0),
    reg_alpha=(0.1, 1.0),
    min_child_samples=(10, 30),
)

In [10]:
# Maximize `evaluate` over the bounds in `change_params`; the call returns the
# index of the best trial, which the notebook shows as this cell's output.
BayesianSearch(evaluate,change_params)

|   iter    |  target   | colsam... | max_depth | min_ch... | min_ch... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-0.0522  [0m | [0m 0.9909  [0m | [0m 13.44   [0m | [0m 20.13   [0m | [0m 3.751   [0m | [0m 0.7496  [0m | [0m 0.2698  [0m | [0m 0.9786  [0m |
| [95m 2       [0m | [95m-0.04859 [0m | [95m 0.454   [0m | [95m 6.287   [0m | [95m 20.69   [0m | [95m 7.471   [0m | [95m 0.3531  [0m | [95m 0.2169  [0m | [95m 0.776   [0m |
| [0m 3       [0m | [0m-0.05315 [0m | [0m 0.8002  [0m | [0m 14.33   [0m | [0m 10.71   [0m | [0m 12.1    [0m | [0m 0.1457  [0m | [0m 0.9825  [0m | [0m 0.5292  [0m |
| [0m 4       [0m | [0m-0.05289 [0m | [0m 0.4796  [0m | [0m 9.376   [0m | [0m 13.91   [0m | [0m 5.33    [0m | [0m 0.4195  [0m | [0m 0.4779  [0m | [0m 0.6597  [0m |
| [0m 5       [0m | [0m-0.05109 [0m | 

8