# 网格搜索

<font color=red>网格搜索同样是交叉验证确定最优参数</font>

In [None]:
def param_grid_search(train):
    """
    Tune a random-forest regressor with an exhaustive, cross-validated grid search.

    :param train: training DataFrame; it must contain the "card_id" and
        "target" columns, and every other column is used as a feature
    :return: the best estimator found by the search (``best_estimator_``)
    """
    print('param_grid_search')
    # Every column except the id and the label is a model input.
    feature_cols = train.columns.tolist()
    for excluded in ("card_id", "target"):
        feature_cols.remove(excluded)

    # Candidate values explored by the grid.
    search_space = {
        "n_estimators": [80, 160],
        "min_samples_leaf": [30],
        "min_samples_split": [60, 120],
        "max_depth": [3, 5, 7, 9],
        "max_features": ['sqrt', 'log2', 50, 100],
    }

    print("Tuning hyper-parameters for mse")
    # Settings that stay fixed for every grid point.
    fixed_settings = {
        "criterion": "squared_error",
        # Like min_samples_leaf, but expressed as a weight fraction instead of a count.
        "min_weight_fraction_leaf": 0.,
        "max_leaf_nodes": None,
        # Minimum impurity decrease; a node is not split further below this value.
        "min_impurity_decrease": 0.,
        "bootstrap": True,
        "oob_score": False,
        "n_jobs": 8,
        "random_state": 2020,
        "verbose": 0,
        "warm_start": False,
    }
    base_model = RandomForestRegressor(**fixed_settings)
    searcher = GridSearchCV(
        base_model,
        search_space,
        cv=2,
        scoring="neg_mean_squared_error",
    )
    searcher.fit(train[feature_cols].values, train['target'].values)

    print("best_params_:")
    print(searcher.best_params_)
    print('====================================================================')

    # Report the mean test score and twice its standard deviation for every
    # parameter combination, numbered from 1.
    results = searcher.cv_results_
    rows = zip(
        results["mean_test_score"],
        results["std_test_score"],
        results["params"],
    )
    for i, (mean, std, params) in enumerate(rows, start=1):
        print(f'第{i}组参数:')
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    return searcher.best_estimator_


# 贝叶斯优化

## hyperopt

In [None]:
def params_append(params):
    """
    Add the fixed LightGBM settings to ``params`` in place and return it.

    By default, when a LightGBM Dataset object is constructed, some features
    are filtered out based on the value of min_data_in_leaf.
    As a simple example, consider a dataset of 1000 observations with a single
    feature named feature_1 that takes only two values: 25.0 (995
    observations) and 50.0 (5 observations). With min_data_in_leaf = 10 there
    is no valid split for this feature. Rather than reconsidering the feature
    and ignoring it at every iteration, LightGBM filters it out before
    training, when the dataset is built.
    Setting feature_pre_filter = False overrides this default behaviour so
    that hyperopt tuning does not raise an error.

    :param params: parameter dict to extend; it is modified in place
    :return: the same ``params`` object, with the fixed settings applied
    """
    params.update({
        'feature_pre_filter': False,
        'objective': 'regression',
        'metric': 'rmse',
        'bagging_seed': 2020,
    })
    return params


def param_hyperopt(train):
    """
    Search LightGBM hyper-parameters with hyperopt (TPE) and return the best set.

    Each candidate is scored by the lowest mean RMSE reached during a 2-fold
    ``lgb.cv`` run with early stopping (patience 20, at most 1000 rounds).

    :param train: training DataFrame; it must contain the "card_id" and
        "target" columns, and every other column is used as a feature
    :return: dict mapping every searched parameter name to its best value.
        ``fmin`` reports ``hp.choice`` dimensions ("num_leaves",
        "min_child_samples") as indices into their option lists, so the raw
        result is decoded with ``space_eval`` before it is returned.
    """
    # Imported locally so the fix does not rely on the file-level import list.
    from hyperopt import space_eval

    label = 'target'
    features = train.columns.tolist()
    features.remove('card_id')
    features.remove('target')
    # NOTE(review): `silent` here and `show_stdv` below appear to have been
    # removed in LightGBM 4.0 - confirm the installed version before upgrading.
    train_data = lgb.Dataset(train[features], train[label], silent=True)

    def hyperopt_objective(params):
        """
        Objective minimised by hyperopt.

        :param params: one candidate sampled from the search space
        :return: the smallest mean cross-validated RMSE over the boosting rounds
        """
        params = params_append(params)
        print(params)
        callbacks = [lgb.early_stopping(20)]
        res = lgb.cv(params, train_data, 1000,
                     nfold=2,
                     stratified=False,
                     shuffle=True,
                     metrics='rmse',
                     callbacks=callbacks,
                     show_stdv=False,
                     seed=2020)
        # The objective is free to choose; here it is the best mean RMSE.
        return min(res['rmse-mean'])

    params_space = {
        'learning_rate': hp.uniform('learning_rate', 1e-2, 5e-1),  # uniform distribution
        'bagging_fraction': hp.uniform('bagging_fraction', 0.5, 1),
        'feature_fraction': hp.uniform('feature_fraction', 0.5, 1),
        'num_leaves': hp.choice('num_leaves', list(range(10, 300, 10))),
        'reg_alpha': hp.randint('reg_alpha', 0, 10),  # random integer
        'reg_lambda': hp.uniform('reg_lambda', 0, 10),
        'bagging_freq': hp.randint('bagging_freq', 1, 10),
        'min_child_samples': hp.choice('min_child_samples', list(range(1, 30, 5)))
        }
    # fmin: minimize a function over a hyperparameter space.
    best_assignment = fmin(
        hyperopt_objective,
        space=params_space,
        algo=tpe.suggest,
        max_evals=30,
        rstate=np.random.default_rng(2020))
    # Decode hp.choice indices back into the real option values; without this
    # "num_leaves" would come back as e.g. 3 where 40 was actually chosen.
    params_best = space_eval(params_space, best_assignment)
    return params_best


## bayesian