In [56]:
import numpy as np
from sklearn.model_selection import GridSearchCV # 网格调参
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier # 决策树模型
from sklearn.preprocessing import StandardScaler

In [57]:
# Loading the Digits dataset
data = load_boston()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)

# 然后用StandardScaler做归一化处理
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
y_train = y_scaler.fit_transform(y_train.reshape(-1,1))
X_test = X_scaler.transform(X_test)
y_test = y_scaler.transform(y_test.reshape(-1,1))

In [58]:
param_grid = {
    'criterion': ('gini','entropy'),
    'splitter': ('best','random'),
    'max_depth': range(1,10),
    'min_samples_leaf': range(1,50,5),
    'min_impurity_decrease': np.linspace(0,0.5,20) # 信息增益的最小值
}

In [59]:
clf = DecisionTreeClassifier(random_state=25)
grid = GridSearchCV(clf, param_grid, cv=5)
grid.fit(X_train, y_train.astype(int))
print('网格搜索-最佳度量值:', grid.best_score_) # 获取最佳度量值
print('网格搜索-最佳参数:', grid.best_params_) # 获取最佳度量值时的代定参数的值。是一个字典
print('网格搜索-最佳模型:', grid.best_estimator_) # 获取最佳度量时的分类器模型

网格搜索-最佳度量值: 0.8232189973614775
网格搜索-最佳参数: {'criterion': 'entropy', 'max_depth': 4, 'min_impurity_decrease': 0.02631578947368421, 'min_samples_leaf': 6, 'splitter': 'random'}
网格搜索-最佳模型: DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.02631578947368421,
                       min_impurity_split=None, min_samples_leaf=6,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       presort=False, random_state=25, splitter='random')
