# 任务：构建GBDT与XGBoost回归模型预测波士顿房价

In [7]:

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import mean_squared_error, r2_score
 
 
# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs scikit-learn < 1.2 to run as written.
dataset = load_boston()

# Pull the feature matrix and the target vector out of the loaded dataset.
X, y = dataset.data, dataset.target

# Hold out 10% of the samples for testing (a 9:1 train/test split). The fixed
# random_state makes the random partition repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=13,
)

In [8]:
# Instantiate the GBDT (gradient-boosted decision trees) regression model.
# The hyperparameters below can be adjusted to compare models.
#
# 'loss' is intentionally left at the estimator's default (least squares):
# the explicit spelling 'ls' was deprecated in scikit-learn 1.0 and removed
# in 1.2, while its replacement 'squared_error' does not exist before 1.0.
# Relying on the default selects the same loss on every version.
#
# 'random_state' is pinned because scikit-learn permutes the candidate
# features at every split; without a seed, ties between equally good splits
# can be resolved differently from run to run.
params = {
    'n_estimators': 500,
    'max_depth': 4,
    'min_samples_split': 2,
    'learning_rate': 0.01,
    'random_state': 13,
}
gbr = ensemble.GradientBoostingRegressor(**params)

# Fit the GBDT regressor on the training data.
gbr.fit(X_train, y_train)

# Predict on the held-out test data with the fitted model.
y_pred = gbr.predict(X_test)

# Print the per-feature importance array.
print(gbr.feature_importances_)

# Report the test-set mean squared error and R^2 score.
print('Mean squared error: %.2f' % mean_squared_error(y_test, y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_test, y_pred))
# Build an XGBoost regressor for the same task and compare it with the GBDT
# model. XGBRegressor comes from the separate xgboost package (it is not part
# of scikit-learn); it is used here through the same fit/predict calls.
import xgboost as xgb

# Instantiate the XGBoost regression model with the same depth, number of
# trees and learning rate as the GBDT model; n_jobs is the thread count.
xgb_reg = xgb.XGBRegressor(
    n_estimators=500,
    max_depth=4,
    learning_rate=0.01,
    n_jobs=10,
)
xgb_reg.fit(X_train, y_train)

# NOTE: y_pred is reassigned here and from now on holds the XGBoost
# predictions rather than the GBDT ones.
y_pred = xgb_reg.predict(X_test)

# Print the per-feature importance array, then the test-set MSE and R^2.
print(xgb_reg.feature_importances_)
print('Mean squared error: %.2f' % mean_squared_error(y_true=y_test, y_pred=y_pred))
print('Coefficient of determination: %.2f' % r2_score(y_true=y_test, y_pred=y_pred))

[2.11994480e-02 2.30118138e-04 2.50900265e-03 3.76128570e-04
 2.98635942e-02 4.75097945e-01 8.58320121e-03 8.62155693e-02
 1.89694240e-03 1.69920928e-02 2.64336967e-02 1.54661921e-02
 3.15136069e-01]
Mean squared error: 11.55
Coefficient of determination: 0.88
[0.01858689 0.00333837 0.01090753 0.01258292 0.05032131 0.35306114
 0.01207663 0.06285112 0.00809404 0.02334753 0.02701837 0.01622056
 0.40159371]
Mean squared error: 11.49
Coefficient of determination: 0.88
