# 数据集导入，划分训练集和测试集

In [85]:
# 从sklearn 调入所需要的包
from sklearn.model_selection import train_test_split #数据分隔出训练集和验证集.
from sklearn import datasets
import numpy as np
import pandas as pd
# 导入精度和召回.
from sklearn.metrics import precision_score, recall_score

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
data, label = iris.data, iris.target

# Wrap the raw arrays in DataFrames with readable column names.
data1 = pd.DataFrame(data, columns=['sepal_l', 'sepal_w', 'petal_l', 'petal_w'])
label1 = pd.DataFrame(label, columns=['label'])

# Preview the first rows of the features and of the targets.
print(data1.head())
print(label1.head())

   sepal_l  sepal_w  petal_l  petal_w
0      5.1      3.5      1.4      0.2
1      4.9      3.0      1.4      0.2
2      4.7      3.2      1.3      0.2
3      4.6      3.1      1.5      0.2
4      5.0      3.6      1.4      0.2
   label
0      0
1      0
2      0
3      0
4      0


In [86]:
# Split features and labels into train / test parts:
# 30% of the rows go to the test part, with a fixed seed so the split is repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    data1,
    label1,
    test_size=0.3,
    random_state=42,
)
print('训练集长度：', train_y.shape[0])
print('测试集长度：', test_y.shape[0])

训练集长度： 105
测试集长度： 45


## 1. xgboost原生使用形式

In [87]:
import xgboost as xgb

# Wrap the splits in xgboost's native DMatrix container.
# Only the training matrix carries labels; the test matrix is used for prediction.
dtrain = xgb.DMatrix(train_x, label=train_y)
dtest = xgb.DMatrix(test_x)

# Booster parameters.
# 'multi:softprob' makes predict() return a per-class probability matrix,
# whereas 'multi:softmax' would return the predicted class index directly.
xgb_params = {
    'eta': 0.3,                    # learning rate
    'verbosity': 0,                # suppress training messages (replaces the deprecated 'silent' flag)
    'objective': 'multi:softprob',
    'num_class': 3,                # number of target classes
    'max_depth': 3,                # maximum tree depth
}
num_iter = 20  # number of boosting rounds

# Train the booster.
model = xgb.train(xgb_params, dtrain, num_iter)

# Predict class probabilities, then keep the index of the most probable
# class for each row (vectorised instead of a per-row Python loop).
test_pre1 = model.predict(dtest)
test_pre1 = np.argmax(test_pre1, axis=1)

# Macro-averaged precision / recall on the held-out split.
print('验证集精确率：', precision_score(test_y, test_pre1, average='macro'))
print('验证集召回率：', recall_score(test_y, test_pre1, average='macro'))

验证集精确率： 1.0
验证集召回率： 1.0


## 2. sklearn接口形式使用xgboost

In [88]:
from xgboost import XGBClassifier

# Gradient-boosted classifier through xgboost's scikit-learn style wrapper.
model = XGBClassifier(
    learning_rate=0.01,          # shrinkage applied to each tree's contribution
    n_estimators=3000,           # number of boosting rounds (trees), not a step size
    max_depth=4,                 # maximum depth of each tree
    objective='multi:softprob',  # the target has 3 classes, so declare a multiclass objective
    random_state=27,             # fixed seed for reproducibility
)
model.fit(train_x, train_y)

# Predict class labels for the held-out split.
test_pre2 = model.predict(test_x)

# Macro-averaged precision / recall on the held-out split.
print('验证集精确率：', precision_score(test_y, test_pre2, average='macro'))
print('验证集召回率：', recall_score(test_y, test_pre2, average='macro'))

验证集精确率： 1.0
验证集召回率： 1.0


## 3. lightgbm原生使用形式

In [89]:
import lightgbm as lgb

# Wrap the splits in LightGBM's native Dataset container (features + labels).
dtrain = lgb.Dataset(train_x, train_y)
dtest = lgb.Dataset(test_x, test_y)

# Training parameters.
lgb_params = dict(
    boosting_type='gbdt',
    objective='multiclass',
    metric='multi_error',
    verbose=10,   # <0: fatal only, =0: errors (warnings), >0: info
    num_class=3,  # required for 'multiclass'; LightGBM raises an error when it is missing
)

# Train for 10 boosting rounds, evaluating on both the train and test datasets.
# NOTE(review): `verbose_eval` is version-sensitive in LightGBM (newer releases
# expect a logging callback instead) — confirm against the installed version.
clf = lgb.train(
    lgb_params,
    dtrain,
    num_boost_round=10,
    valid_sets=[dtrain, dtest],
    verbose_eval=10,
)

# Predict per-class probabilities for the test features.
test_pre3 = clf.predict(test_x, num_iteration=clf.best_iteration)

# Keep the index of the most probable class for each row.
test_pre3 = np.argmax(test_pre3, axis=1)

# Macro-averaged precision / recall on the held-out split.
print('验证集精准率：', precision_score(test_y, test_pre3, average='macro'))
print('验证集召回率：', recall_score(test_y, test_pre3, average='macro'))

[10]	training's multi_error: 0.0666667	valid_1's multi_error: 0
验证集精准率： 1.0
验证集召回率： 1.0


## 4. sklearn接口形式使用lightgbm

In [90]:
from lightgbm import LGBMClassifier

# Hyper-parameters forwarded to the scikit-learn style LightGBM estimator.
lgb_params = dict(
    learning_rate=0.1,
    max_bin=150,
    num_leaves=32,
    max_depth=11,
    objective='multiclass',
    n_estimators=300,
)

# Build the classifier from the parameter dict and fit it on the training split.
model = LGBMClassifier(**lgb_params)
model.fit(train_x, train_y)

# Predict class labels for the held-out split.
test_pre4 = model.predict(test_x)

# Macro-averaged precision / recall on the held-out split.
print('验证集精准率：', precision_score(test_y, test_pre4, average='macro'))
print('验证集召回率：', recall_score(test_y, test_pre4, average='macro'))

验证集精准率： 1.0
验证集召回率： 1.0
