In [None]:
# Demo: LightGBM ships a classifier and a regressor that are used the same way.
# Toy data: five samples with two features each, plus one target per sample.
X = [[start, start + 1] for start in range(1, 10, 2)]  # [[1, 2], [3, 4], ..., [9, 10]]
y = list(range(1, 6))  # [1, 2, 3, 4, 5]

# Pick ONE of the two estimators below. Both assignments execute as written,
# so the regressor (assigned last) is the one that is actually fitted.
from lightgbm import LGBMClassifier, LGBMRegressor

model = LGBMClassifier()  # classification model
model = LGBMRegressor()  # regression model

model.fit(X, y)

In [None]:
# Steps 1-3 (load the data, separate the features from the target, split into
# training and test sets) are the same as for the XGBoost model.
import pandas as pd
from sklearn.model_selection import train_test_split

# 1. Load the data
df = pd.read_excel('客户信息与违约表现.xlsx')

# 2. Separate the target column from the remaining (feature) columns
y = df['是否违约']
X = df.drop('是否违约', axis=1)

# 3. Hold out 20% of the rows as the test set; the fixed random_state keeps
#    the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
)

# 4. Build and train the model
from lightgbm import LGBMClassifier

# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
model = LGBMClassifier().fit(X_train, y_train)

# 5. Predict on the test set and evaluate the model
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# Side-by-side table of predicted vs. actual labels.
y_pred = model.predict(X_test)
a = pd.DataFrame({'预测值': list(y_pred), '实际值': list(y_test)})
print(a.head())

# Overall accuracy. scikit-learn's documented argument order is
# (y_true, y_pred); model.score(X_test, y_test) reports the same mean accuracy,
# so it is not computed a second time.
accuracy = accuracy_score(y_test, y_pred)
print('accuracy:', accuracy)

# ROC curve, built from the predicted probabilities in column 1 of
# predict_proba (assumes a binary target whose second class is the positive
# one -- TODO confirm against the data).
y_pred_proba = model.predict_proba(X_test)
fpr, tpr, thres = roc_curve(y_test, y_pred_proba[:, 1])
plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.show()

# AUC of the same probabilities. It gets its own name so that it no longer
# overwrites the accuracy; `score` is still bound to the AUC afterwards, as it
# was at the end of the original cell.
auc_score = roc_auc_score(y_test, y_pred_proba[:, 1])
score = auc_score
print('AUC:', auc_score)

# Feature importances as a two-column table, printed from most to least
# important. The stored table keeps its original (unsorted) row order.
features = X.columns  # feature names
importances = model.feature_importances_  # importance of each feature
importances_df = pd.DataFrame({'特征名称': features, '特征重要性': importances})
print(importances_df.sort_values('特征重要性', ascending=False))

# Hyper-parameter tuning with a cross-validated grid search.
# Fixes two misspelled names that stopped this section from running:
# GridSearchCv -> GridSearchCV and LGBMClassfier -> LGBMClassifier.
from lightgbm import LGBMClassifier
from sklearn.model_selection import GridSearchCV

# Candidate values; every combination (3 x 3 x 3 = 27) is evaluated.
parameters = {
    'num_leaves': [10, 15, 31],
    'n_estimators': [10, 20, 30],
    'learning_rate': [0.05, 0.1, 0.2]
}

# NOTE: this rebinds `model` to a fresh, unfitted estimator. GridSearchCV works
# on clones of it, so the refitted winner is grid_search.best_estimator_.
model = LGBMClassifier()
grid_search = GridSearchCV(model, parameters, scoring='roc_auc', cv=5)
grid_search.fit(X_train, y_train)  # 5-fold cross-validation on the training set
grid_search.best_params_  # best parameter combination found