# 基于模型泛化效果、可解释性考虑，初定选定以下三种模型框架进行尝试

## 1.具有强解释性的决策树模型

## 2.以bagging思想进行集成集成的随机森林模型

## 3.以boosting思想进行集成的xgb模型

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree  
import xgboost as xgb
from sklearn.model_selection import GridSearchCV 
from sklearn.metrics import roc_auc_score
import joblib

In [2]:
def auc_score_fig(clf,x,y_test):  #计算auc，三参数分别为分类器，特征与标签
    y_probs = clf.predict_proba(x)
    y_probs = y_probs[:,1]
    auc = roc_auc_score(y_test, y_probs)
    return auc

In [3]:
#读取数据
df_train = pd.read_csv(r"C:\Users\17738\贷款违约率预测\数据集\train_clean.csv")
df_test = pd.read_csv(r"C:\Users\17738\贷款违约率预测\数据集\test_clean.csv")

在训练数据中单独划分一个测试集用来评估模型的泛化效果

In [4]:
train_data, test_data = train_test_split(df_train, test_size=0.2, random_state=1)   

In [5]:
train_data.groupby("isDefault").count()

Unnamed: 0_level_0,id,loanAmnt,term,interestRate,installment,grade,subGrade,employmentTitle,employmentLength,homeOwnership,...,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14
isDefault,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,512351,512351,512351,512351,512351,512351,512351,512351,512351,512351,...,512351,512351,512351,512351,512351,512351,512351,512351,512351,512351
1,127649,127649,127649,127649,127649,127649,127649,127649,127649,127649,...,127649,127649,127649,127649,127649,127649,127649,127649,127649,127649


In [6]:
test_data.groupby("isDefault").count()

Unnamed: 0_level_0,id,loanAmnt,term,interestRate,installment,grade,subGrade,employmentTitle,employmentLength,homeOwnership,...,n5,n6,n7,n8,n9,n10,n11,n12,n13,n14
isDefault,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,128039,128039,128039,128039,128039,128039,128039,128039,128039,128039,...,128039,128039,128039,128039,128039,128039,128039,128039,128039,128039
1,31961,31961,31961,31961,31961,31961,31961,31961,31961,31961,...,31961,31961,31961,31961,31961,31961,31961,31961,31961,31961


In [7]:
#将特征与标签从数据集中剥离出来
train_features = train_data.drop(columns = ["isDefault"]).copy()
train_target = train_data[["isDefault"]].copy()

test_features = test_data.drop(columns = ["isDefault"]).copy()
test_target = test_data[["isDefault"]].copy()


In [8]:
# max_depth:决策树最大深度。初步可以取5-10,搜索范围一般为3-15。太小会欠拟合,太大会过拟合。
# min_samples_split:分割节点时最小样本数。初步取2,搜索范围一般为2-100。太小容易过拟合,太大学习不足。
# min_samples_leaf:叶子节点最小样本数。初步取1,搜索范围1-50。原理同上。
# max_features:分割时考虑的最大特征数。对于分类一般取sqrt(n_features),搜索范围可以取0.5-1.0倍的特征数。
# criterion:特征选择准则,gini或entropy,二者效果相近。
# max_leaf_nodes:最大叶子节点数,一般不需要限制,可以搜索100-500个节点数的范围。
# min_impurity_decrease:提前停止划分的纯度增益阈值,默认不需要调整。
# class_weight:类别权重,如果类别不均衡可以尝试平衡一下权重。

# 决策树


确定决策树的核心参数以及参数范围

In [9]:
max_depth = [i for i in range(5,11,2)] 
min_samples_split = [i for i in range(2,100,20)] 
min_samples_leaf = [i for i in range(1,50,10)] 
max_features = [i/10 for i in range(1,10,2)] 
criterion = ["gini","entropy"]
max_leaf_nodes = [100,500,50]
class_weight = [{0:1,1:1},{0:2,1:1},{0:3,1:1},{0:4,1:1}]

将参数设定为网格搜索可使用的字典形式，因排列组合过多，可分批次进行搜索探索

In [10]:
params_state1 = {"max_depth":max_depth,"min_samples_split":min_samples_split,"min_samples_leaf":min_samples_leaf}
params_state2 = {"max_features":max_features,"criterion":criterion,"max_leaf_nodes":max_leaf_nodes}
params_state3 = {"class_weight":class_weight}

In [11]:
best_params = {}
clf_dt = tree.DecisionTreeClassifier()
for each in [params_state1,params_state2,params_state3]:
    clf_dt_mid = GridSearchCV(estimator=clf_dt, param_grid=each, scoring='roc_auc', cv=5,verbose=2)
    clf_dt_mid.fit(train_features,train_target)
    best_params.update(clf_dt_mid.best_params_)
    clf_dt = tree.DecisionTreeClassifier(**best_params)
#模型训练
clf_dt.fit(train_features,train_target)

Fitting 5 folds for each of 75 candidates, totalling 375 fits
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2; total time=   4.1s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2; total time=   4.3s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2; total time=   4.3s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2; total time=   4.3s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=2; total time=   4.3s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=22; total time=   4.2s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=22; total time=   4.2s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=22; total time=   4.3s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=22; total time=   4.3s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=22; total time=   4.2s
[CV] END max_depth=5, min_samples_leaf=1, min_samples_split=42; total time=   4.3s
[CV] END max_depth=5, min_samp

[CV] END max_depth=5, min_samples_leaf=31, min_samples_split=82; total time=   4.5s
[CV] END max_depth=5, min_samples_leaf=31, min_samples_split=82; total time=   4.4s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=2; total time=   4.5s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=2; total time=   4.2s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=2; total time=   4.4s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=2; total time=   4.4s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=2; total time=   4.5s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=22; total time=   4.4s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=22; total time=   4.4s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=22; total time=   4.4s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=22; total time=   4.3s
[CV] END max_depth=5, min_samples_leaf=41, min_samples_split=22; total time=   4.

[CV] END max_depth=7, min_samples_leaf=21, min_samples_split=82; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=21, min_samples_split=82; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=21, min_samples_split=82; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=2; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=2; total time=   6.0s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=2; total time=   6.1s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=2; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=2; total time=   5.8s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=22; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=22; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=22; total time=   5.9s
[CV] END max_depth=7, min_samples_leaf=31, min_samples_split=22; total time=   6.

[CV] END max_depth=9, min_samples_leaf=11, min_samples_split=82; total time=   7.6s
[CV] END max_depth=9, min_samples_leaf=11, min_samples_split=82; total time=   7.6s
[CV] END max_depth=9, min_samples_leaf=11, min_samples_split=82; total time=   7.6s
[CV] END max_depth=9, min_samples_leaf=11, min_samples_split=82; total time=   7.7s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=2; total time=   7.5s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=2; total time=   7.5s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=2; total time=   7.5s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=2; total time=   7.4s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=2; total time=   7.5s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=22; total time=   7.4s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=22; total time=   7.4s
[CV] END max_depth=9, min_samples_leaf=21, min_samples_split=22; total time=   7.

[CV] END criterion=gin, max_features=0.3, max_leaf_nodes=100; total time=   0.2s
[CV] END criterion=gin, max_features=0.3, max_leaf_nodes=500; total time=   0.2s
[CV] END criterion=gin, max_features=0.3, max_leaf_nodes=500; total time=   0.2s
[CV] END criterion=gin, max_features=0.3, max_leaf_nodes=500; total time=   0.2s
[CV] END criterion=gin, max_features=0.3, max_leaf_nodes=500; total time=   0.2s
[CV] END criterion=gin, max_features=0.3, max_leaf_nodes=500; total time=   0.2s
[CV] END .criterion=gin, max_features=0.3, max_leaf_nodes=50; total time=   0.2s
[CV] END .criterion=gin, max_features=0.3, max_leaf_nodes=50; total time=   0.2s
[CV] END .criterion=gin, max_features=0.3, max_leaf_nodes=50; total time=   0.2s
[CV] END .criterion=gin, max_features=0.3, max_leaf_nodes=50; total time=   0.2s
[CV] END .criterion=gin, max_features=0.3, max_leaf_nodes=50; total time=   0.2s
[CV] END criterion=gin, max_features=0.5, max_leaf_nodes=100; total time=   0.2s
[CV] END criterion=gin, max_

[CV] END criterion=entropy, max_features=0.5, max_leaf_nodes=50; total time=   3.4s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=100; total time=   5.4s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=100; total time=   5.5s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=100; total time=   5.3s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=100; total time=   5.6s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=100; total time=   5.4s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=500; total time=   6.1s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=500; total time=   6.1s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=500; total time=   6.1s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=500; total time=   6.2s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=500; total time=   6.3s
[CV] END criterion=entropy, max_features=0.7, max_leaf_nodes=50; t

75 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\17738\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\17738\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 937, in fit
    super().fit(
  File "C:\Users\17738\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 352, in fit
    criterion = CRITERIA_CLF[self.criterion](
KeyError: 'gin'

        nan        nan        nan        nan        nan        nan
        nan        nan        nan 0.68610728 0.69422703 0.67869643
 0.70374736

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] END ..........................class_weight={0: 1, 1: 1}; total time=   8.2s
[CV] END ..........................class_weight={0: 1, 1: 1}; total time=   8.2s
[CV] END ..........................class_weight={0: 1, 1: 1}; total time=   8.2s
[CV] END ..........................class_weight={0: 1, 1: 1}; total time=   8.9s
[CV] END ..........................class_weight={0: 1, 1: 1}; total time=   8.5s
[CV] END ..........................class_weight={0: 2, 1: 1}; total time=   8.6s
[CV] END ..........................class_weight={0: 2, 1: 1}; total time=   8.3s
[CV] END ..........................class_weight={0: 2, 1: 1}; total time=   8.2s
[CV] END ..........................class_weight={0: 2, 1: 1}; total time=   8.1s
[CV] END ..........................class_weight={0: 2, 1: 1}; total time=   8.2s
[CV] END ..........................class_weight={0: 3, 1: 1}; total time=   8.2s
[CV] END ..........................class_weight={

In [18]:
#打印模型最优的参数与分数
# print(clf_dt.best_params_)
# print(clf_dt.best_score_)
clf_dt.get_params()

{'ccp_alpha': 0.0,
 'class_weight': {0: 4, 1: 1},
 'criterion': 'entropy',
 'max_depth': 9,
 'max_features': 0.9,
 'max_leaf_nodes': 100,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 82,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}

In [20]:
#输出模型最终在测试集上的表现
clf_dt_auc = auc_score_fig(clf_dt,test_features,test_target)
print("决策树模型最终在测试集上的auc为：",clf_dt_auc)

决策树模型最终在测试集上的auc为： 0.7088282541775917


In [21]:
#模型保存
joblib.dump(clf_dt, r'C:\Users\17738\贷款违约率预测\模型存储\决策树_风控模型.pkl')

['C:\\Users\\17738\\贷款违约率预测\\模型存储\\决策树_风控模型.pkl']

# 随机森林

In [26]:
# n_estimators: 树的个数,典型范围100-500
# max_depth: 树的最大深度,典型范围10-100
# min_samples_split: 分割节点的最小样本数,典型范围2-20
# min_samples_leaf: 叶子节点的最小样本数,典型范围1-10
# max_features:划分时考虑的最大特征数量,一般取所有特征的50% - 100%
# bootstrap:是否使用放回采样,通常设为True

In [27]:
n_estimators =[i for i in range(100,500,200)]
max_depth = [i for i in range(10,100,40)] 
min_samples_split = [i for i in range(2,20,8)] 
min_samples_leaf = [i for i in range(1,10,3)] 
max_features = [i/10 for i in range(5,10,2)] 

In [28]:
params_state1 = {"n_estimators":n_estimators,"max_depth":max_depth}
params_state2 = {"min_samples_split":min_samples_split,"min_samples_leaf":min_samples_leaf}
params_state3 = {"max_features":max_features}

In [None]:
best_params = {}
clf_rf = RandomForestClassifier()
for each in [params_state1,params_state2,params_state3]:
    clf_rf_mid = GridSearchCV(estimator=clf_rf, param_grid=each, scoring='roc_auc', cv=5,verbose=2)
    clf_rf_mid.fit(train_features,train_target)
    best_params.update(clf_rf_mid.best_params_)
    clf_rf = tree.DecisionTreeClassifier(**best_params)
#模型训练
clf_rf.fit(train_features,train_target)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=100; total time= 1.7min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=100; total time= 1.7min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=100; total time= 1.7min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=100; total time= 1.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=100; total time= 1.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=300; total time= 6.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=300; total time= 5.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=300; total time= 5.3min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=300; total time= 5.3min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=10, n_estimators=300; total time= 5.2min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=100; total time= 3.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=100; total time= 3.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=100; total time= 3.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=100; total time= 3.5min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=100; total time= 3.5min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=300; total time=10.7min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=300; total time=12.9min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=300; total time=11.5min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=300; total time=11.2min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=50, n_estimators=300; total time=10.7min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=90, n_estimators=100; total time= 3.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=90, n_estimators=100; total time= 3.7min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=90, n_estimators=100; total time= 3.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=90, n_estimators=100; total time= 3.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=90, n_estimators=100; total time= 3.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=90, n_estimators=300; total time=10.6min


  estimator.fit(X_train, y_train, **fit_params)


[CV] END .....................max_depth=90, n_estimators=300; total time=10.6min


  estimator.fit(X_train, y_train, **fit_params)


In [None]:
clf_rf.get_params()

In [None]:
#输出模型最终在测试集上的表现
clf_rf_auc = auc_score_fig(clf_rf,test_features,test_target)
print("决策树模型最终在测试集上的auc为：",clf_rf_auc)

In [None]:
#模型保存
joblib.dump(clf_rf, r'C:\Users\17738\贷款违约率预测\模型存储\随机森林_风控模型.pkl')

# XGBOOST

In [None]:
n_estimators_list=[i for i in range(1,121,40)]   
learning_rate_list=[i/100 for i in range(1,42,5)]  
max_depth_list=[i for i in range(3,11,4)]
max_delta_step_list=[i for i in range(1,11,4)]  
min_child_weight_list=[i for i in range(1,13,4)]
gamma_list=[i/10 for i in range(1,11,4)]
subsample_list=[i/10 for i in range(5,11,4)]
colsample_bytree_list=[i/10 for i in range(5,11,4)]
scale_pos_weight_list=[i/10 for i in range(5,42,15)]   
reg_alpha_list=[0, 0.001, 0.005, 0.01,0.05, 0.1, 1, 2, 3]
reg_lambda_list=[0, 0.001, 0.005, 0.01,0.05, 0.1, 1, 2, 3]

In [None]:
params_state1 = {'n_estimators': n_estimators_list,'learning_rate':learning_rate_list,'objective':['binary:logistic']}
params_state2 = {'scale_pos_weight': scale_pos_weight_list,'max_delta_step':max_delta_step_list}
params_state3 = {'max_depth':max_depth_list,'min_child_weight':min_child_weight_list,'gamma':gamma_list}
params_state4 = {'subsample':subsample_list,'colsample_bytree':colsample_bytree_list}
params_state5 = {'reg_alpha':reg_alpha_list,'reg_lambda':reg_lambda_list}

In [None]:
best_params = {}
clf_xgb = XGBClassifier()
for each in [params_state1,params_state2,params_state3]:
    clf_xgb_mid = GridSearchCV(estimator=clf_xgb, param_grid=each, scoring='roc_auc', cv=5,verbose=2)
    clf_xgb_mid.fit(train_features,train_target)
    best_params.update(clf_xgb_mid.best_params_)
    clf_xgb = tree.DecisionTreeClassifier(**best_params)
#模型训练
clf_xgb.fit(train_features,train_target)

In [None]:
clf_xgb.get_params()

In [None]:
#输出模型最终在测试集上的表现
clf_xgb_auc = auc_score_fig(clf_xgb,test_features,test_target)
print("决策树模型最终在测试集上的auc为：",clf_xgb_auc)

In [None]:
#模型保存
joblib.dump(clf_xgb, r'C:\Users\17738\贷款违约率预测\模型存储\随XGB_风控模型.pkl')

In [None]:
#项目代码仅为展现本人在数据挖掘方面的技能。因时间与算力有限，当前内容仅完成模型搭建的核心环节，后期会逐步完善以下内容：
1.特征衍生：
    时序特征衍生：依据生活规律对时序特征进行拓展衍生
    特征交叉衍生：将相关特征利用常用统计函数/线性组合等方式进行交叉组合衍生；
    经验衍生：依据业务经验，重新构造一些可解释行的特征；
2.调参优化：
    尝试使用随机调参与贝叶斯调参进行参数组合的探索；
3.模型融合：
    尝试使用平均法与投票法进行模型融合；
4.AB测试模块编写：
    编写AB小样本检验逻辑，测试不同模型的实际执行效果；