## model ensemble
1. 准备数据-载入各模型的预测概率  
2. 融合训练-找到各模型的权重  
3. 模型融合-Random Forest,LightGBM,XGBoost_LR,LFM

In [2]:
import pickle

import lightgbm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from lightgbm.sklearn import LGBMClassifier
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

### 1.准备数据

In [4]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as the object has been read, even if unpickling raises.
    """
    # pickle can run arbitrary code while loading; only read files produced by
    # this project's own pipeline.
    with open(path, "rb") as fh:
        return pickle.load(fh)


# For rf / lgbm / xgb_lr only column 1 of the stored array is kept
# (presumably the positive-class probability from predict_proba -- confirm upstream).
rf_pred = _load_pickle("./result/rf_pred.pkl").astype('float32')[:,1]
lgbm_pred = _load_pickle("./result/lgbm_pred.pkl").astype('float32')[:,1]
xgb_lr_pred = _load_pickle("./result/xgb_lr_pred.pkl").astype('float32')[:,1]
# The LFM predictions are used as stored, with no column selection.
lfm_pred = _load_pickle("./result/lfm_pred.pkl").astype('float32')

In [5]:
%%time
# Turn each base model's predictions into a single column and place the four
# columns side by side, in the order rf, lgbm, xgb_lr, lfm.
base_columns = [
    pred.reshape(-1,1)
    for pred in (rf_pred, lgbm_pred, xgb_lr_pred, lfm_pred)
]
X_train = np.concatenate(base_columns, axis=1)

Wall time: 0 ns


In [6]:
# Sanity check: rows are samples, columns are the four stacked base-model predictions.
X_train.shape

(15398, 4)

In [7]:
# Load the training labels and cast them to int32. The `with` block closes the
# file handle once the array has been read (the bare open() call never did).
# pickle can run arbitrary code while loading; only read trusted files.
with open("./result/y_train.pkl","rb") as fh:
    y_train = pickle.load(fh).astype('int32')

### 2.融合训练

2.1 LR

In [12]:
%%time
# Meta-learner: logistic regression over the four base-model prediction columns.
# n_jobs is left at its default. The previously recorded run emitted a
# scikit-learn warning about n_jobs (truncated in the saved output), and for a
# two-class target the setting does not change the fitted model.
lr = LogisticRegression()
lr.fit(X_train,y_train)

Wall time: 70 ms


  " = {}.".format(effective_n_jobs(self.n_jobs)))


In [13]:
# AUC of the LR meta-learner, using column 1 of predict_proba as the score.
# NOTE(review): X_train/y_train are the same rows the model was fitted on, so
# this is an in-sample figure and may be optimistic.
y_proba = lr.predict_proba(X_train)
auc = roc_auc_score(y_train,y_proba[:,1])
print(auc)

0.9999979589195827


In [14]:
# Accuracy of the LR meta-learner's hard class predictions, again measured on
# the rows used for fitting (in-sample).
y_pred = lr.predict(X_train)
accuracy = accuracy_score(y_train,y_pred)
print(accuracy)

0.9994155085075984


In [15]:
# Fitted LR coefficients, one per X_train column (rf, lgbm, xgb_lr, lfm, in that order).
lr.coef_

array([[1.78050694, 8.35414883, 2.78591986, 1.42960469]])

2.2 LRCV

In [23]:
%%time
# Logistic regression with built-in cross-validation (cv=5), a fixed
# random_state for repeatability and 4 parallel jobs.
# The previously recorded run of this cell took over two minutes.
lrcv = LogisticRegressionCV(n_jobs=4,cv=5,random_state=6)
lrcv.fit(X_train,y_train)

Wall time: 2min 18s


In [24]:
# AUC of the cross-validated LR, scored on the same rows it was fitted on.
# NOTE(review): the recorded value (~0.80) is far below the plain LR's ~1.0 on
# identical data -- worth investigating before relying on either number.
y_proba_cv = lrcv.predict_proba(X_train)
auc_cv = roc_auc_score(y_train,y_proba_cv[:,1])
print(auc_cv)

0.8005876565437465


2.3 Tree

In [8]:
%%time
# Decision tree as an alternative meta-learner over the same four columns.
# Hyperparameters are gathered in one place so they are easy to spot and tune.
tree_params = {
    'max_depth': 8,
    'min_samples_split': 50,
    'random_state': 6,
}
tree = DecisionTreeClassifier(**tree_params)
tree.fit(X_train,y_train)

Wall time: 10.1 ms


In [9]:
# Class probabilities from the fitted tree for the rows it was trained on.
y_proba_tree = tree.predict_proba(X_train)

In [10]:
# AUC of the tree, using column 1 of its predicted probabilities as the score.
# NOTE(review): y_proba_tree was computed on the training rows, so this is in-sample.
auc_tree = roc_auc_score(y_train,y_proba_tree[:,1])
print(auc_tree)

0.999999914059772


In [11]:
# Importance per X_train column (rf, lgbm, xgb_lr, lfm, in that order).
# In the recorded output all importance falls on the second column (lgbm).
tree.feature_importances_

array([0., 1., 0., 0.])

### 3.模型融合

In [18]:
# Hand-chosen blending weight for each base model.
blend_w = {'rf': 0.1, 'lgbm': 0.6, 'xgb_lr': 0.22, 'lfm': 0.08}

# Scale each model's predictions by its weight, then add the terms left to
# right in the order rf, lgbm, xgb_lr, lfm.
weighted_terms = [
    pred * blend_w[name]
    for name, pred in (
        ('rf', rf_pred),
        ('lgbm', lgbm_pred),
        ('xgb_lr', xgb_lr_pred),
        ('lfm', lfm_pred),
    )
]
blend_pred = sum(weighted_terms[1:], weighted_terms[0])

In [19]:
# AUC of the weighted blend against the training labels.
# NOTE(review): measured on y_train only; no held-out set appears in this notebook.
blend_auc = roc_auc_score(y_train,blend_pred)
print(blend_auc)

0.9999980878299248
