In [27]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from xgboost import XGBClassifier

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_svmlight_file
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.externals import joblib
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.externals import joblib
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse import hstack
import time, os, random, sys
import math
import hyperopt.tpe
import hpsklearn.components
import hpsklearn.demo_support
import warnings
warnings.filterwarnings('ignore')
np.random.seed(1)

In [39]:
def get_leaf_indices(ensemble, x):
    x = x.astype(np.float32)
    trees = ensemble.estimators_
    n_trees = trees.shape[0]
    indices = []

    for i in range(n_trees):
        tree = trees[i][0].tree_
        indices.append(tree.apply(x))

    indices = np.column_stack(indices)
    return indices

def gbdt_lr_train_test(File):
    #GBDT
    start = time.clock()
    train_df, test_df = train_test_split(File, train_size = 0.8)
    X_train = train_df.drop(train_df.columns[0], axis=1)
    y_train = train_df[train_df.columns[0]]
    X_test = test_df.drop(test_df.columns[0], axis=1)
    y_test = test_df[test_df.columns[0]]
  
    gbclf = GradientBoostingClassifier(n_estimators=20, max_depth=4, verbose=0)
    tuned_parameter = [{'n_estimators':[20,30,40,50], 'max_depth':[5, 6, 7, 8], 'max_features':[0.5]}]
    gs_clf = GridSearchCV(gbclf, tuned_parameter, cv=5, scoring='roc_auc')
    gs_clf.fit(X_train, y_train)
    print('best parameters set found: ')
    print(gs_clf.best_params_)
    
    y_pred_gbdt = gs_clf.predict_proba(X_test)[:, 1]
    gbdt_auc = roc_auc_score(y_test, y_pred_gbdt)
    print('GBDT AUC: %.5f' % gbdt_auc)
    
    gbclf=GradientBoostingClassifier(**gs_clf.best_params_)
    gbclf.fit(X_train,y_train)
    leaf = get_leaf_indices
    X_train_leaves=leaf(gbclf,X_train.values)
    (train_rows, cols) = X_train_leaves.shape
    X_test_leaves=leaf(gbclf,X_test.values)
    gbdtenc = OneHotEncoder()
    X_trans = gbdtenc.fit_transform(np.concatenate((X_train_leaves, X_test_leaves), axis=0))
    
    #GBDT+LR
    lr = LogisticRegression()
    lr.fit(X_trans[:train_rows,:], y_train)
    y_pred_gbdtlr1 = lr.predict(X_trans[train_rows:,:])
    gbdtlr_auc1 = roc_auc_score(y_test, y_pred_gbdtlr1)
    print('GBDT + LR AUC 1: %.5f' % gbdtlr_auc1)
    
    lr = LogisticRegression(n_jobs=-1)
    X_train_ext = hstack([X_trans[:train_rows,:], X_train])
    lr.fit(X_train_ext, y_train)
    X_test_ext = hstack([X_trans[train_rows:, :], X_test])
    y_pred_gbdtlr2 = lr.predict(X_test_ext)
    gbdtlr_auc2 = roc_auc_score(y_test, y_pred_gbdtlr2)
    print('GBDT + LR AUC 2: %.5f' % gbdtlr_auc2)
    f_time =time.clock()-start
    print('GBDT time taken: %.2f'% f_time)
    
    '''#+NB
    gnb= GaussianNB()
    gnb.fit(X_train_leaves, y_train)
    Y_pred_nb=gbn.predic(X_test_leaves)
    gnb_auc = roc_auc_score(y_test,Y_pred_nb)
    print('NB auc: ', gnb_auc)'''
    start = time.clock()
    
    #svc
    svc=SVC(probability=True)
    svc.fit(X_trans[:train_rows, :], y_train)
    Y_pred_svc=svc.predict(X_trans[train_rows:,:])
    svc_auc=roc_auc_score(y_test,Y_pred_svc)
    print('GBDT + SVC auc: %.5f' % svc_auc)
    svc.fit(X_train_ext, y_train)
    y_pred_svc2=svc.predict(X_test_ext)
    svc_auc2=roc_auc_score(y_test,y_pred_svc2)
    print('GBDT + SVC auc2: %.5f' % svc_auc2)
    
    #KNN
    knn=KNeighborsClassifier(n_neighbors = 3)
    knn.fit(X_trans[:train_rows, :], y_train)
    Y_pred_knn=knn.predict(X_trans[train_rows:,:])
    knn_auc=roc_auc_score(y_test,Y_pred_knn)
    print('GBDT + KNN auc : %.5f' % knn_auc)
    knn.fit(X_train_ext, y_train)
    y_pred_knn2=knn.predict(X_test_ext)
    knn_auc2=roc_auc_score(y_test,y_pred_knn2)
    print('GBDT + KNN auc2: %.5f' % knn_auc2)
    
    '''#perceptron
    perceptron = Perceptron()
    perceptron.fit(X_trans[:train_rows, :], y_train)
    Y_pred_prec=perceptron.predict(X_trans[train_rows:, :])[:, 1]
    perc_auc=roc_auc_score(y_test,Y_pred_prec)
    print('Perceptron auc : ', perc_auc)'''
    
    '''#linear svc
    lin = LinearSVC()
    lin.fit(X_trans[:train_rows, :], y_train)
    Y_pred_lin=lin.predict(X_trans[train_rows:, :])[:, 1]
    lin_auc=roc_auc_score(y_test,Y_pred_lin)
    print('Linear SVC auc : ', lin_auc)'''
    
    #SGD
    sgd = SGDClassifier(loss='log')
    sgd.fit(X_trans[:train_rows, :], y_train)
    Y_pred_sgd=sgd.predict_proba(X_trans[train_rows:, :])[:, 1]
    sgd_auc=roc_auc_score(y_test,Y_pred_sgd)
    print('GBDT + SGD auc : %.5f' % sgd_auc)
    
    sgd.fit(X_train_ext, y_train)
    Y_pred_sgd2=sgd.predict_proba(X_test_ext)[:, 1]
    sgd_auc2=roc_auc_score(y_test,Y_pred_sgd2)
    print('GBDT + SGD auc2 : %.5f' % sgd_auc2)
    f_time =time.clock()-start
    print('GBDT + other classfier time taken: %.2f'% f_time)    
    
    #XGB
    start = time.clock()
    xgb=XGBClassifier()
    xgb.fit(X_train,y_train)
    Y_pred_xgb=xgb.predict_proba(X_test)[:,1]
    xgb_auc= roc_auc_score(y_test,Y_pred_xgb)
    print('XGB auc : %.5f' % xgb_auc)
    
    #XGB with leaves

    xgb.fit(X_trans[:train_rows,:],y_train)
    Y_pred_xgb=xgb.predict(X_trans[train_rows:,:])
    xgb_auc= roc_auc_score(y_test,Y_pred_xgb)
    print('GBDT + XGB auc: %.5f' % xgb_auc)
    
    #XGB with features ext
    
    xgb.fit(X_train_ext, y_train)
    y_pred_xgb2=xgb.predict(X_test_ext)
    xgb_auc2=roc_auc_score(y_test,y_pred_xgb2)
    print('GBDT + XGB auc2: %.5f' %xgb_auc2)
    
    f_time =time.clock()-start
    print('XGB time taken: %.2f'% f_time)
    
    #lightGBM
    start = time.clock()
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
    
    
    
    # specify your configurations as a dict
    params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l2', 'auc'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
    
    }

    # train
    gbm = lgb.train(params,
                    lgb_train,
                    num_boost_round=20,
                    valid_sets=lgb_eval,
                    early_stopping_rounds=5,
                    verbose_eval=False)



    y_pred_lgb = gbm.predict(X_test, num_iteration=gbm.best_iteration)
    # eval
    lgb_auc=roc_auc_score(y_test,y_pred_lgb)
    print('lightGBM auc : %.5f' % lgb_auc)
    
    lgb_train=lgb.Dataset(X_trans[:train_rows,:], y_train)
    lgb_eval=lgb.Dataset(X_trans[train_rows:,:], y_test, reference=lgb_train)
    gbm = lgb.train(params,
                  lgb_train,
                    num_boost_round=20,
                    valid_sets=lgb_eval,
                    early_stopping_rounds=5,
                   verbose_eval=False)
    y_pred_lgb2 =gbm.predict(X_trans[train_rows:,:], num_iteration=gbm.best_iteration)
    lgb_auc2=roc_auc_score(y_test, y_pred_lgb2)
    
    print('GBDT + lightGBM auc : %.5f' % lgb_auc2)
    
    lgb_train=lgb.Dataset(X_train_ext, y_train)
    lgb_eval=lgb.Dataset(X_test_ext, y_test, reference=lgb_train)
    gbm = lgb.train(params,
                  lgb_train,
                    num_boost_round=20,
                    valid_sets=lgb_eval,
                    early_stopping_rounds=5,
                   verbose_eval=False)
    y_pred_lgb3 =gbm.predict(X_test_ext, num_iteration=gbm.best_iteration)
    lgb_auc3=roc_auc_score(y_test, y_pred_lgb3)
    
    print('GBDT + lightGBM auc2 : %.5f' % lgb_auc3)
    
    
    f_time=time.clock()-start
    print('lightGBM time taken: %.2f'% f_time)

In [5]:
example = pd.read_csv('example.csv')

In [68]:
gbdt_lr_train_test(example) #GBDT build up & trial 1

best parameters set found: 
{'max_depth': 6, 'max_features': 0.7, 'n_estimators': 60}
gbdt auc: 0.93692
gbdt+lr auc 1: 0.78679
gbdt+lr auc 2: 0.55240
GBDT time taken:  104.3263780000002
XGB auc :  0.855922788101
XGB time taken:  0.7697490000000471


In [69]:
gbdt_lr_train_test(example) # GBDT trial 2

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 60}
gbdt auc: 0.94193
gbdt+lr auc 1: 0.78375
gbdt+lr auc 2: 0.58599
GBDT time taken:  103.21583099999998
XGB auc :  0.860663462535
XGB time taken:  0.7435100000002421


In [70]:
gbdt_lr_train_test(example) # GBDT trial 3

best parameters set found: 
{'max_depth': 6, 'max_features': 0.7, 'n_estimators': 60}
gbdt auc: 0.94117
gbdt+lr auc 1: 0.60895
gbdt+lr auc 2: 0.56951
GBDT time taken:  108.03956199999993
XGB auc :  0.865814311764
XGB time taken:  0.742572999999993


In [99]:
gbdt_lr_train_test(example) # GBDT + other classifier & trial 1

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 50}
gbdt auc: 0.93359
gbdt+lr auc 1: 0.25407
gbdt+lr auc 2: 0.60878
GBDT time taken:  102.75081399999999
SVC auc:  0.848760550543
KNN auc :  0.812629248939
SGD auc :  0.243075129057
other classfier time taken:  110.75349000000006
XGB auc :  0.847886444754
XGB time taken:  0.7494470000001456


In [100]:
gbdt_lr_train_test(example) # trial 2

best parameters set found: 
{'max_depth': 6, 'max_features': 0.7, 'n_estimators': 60}
gbdt auc: 0.94057
gbdt+lr auc 1: 0.70105
gbdt+lr auc 2: 0.57825
GBDT time taken:  103.17751399999997
SVC auc:  0.682340892043
KNN auc :  0.730245417652
SGD auc :  0.628413440271
other classfier time taken:  112.02526200000011
XGB auc :  0.8586925163
XGB time taken:  0.7487710000000334


In [101]:
gbdt_lr_train_test(example) # trial 3

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 60}
gbdt auc: 0.93655
gbdt+lr auc 1: 0.78378
gbdt+lr auc 2: 0.56882
GBDT time taken:  109.79684799999995
SVC auc:  0.774571556096
KNN auc :  0.797352959283
SGD auc :  0.723399732199
other classfier time taken:  118.22488999999996
XGB auc :  0.845128206115
XGB time taken:  0.7863509999997405


In [103]:
gbdt_lr_train_test(example) # tiral 4

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 50}
gbdt auc: 0.93189
gbdt+lr auc 1: 0.74161
gbdt+lr auc 2: 0.67170
GBDT time taken: 106.68
SVC auc: 0.85464
KNN auc : 0.77898
SGD auc :  0.679502246803
other classfier time taken: 8.38
XGB auc : 0.85209
XGB time taken: 0.80


In [105]:
gbdt_lr_train_test(example) # trial 5

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 50}
gbdt auc: 0.93098
gbdt+lr auc 1: 0.85676
gbdt+lr auc 2: 0.59048
GBDT time taken: 107.33
SVC auc: 0.83235
KNN auc : 0.78298
SGD auc : 0.78370
other classfier time taken: 7.92
XGB auc : 0.84239
XGB time taken: 0.75


In [107]:
gbdt_lr_train_test(example) # GBDT other classifier with hstack applied trial 1

best parameters set found: 
{'max_depth': 6, 'max_features': 0.7, 'n_estimators': 60}
gbdt auc: 0.93766
gbdt+lr auc 1: 0.59738
gbdt+lr auc 2: 0.69980
GBDT time taken: 108.29
SVC auc: 0.25691
SVC auc2: 0.64916
KNN auc : 0.36443
KNN auc2: 0.59169
SGD auc : 0.56503
other classfier time taken: 45.78
XGB auc : 0.84985
XGB time taken: 0.75


In [111]:
gbdt_lr_train_test(example) # trial 2

best parameters set found: 
{'max_depth': 6, 'max_features': 0.7, 'n_estimators': 60}
gbdt auc: 0.94607
gbdt+lr auc 1: 0.71121
gbdt+lr auc 2: 0.57279
GBDT time taken: 105.11
SVC auc: 0.87507
SVC auc2: 0.63568
KNN auc : 0.83690
KNN auc2: 0.57425
SGD auc : 0.65079
SGD auc2 : 0.70418
other classfier time taken: 46.35
XGB auc : 0.86396
XGB time taken: 0.76


In [112]:
gbdt_lr_train_test(example) # trial 3

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 50}
gbdt auc: 0.94345
gbdt+lr auc 1: 0.71894
gbdt+lr auc 2: 0.66335
GBDT time taken: 102.57
SVC auc: 0.73859
SVC auc2: 0.64794
KNN auc : 0.80989
KNN auc2: 0.60316
SGD auc : 0.65876
SGD auc2 : 0.69113
other classfier time taken: 44.69
XGB auc : 0.85763
XGB time taken: 0.76


In [120]:
gbdt_lr_train_test(example) # best param applied trial 1

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 60}
gbdt auc: 0.93754
gbdt+lr auc 1: 0.31657
gbdt+lr auc 2: 0.60585
GBDT time taken: 112.12
SVC auc: 0.73184
SVC auc2: 0.64691
KNN auc : 0.65834
KNN auc2: 0.58379
SGD auc : 0.35641
SGD auc2 : 0.28890
other classfier time taken: 100.33
XGB auc : 0.85757
XGB time taken: 0.76


In [121]:
gbdt_lr_train_test(example) # best param applied trial 2

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 60}
gbdt auc: 0.94696
gbdt+lr auc 1: 0.51043
gbdt+lr auc 2: 0.78554
GBDT time taken: 104.17
SVC auc: 0.55622
SVC auc2: 0.63785
KNN auc : 0.56019
KNN auc2: 0.63233
SGD auc : 0.52737
SGD auc2 : 0.40444
other classfier time taken: 98.07
XGB auc : 0.87152
XGB time taken: 0.74


In [123]:
gbdt_lr_train_test(example) # random state applied trial 1

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 60}
gbdt auc: 0.94504
gbdt+lr auc 1: 0.64715
gbdt+lr auc 2: 0.77241
GBDT time taken: 106.52
SVC auc: 0.52633
SVC auc2: 0.63692
KNN auc : 0.56709
KNN auc2: 0.60222
SGD auc : 0.71175
SGD auc2 : 0.66544
other classfier time taken: 99.37
XGB auc : 0.86043
XGB time taken: 0.75


In [124]:
gbdt_lr_train_test(example) # random state applied trial 2

best parameters set found: 
{'max_depth': 6, 'max_features': 0.7, 'n_estimators': 60}
gbdt auc: 0.94366
gbdt+lr auc 1: 0.32087
gbdt+lr auc 2: 0.64378
GBDT time taken: 105.85
SVC auc: 0.35119
SVC auc2: 0.63692
KNN auc : 0.42370
KNN auc2: 0.60279
SGD auc : 0.32598
SGD auc2 : 0.33507
other classfier time taken: 97.21
XGB auc : 0.86043
XGB time taken: 0.77


In [125]:
gbdt_lr_train_test(example) # random state applied trial 3

best parameters set found: 
{'max_depth': 6, 'max_features': 0.7, 'n_estimators': 50}
gbdt auc: 0.94512
gbdt+lr auc 1: 0.34193
gbdt+lr auc 2: 0.64895
GBDT time taken: 105.30
SVC auc: 0.34762
SVC auc2: 0.64156
KNN auc : 0.44609
KNN auc2: 0.60032
SGD auc : 0.38596
SGD auc2 : 0.40221
other classfier time taken: 82.48
XGB auc : 0.86043
XGB time taken: 0.75


In [127]:
gbdt_lr_train_test(example) # more n_estimators from n_estimators':[30, 40, 50, 60]

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 90}
gbdt auc: 0.94659
gbdt+lr auc 1: 0.51817
gbdt+lr auc 2: 0.60234
GBDT time taken: 284.58
SVC auc: 0.68544
SVC auc2: 0.63380
KNN auc : 0.70583
KNN auc2: 0.60326
SGD auc : 0.56060
SGD auc2 : 0.45154
other classfier time taken: 146.06
XGB auc : 0.86043
XGB time taken: 0.76


In [129]:
gbdt_lr_train_test(example) # more n_estimators trial 2

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 90}
gbdt auc: 0.94781
gbdt+lr auc 1: 0.31465
gbdt+lr auc 2: 0.64597
GBDT time taken: 150.51
SVC auc: 0.62778
SVC auc2: 0.63317
KNN auc : 0.47201
KNN auc2: 0.60271
SGD auc : 0.32002
SGD auc2 : 0.34882
other classfier time taken: 147.91
XGB auc : 0.86043
XGB time taken: 0.77


In [130]:
gbdt_lr_train_test(example) #changing gridsaerch [{'n_estimators':[90], 'max_depth':[3, 4, 5, 6], 'max_features':[0.5,0.7,0.9]}]

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 100}
gbdt auc: 0.94683
gbdt+lr auc 1: 0.61263
gbdt+lr auc 2: 0.64518
GBDT time taken: 149.22
SVC auc: 0.48760
SVC auc2: 0.63311
KNN auc : 0.53694
KNN auc2: 0.60320
SGD auc : 0.57953
SGD auc2 : 0.55953
other classfier time taken: 162.01
XGB auc : 0.86043
XGB time taken: 0.75


In [132]:
gbdt_lr_train_test(example) #changing gridsaerch [{'n_estimators':[90], 'max_depth':[3, 4, 5, 6], 'max_features':[0.5,0.7,0.9]}]

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 100}
gbdt auc: 0.94791
gbdt+lr auc 1: 0.40520
gbdt+lr auc 2: 0.60060
GBDT time taken: 35.26
SVC auc: 0.51643
SVC auc2: 0.63318
KNN auc : 0.58031
KNN auc2: 0.60287
SGD auc : 0.50315
SGD auc2 : 0.46836
other classfier time taken: 163.81
XGB auc : 0.86043
XGB time taken: 0.78


In [136]:
gbdt_lr_train_test(example)  # GBM random state testing

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 100}
gbdt auc: 0.94922
gbdt+lr auc 1: 0.65693
gbdt+lr auc 2: 0.68943
GBDT time taken: 36.36
SVC auc: 0.83163
SVC auc2: 0.63333
KNN auc : 0.77722
KNN auc2: 0.60285
SGD auc : 0.59360
SGD auc2 : 0.72324
other classfier time taken: 165.64
XGB auc : 0.86043
XGB time taken: 0.83


In [137]:
gbdt_lr_train_test(example)  # GBM random state testing2

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 100}
gbdt auc: 0.94922
gbdt+lr auc 1: 0.65693
gbdt+lr auc 2: 0.68943
GBDT time taken: 36.92
SVC auc: 0.83163
SVC auc2: 0.63333
KNN auc : 0.77722
KNN auc2: 0.60285
SGD auc : 0.68002
SGD auc2 : 0.54093
other classfier time taken: 162.25
XGB auc : 0.86043
XGB time taken: 0.75


In [138]:
gbdt_lr_train_test(example)  # GBM random state testing3

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 100}
gbdt auc: 0.94922
gbdt+lr auc 1: 0.65693
gbdt+lr auc 2: 0.68943
GBDT time taken: 35.76
SVC auc: 0.83163
SVC auc2: 0.63295
KNN auc : 0.77722
KNN auc2: 0.60285
SGD auc : 0.59181
SGD auc2 : 0.57397
other classfier time taken: 168.57
XGB auc : 0.86043
XGB time taken: 0.79


In [140]:
gbdt_lr_train_test(example) # more n_estimators

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 100}
gbdt auc: 0.94922
gbdt+lr auc 1: 0.65693
gbdt+lr auc 2: 0.68943
GBDT time taken: 57.79
SVC auc: 0.83163
SVC auc2: 0.63333
KNN auc : 0.77722
KNN auc2: 0.60285
SGD auc : 0.55097
SGD auc2 : 0.68116
other classfier time taken: 165.75
XGB auc : 0.86043
XGB time taken: 0.78


In [33]:
gbdt_lr_train_test(example) 

best parameters set found: 
{'max_depth': 7, 'max_features': 0.5, 'n_estimators': 50}
GBDT AUC: 0.93387
GBDT + LR AUC 1: 0.57547
GBDT + LR AUC 2: 0.53239
GBDT time taken: 47.67
GBDT + SVC auc: 0.56686
GBDT + SVC auc2: 0.59621
GBDT + KNN auc : 0.51987
GBDT + KNN auc2: 0.57949
GBDT + SGD auc : 0.56533
GBDT + SGD auc2 : 0.50045
GBDT + other classfier time taken: 85.76
XGB auc : 0.92199
GBDT + XGB auc: 0.49926
GBDT + XGB auc2: 0.73417
XGB time taken: 4.27
lightGBM auc : 0.91580
GBDT + lightGBM auc : 0.45931
GBDT + lightGBM auc2 : 0.80382
lightGBM time taken: 0.81


In [34]:
gbdt_lr_train_test(example) 

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 50}
GBDT AUC: 0.92810
GBDT + LR AUC 1: 0.33413
GBDT + LR AUC 2: 0.58705
GBDT time taken: 43.65
GBDT + SVC auc: 0.45791
GBDT + SVC auc2: 0.58047
GBDT + KNN auc : 0.58497
GBDT + KNN auc2: 0.55411
GBDT + SGD auc : 0.28083
GBDT + SGD auc2 : 0.50180
GBDT + other classfier time taken: 83.33
XGB auc : 0.92070
GBDT + XGB auc: 0.54235
GBDT + XGB auc2: 0.75178
XGB time taken: 3.83
lightGBM auc : 0.91952
GBDT + lightGBM auc : 0.65808
GBDT + lightGBM auc2 : 0.67432
lightGBM time taken: 0.72


In [35]:
gbdt_lr_train_test(example) 

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 50}
GBDT AUC: 0.93956
GBDT + LR AUC 1: 0.63306
GBDT + LR AUC 2: 0.59498
GBDT time taken: 43.24
GBDT + SVC auc: 0.48879
GBDT + SVC auc2: 0.60267
GBDT + KNN auc : 0.59776
GBDT + KNN auc2: 0.59349
GBDT + SGD auc : 0.69463
GBDT + SGD auc2 : 0.49184
GBDT + other classfier time taken: 86.20
XGB auc : 0.93319
GBDT + XGB auc: 0.46712
GBDT + XGB auc2: 0.64323
XGB time taken: 4.10
lightGBM auc : 0.93369
GBDT + lightGBM auc : 0.49726
GBDT + lightGBM auc2 : 0.70421
lightGBM time taken: 0.79


In [36]:
gbdt_lr_train_test(example) 

best parameters set found: 
{'max_depth': 6, 'max_features': 0.5, 'n_estimators': 50}
GBDT AUC: 0.94437
GBDT + LR AUC 1: 0.58962
GBDT + LR AUC 2: 0.63072
GBDT time taken: 45.69
GBDT + SVC auc: 0.56931
GBDT + SVC auc2: 0.58028
GBDT + KNN auc : 0.49735
GBDT + KNN auc2: 0.55879
GBDT + SGD auc : 0.53380
GBDT + SGD auc2 : 0.50548
GBDT + other classfier time taken: 85.08
XGB auc : 0.93797
GBDT + XGB auc: 0.39039
GBDT + XGB auc2: 0.51196
XGB time taken: 3.86
lightGBM auc : 0.93462
GBDT + lightGBM auc : 0.54676
GBDT + lightGBM auc2 : 0.67607
lightGBM time taken: 0.70


In [40]:
gbdt_lr_train_test(example)  # changed apply func

best parameters set found: 
{'max_depth': 8, 'max_features': 0.5, 'n_estimators': 40}
GBDT AUC: 0.94238
GBDT + LR AUC 1: 0.86745
GBDT + LR AUC 2: 0.66643
GBDT time taken: 46.74
GBDT + SVC auc: 0.85564
GBDT + SVC auc2: 0.59346
GBDT + KNN auc : 0.84818
GBDT + KNN auc2: 0.58199
GBDT + SGD auc : 0.91196
GBDT + SGD auc2 : 0.50000
GBDT + other classfier time taken: 74.19
XGB auc : 0.93970
GBDT + XGB auc: 0.85874
GBDT + XGB auc2: 0.86823
XGB time taken: 4.17
lightGBM auc : 0.93742
GBDT + lightGBM auc : 0.92733
GBDT + lightGBM auc2 : 0.93174
lightGBM time taken: 1.17


In [41]:
gbdt_lr_train_test(example)  # changed apply func 2

best parameters set found: 
{'max_depth': 7, 'max_features': 0.5, 'n_estimators': 40}
GBDT AUC: 0.93967
GBDT + LR AUC 1: 0.85610
GBDT + LR AUC 2: 0.66004
GBDT time taken: 45.69
GBDT + SVC auc: 0.85620
GBDT + SVC auc2: 0.60296
GBDT + KNN auc : 0.85299
GBDT + KNN auc2: 0.58810
GBDT + SGD auc : 0.91376
GBDT + SGD auc2 : 0.55573
GBDT + other classfier time taken: 68.78
XGB auc : 0.93433
GBDT + XGB auc: 0.86995
GBDT + XGB auc2: 0.86995
XGB time taken: 3.87
lightGBM auc : 0.93040
GBDT + lightGBM auc : 0.92682
GBDT + lightGBM auc2 : 0.92661
lightGBM time taken: 1.07


In [42]:
gbdt_lr_train_test(example)  # changed apply func 3

best parameters set found: 
{'max_depth': 7, 'max_features': 0.5, 'n_estimators': 50}
GBDT AUC: 0.93733
GBDT + LR AUC 1: 0.85488
GBDT + LR AUC 2: 0.60898
GBDT time taken: 46.91
GBDT + SVC auc: 0.83817
GBDT + SVC auc2: 0.59360
GBDT + KNN auc : 0.85140
GBDT + KNN auc2: 0.57684
GBDT + SGD auc : 0.90615
GBDT + SGD auc2 : 0.49595
GBDT + other classfier time taken: 85.28
XGB auc : 0.93180
GBDT + XGB auc: 0.85971
GBDT + XGB auc2: 0.85492
XGB time taken: 4.48
lightGBM auc : 0.92683
GBDT + lightGBM auc : 0.92725
GBDT + lightGBM auc2 : 0.92777
lightGBM time taken: 1.22
