# GBM
---

In [1]:
import pandas as pd
import numpy as np

%matplotlib inline

### data preprocessing
---

In [2]:
train = pd.read_csv('data/train.csv', sep='\t', index_col='Unnamed: 0')
test = pd.read_csv('data/test.csv', sep='\t', index_col='Unnamed: 0')

In [3]:
all_data = pd.concat([train, test])

In [4]:
drop_columns = ['9', '140', '164', '11',
                '5', '129', '130', '137', '138', '141', '149', '150', '178', '186', '188', '192', '193', '291', '301', '303', '305',
                '152', '160']
                # '191', '182', '185', '181', '172', '170', '157', '136', '135'] 

In [5]:
all_data = all_data.drop(drop_columns, axis=1)

In [6]:
len(all_data.columns)

323

In [7]:
train =  all_data[all_data['0'].isnull() == False]
test =  all_data[all_data['0'].isnull()]

test = test.drop(['0'], axis=1)

In [8]:
target = train['0']
train = train.drop('0', axis=1)

---

In [9]:
from sklearn.model_selection import StratifiedKFold

In [10]:
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

## 3. XGBOOST
---

In [11]:
import xgboost as xgb

In [12]:
parameters = {
    #default
    'objective': 'reg:logistic',
    'eta': 0.01, # я использовал 0.1,а потом поменял(забыл поменять с самого начала)
    'silent': 1,
    "nthread": 4,
    "random_seed": 1,
    "eval_metric": 'auc',
    
    # regularization parameters
    'max_depth': 5,
    'subsample': 0.9,
    'colsample_bytree': 0.55,
    
    # lightgbm approach
#     'tree_method': 'hist'
    'grow_policy': 'lossguide'
}


xgb_train = xgb.DMatrix(train, target, feature_names=train.columns)

In [93]:
num_rounds = 10000
results = xgb.cv(parameters, xgb_train, num_rounds,
                 early_stopping_rounds=30, folds=skf, verbose_eval=1)

[0]	train-auc:0.689089+0.00582073	test-auc:0.672596+0.00664509
[1]	train-auc:0.706671+0.000682798	test-auc:0.686373+0.00438737
[2]	train-auc:0.71628+0.00233244	test-auc:0.690636+0.00748063
[3]	train-auc:0.721849+0.000441797	test-auc:0.694341+0.0061375
[4]	train-auc:0.724826+0.00241332	test-auc:0.697601+0.00771147
[5]	train-auc:0.730908+0.000800067	test-auc:0.701822+0.00601881
[6]	train-auc:0.735751+0.00106913	test-auc:0.704082+0.00491479
[7]	train-auc:0.739354+0.000797224	test-auc:0.706388+0.00495068
[8]	train-auc:0.741635+0.00126298	test-auc:0.707842+0.00462567
[9]	train-auc:0.744884+0.00233074	test-auc:0.708986+0.00510892
[10]	train-auc:0.747604+0.00207105	test-auc:0.70965+0.00497839
[11]	train-auc:0.749631+0.00224842	test-auc:0.710455+0.00536546
[12]	train-auc:0.752128+0.00220371	test-auc:0.711753+0.0057457
[13]	train-auc:0.753814+0.00168269	test-auc:0.712168+0.0058461
[14]	train-auc:0.755271+0.00197123	test-auc:0.712854+0.00543583
[15]	train-auc:0.757652+0.00208597	test-auc:0.71388

In [95]:
results.sort_values('test-auc-mean', ascending=False)['test-auc-mean'].head(1)

92    0.736324
Name: test-auc-mean, dtype: float64

P.S. забыл поменять learning rate но сабмитов мало осталось `-\_(:/)_/-`

In [100]:
bst = xgb.train(parameters, xgb_train, num_boost_round=130)

### submit
---

In [110]:
xgb_test = xgb.DMatrix(test, feature_names=train.columns)

In [115]:
test_pred_xgb = bst.predict(xgb_test)

In [116]:
submit = test.copy()
submit = submit.drop(test.columns, axis=1)
submit['_ID_'] = submit.index
submit['_VAL_'] = test_pred_xgb

In [114]:
submit.to_csv('submissions/xgboost.csv', index=False)

---

## 4. LightGBM
---

In [13]:
import lightgbm as lgb

In [14]:
parameters = {
    'objective': 'binary',
    'learning_rate': 0.01,
    'num_threads': 4,
    "metric": 'auc',
    
    # regularization
    'colsample_bytree': 0.7,
    'subsample': 0.9,
    'subsample_freq': 1,
    'min_data_in_leaf': 15
}


n_rounds = 10000

lgb_train = lgb.Dataset(train, label=target, free_raw_data=False)

In [192]:
result = lgb.cv(parameters, lgb_train, n_rounds, folds=skf.split(train, target), early_stopping_rounds=50, verbose_eval=1)

[1]	cv_agg's auc: 0.679081 + 0.00213977
[2]	cv_agg's auc: 0.686022 + 0.00218049
[3]	cv_agg's auc: 0.691803 + 0.001537
[4]	cv_agg's auc: 0.698325 + 0.000980682
[5]	cv_agg's auc: 0.700149 + 0.000708645
[6]	cv_agg's auc: 0.700548 + 0.000518869
[7]	cv_agg's auc: 0.701129 + 0.000327849
[8]	cv_agg's auc: 0.70296 + 0.00251308
[9]	cv_agg's auc: 0.70334 + 0.00277826
[10]	cv_agg's auc: 0.704164 + 0.00287596
[11]	cv_agg's auc: 0.704385 + 0.00277416
[12]	cv_agg's auc: 0.70449 + 0.00293895
[13]	cv_agg's auc: 0.705562 + 0.00333359
[14]	cv_agg's auc: 0.706516 + 0.00317658
[15]	cv_agg's auc: 0.706302 + 0.00255524
[16]	cv_agg's auc: 0.706811 + 0.00255725
[17]	cv_agg's auc: 0.708319 + 0.00219934
[18]	cv_agg's auc: 0.708491 + 0.00181563
[19]	cv_agg's auc: 0.709069 + 0.00224263
[20]	cv_agg's auc: 0.709443 + 0.0022233
[21]	cv_agg's auc: 0.70903 + 0.00262701
[22]	cv_agg's auc: 0.70906 + 0.00309024
[23]	cv_agg's auc: 0.709121 + 0.00317785
[24]	cv_agg's auc: 0.709403 + 0.00330731
[25]	cv_agg's auc: 0.709271 +

In [193]:
max(result['auc-mean'])

0.7431390912421514

In [195]:
bstl = lgb.train(parameters, lgb_train, num_boost_round=1000)

### submit
---

In [196]:
# lgb_test = lgb.Dataset(test)

In [199]:
test_pred_lgb = bstl.predict(test)

In [200]:
submit = test.copy()
submit = submit.drop(test.columns, axis=1)
submit['_ID_'] = submit.index
submit['_VAL_'] = test_pred_lgb

In [201]:
submit.to_csv('submissions/lightgbm.csv', index=False)

---

## 5. Catboost
---

In [16]:
import catboost as ctb

In [17]:
parameters = {
    'loss_function':'Logloss',
    'eval_metric': 'AUC',
    'iterations': 10000,
    'learning_rate': 0.03,
    'random_seed': 42,
    'use_best_model': False,
    'od_wait': 50,
    'od_type': 'Iter',
    
#     # reg
#     'rsm': 0.8,
#     'l2_leaf_reg': 2
    
}

In [18]:
ctb_data = ctb.Pool(train, target)

In [243]:
ctb_results = ctb.cv(ctb_data, parameters, stratified=True, seed=42, logging_level='Verbose')

In [246]:
best_ctb = ctb.train(ctb_data, parameters, num_boost_round=850)

0:	learn: 0.6039914	total: 217ms	remaining: 3m 4s
1:	learn: 0.6309516	total: 454ms	remaining: 3m 12s
2:	learn: 0.6324938	total: 654ms	remaining: 3m 4s
3:	learn: 0.6474709	total: 888ms	remaining: 3m 7s
4:	learn: 0.6516380	total: 1.16s	remaining: 3m 16s
5:	learn: 0.6608076	total: 1.4s	remaining: 3m 17s
6:	learn: 0.6635945	total: 1.66s	remaining: 3m 19s
7:	learn: 0.6684154	total: 1.87s	remaining: 3m 17s
8:	learn: 0.6702819	total: 2.13s	remaining: 3m 18s
9:	learn: 0.6705592	total: 2.32s	remaining: 3m 14s
10:	learn: 0.6749002	total: 2.54s	remaining: 3m 13s
11:	learn: 0.6790555	total: 2.79s	remaining: 3m 14s
12:	learn: 0.6835250	total: 2.97s	remaining: 3m 11s
13:	learn: 0.6840838	total: 3.24s	remaining: 3m 13s
14:	learn: 0.6850613	total: 3.49s	remaining: 3m 14s
15:	learn: 0.6876162	total: 3.69s	remaining: 3m 12s
16:	learn: 0.6873918	total: 3.97s	remaining: 3m 14s
17:	learn: 0.6878326	total: 4.26s	remaining: 3m 16s
18:	learn: 0.6886142	total: 4.5s	remaining: 3m 16s
19:	learn: 0.6911530	total:

### submit
---

In [251]:
test_pred_ctb = best_ctb.predict(test)

In [248]:
submit = test.copy()
submit = submit.drop(test.columns, axis=1)
submit['_ID_'] = submit.index
submit['_VAL_'] = test_pred_ctb

#### ???

In [249]:
submit.head()

Unnamed: 0,_ID_,_VAL_
0,0,-1.778764
1,1,0.13185
2,2,-1.339785
3,3,-0.696541
4,4,0.179437


In [254]:
submit.to_csv('submissions/catboost.csv', index=False)

---

## 6. Ensembles

In [19]:
from catboost import CatBoostClassifier

In [20]:
from xgboost import XGBClassifier

In [21]:
from lightgbm import LGBMClassifier

In [22]:
ensemble_train = train.copy()

In [23]:
ensemble_train = ensemble_train.drop(train.columns, axis=1)

In [24]:
ensemble_test = test.copy()

In [25]:
ensemble_test = ensemble_test.drop(test.columns, axis=1)

In [117]:
xgb_parameters = {
    #default
    'objective': 'reg:logistic',
    'eta': 0.01,
    'silent': 1,
    "nthread": 4,
    "random_seed": 1,
    "eval_metric": 'auc',
    
    # regularization parameters
    'max_depth': 5,
    'subsample': 0.9,
    'colsample_bytree': 0.55,
    
    # lightgbm approach
#     'tree_method': 'hist'
    'grow_policy': 'lossguide',
    'num_boost_rounds': 10000
}

xgb_n_rounds = 10000

xgb_train = xgb.DMatrix(train, target, feature_names=train.columns)

In [37]:
catboost_parameters = {
    'loss_function':'Logloss',
    'eval_metric': 'AUC',
    'iterations': 10000,
    'learning_rate': 0.03,
    'random_seed': 42,
    'use_best_model': False,
    'od_wait': 50,
    'od_type': 'Iter',
    
#     # reg
#     'rsm': 0.8,
#     'l2_leaf_reg': 2
    
}

In [116]:
lgbm_parameters = {
    'objective': 'binary',
    'learning_rate': 0.01,
    'num_threads': 4,
    "metric": 'auc',
    
    # regularization
    'colsample_bytree': 0.7,
    'subsample': 0.9,
    'subsample_freq': 1,
    'min_data_in_leaf': 15,
    'num_boost_rounds': 10000
}


lgbm_n_rounds = 10000

lgb_train = lgb.Dataset(train, label=target, free_raw_data=False)

In [74]:
xgb_clf = XGBClassifier(num_boost_round=10000, early_stopping_rounds=30, **xgb_parameters)

In [30]:
ctb_clf = CatBoostClassifier(**catboost_parameters)

In [75]:
lgbm_clf = LGBMClassifier(num_boost_round=10000, early_stopping_rounds=50, **lgbm_parameters)

In [79]:
i = 0

for tr, val in skf.split(train, target):
    print(i)
    
    X = train.iloc[tr]
    y = target[tr]
    X_val = train.iloc[val]
    y_val = target[val]
    
    lgbm_clf.fit(X, y, eval_set=[(X, y), (X_val, y_val)], eval_metric='auc')
    xgb_clf.fit(X, y, eval_set=[(X, y), (X_val, y_val)], eval_metric='auc')
    ctb_clf.fit(X, y, eval_set=(X_val, y_val))
    
    ensemble_train.loc[val, 'lgbm'] = lgbm_clf.predict_proba(X_val)[:,1]
    ensemble_train.loc[val, 'xgb'] = xgb_clf.predict_proba(X_val)[:,1]
    ensemble_train.loc[val, 'ctb'] = ctb_clf.predict_proba(X_val)[:,1]

    
    ensemble_test['lgbm_{}'.format(i)] = lgbm_clf.predict_proba(test)[:,1]
    ensemble_test['xgb_{}'.format(i)] = xgb_clf.predict_proba(test)[:,1]
    ensemble_test['ctb_{}'.format(i)] = ctb_clf.predict_proba(test)[:,1]
    
    i+=1

0




[1]	training's auc: 0.697378	valid_1's auc: 0.677118
Training until validation scores don't improve for 50 rounds.
[2]	training's auc: 0.714185	valid_1's auc: 0.690637
[3]	training's auc: 0.71859	valid_1's auc: 0.695599
[4]	training's auc: 0.723053	valid_1's auc: 0.696862
[5]	training's auc: 0.722906	valid_1's auc: 0.696923
[6]	training's auc: 0.724226	valid_1's auc: 0.697216
[7]	training's auc: 0.72653	valid_1's auc: 0.699937
[8]	training's auc: 0.729312	valid_1's auc: 0.7017
[9]	training's auc: 0.729913	valid_1's auc: 0.702651
[10]	training's auc: 0.731642	valid_1's auc: 0.702779
[11]	training's auc: 0.732316	valid_1's auc: 0.703739
[12]	training's auc: 0.732115	valid_1's auc: 0.703144
[13]	training's auc: 0.732711	valid_1's auc: 0.70385
[14]	training's auc: 0.732227	valid_1's auc: 0.7035
[15]	training's auc: 0.733148	valid_1's auc: 0.704477
[16]	training's auc: 0.734391	valid_1's auc: 0.705475
[17]	training's auc: 0.73425	valid_1's auc: 0.705784
[18]	training's auc: 0.733784	valid_1



[1]	training's auc: 0.696921	valid_1's auc: 0.672974
Training until validation scores don't improve for 50 rounds.
[2]	training's auc: 0.708754	valid_1's auc: 0.682938
[3]	training's auc: 0.716434	valid_1's auc: 0.690022
[4]	training's auc: 0.719302	valid_1's auc: 0.694201
[5]	training's auc: 0.718531	valid_1's auc: 0.694644
[6]	training's auc: 0.719718	valid_1's auc: 0.695233
[7]	training's auc: 0.725523	valid_1's auc: 0.697917
[8]	training's auc: 0.727963	valid_1's auc: 0.699203
[9]	training's auc: 0.730808	valid_1's auc: 0.701214
[10]	training's auc: 0.731943	valid_1's auc: 0.702171
[11]	training's auc: 0.731738	valid_1's auc: 0.70233
[12]	training's auc: 0.731517	valid_1's auc: 0.702474
[13]	training's auc: 0.731323	valid_1's auc: 0.702538
[14]	training's auc: 0.731723	valid_1's auc: 0.702734
[15]	training's auc: 0.73279	valid_1's auc: 0.702656
[16]	training's auc: 0.734674	valid_1's auc: 0.703448
[17]	training's auc: 0.73534	valid_1's auc: 0.705351
[18]	training's auc: 0.735092	va



[1]	training's auc: 0.700741	valid_1's auc: 0.686416
Training until validation scores don't improve for 50 rounds.
[2]	training's auc: 0.71333	valid_1's auc: 0.69341
[3]	training's auc: 0.719588	valid_1's auc: 0.697091
[4]	training's auc: 0.722421	valid_1's auc: 0.695635
[5]	training's auc: 0.723655	valid_1's auc: 0.698154
[6]	training's auc: 0.724666	valid_1's auc: 0.699366
[7]	training's auc: 0.726879	valid_1's auc: 0.705022
[8]	training's auc: 0.728497	valid_1's auc: 0.706729
[9]	training's auc: 0.730182	valid_1's auc: 0.708473
[10]	training's auc: 0.729455	valid_1's auc: 0.707756
[11]	training's auc: 0.731615	valid_1's auc: 0.70829
[12]	training's auc: 0.731913	valid_1's auc: 0.709036
[13]	training's auc: 0.734825	valid_1's auc: 0.711147
[14]	training's auc: 0.734614	valid_1's auc: 0.711194
[15]	training's auc: 0.735124	valid_1's auc: 0.711575
[16]	training's auc: 0.734611	valid_1's auc: 0.711094
[17]	training's auc: 0.734387	valid_1's auc: 0.710908
[18]	training's auc: 0.735306	va

---

In [92]:
from sklearn.metrics import roc_auc_score
from scipy.stats import rankdata
from scipy.optimize import minimize

In [93]:
def optimize_weights(preds, y, method = "Nelder-Mead"):
    def rocauc(x):
        weights  = np.array(np.exp(x)/np.sum(np.exp(x)))
        pred = weights.dot(preds)# sum([p*w for (p,w) in zip(preds,weights)])
        #log.debug("weights: {} auc: {}".format(weights, roc_auc_score(y, pred)))
        score = -roc_auc_score(y, pred)
        return score
    #res = minimize(rocauc, np.zeros(len(preds)), method= method)
    res = minimize(rocauc, np.ones(len(preds))/len(preds), method= method)
    w = np.exp(res.x)/np.sum(np.exp(res.x))
    return w

In [81]:
roc_auc_score(target, ensemble_train['lgbm'])

0.7423912148848191

In [82]:
roc_auc_score(target, ensemble_train['xgb'])

0.7360527546758419

In [83]:
roc_auc_score(target, ensemble_train['ctb'])

0.7413141707571659

In [86]:
roc_auc_score(target, (ensemble_train['lgbm']+ensemble_train['xgb']+ensemble_train['ctb'])/3)

0.7428648510513122

In [87]:
roc_auc_score(target, (rankdata(ensemble_train['lgbm'])+rankdata(ensemble_train['xgb'])+rankdata(ensemble_train['ctb'])))

0.7428275218063198

In [88]:
roc_auc_score(target, (ensemble_train['lgbm']*ensemble_train['xgb']*ensemble_train['ctb'])**(1/3))

0.7428941173558664

In [94]:
ww = optimize_weights(ensemble_train.T.values, target, method="Powell")
roc_auc_score(target, ensemble_train.values.dot(ww))

0.7437722976794078

### save
---

In [95]:
ensemble_test.to_csv('data/ensemble_boosting_test.csv', index=False)

In [97]:
ensemble_train.to_csv('data/ensemble_boosting_train.csv', index=False)

In [152]:
ensemble_train = pd.read_csv('data/ensemble_boosting_train.csv')
ensemble_test = pd.read_csv('data/ensemble_boosting_test.csv')

ensemble_test['lgbm'] = 0
ensemble_test['xgb'] =0
ensemble_test['ctb'] = 0
for x in range(3):
    ensemble_test['lgbm'] += ensemble_test['lgbm_{}'.format(x)]/3
    ensemble_test['xgb'] += ensemble_test['xgb_{}'.format(x)]/3
    ensemble_test['ctb'] += ensemble_test['ctb_{}'.format(x)]/3

et = ensemble_test[['lgbm', 'xgb', 'ctb']]

### submit
---

In [101]:
ensemble_test['lgbm'] = 0
ensemble_test['xgb'] =0
ensemble_test['ctb'] = 0

In [102]:
for x in range(3):
    ensemble_test['lgbm'] += ensemble_test['lgbm_{}'.format(x)]/3
    ensemble_test['xgb'] += ensemble_test['xgb_{}'.format(x)]/3
    ensemble_test['ctb'] += ensemble_test['ctb_{}'.format(x)]/3

In [104]:
et = ensemble_test[['lgbm', 'xgb', 'ctb']]

In [106]:
pred = et.values.dot(ww)

In [107]:
submit = test.copy()
submit = submit.drop(test.columns, axis=1)
submit['_ID_'] = submit.index
submit['_VAL_'] = pred

In [110]:
submit.to_csv('submissions/ensemble_boosting.csv', index=False)

In [153]:
pred1 = rankdata(et['lgbm']) + rankdata(et['xgb']) + rankdata(et['ctb'])

In [154]:
submit = test.copy()
submit = submit.drop(test.columns, axis=1)
submit['_ID_'] = submit.index
submit['_VAL_'] = pred1

In [155]:
submit.to_csv('submissions/ensemble_rankdata.csv', index=False)

## 7. Stacking
---

In [120]:
xgb_parameters = {
    #default
    'objective': 'reg:logistic',
    'eta': 0.01,
    'silent': 1,
    "nthread": 4,
    "random_seed": 1,
    "eval_metric": 'auc',
    
    # regularization parameters
    'max_depth': 5,
#     'subsample': 0.9,
#     'colsample_bytree': 0.55,
    
    # lightgbm approach
#     'tree_method': 'hist'
    'grow_policy': 'lossguide',
    'num_boost_rounds': 10000
}

In [118]:
catboost_parameters = {
    'loss_function':'Logloss',
    'eval_metric': 'AUC',
    'iterations': 10000,
    'learning_rate': 0.03,
    'random_seed': 42,
    'use_best_model': False,
    'od_wait': 50,
    'od_type': 'Iter',
    
#     # reg
#     'rsm': 0.8,
#     'l2_leaf_reg': 2
    
}

In [121]:
lgbm_parameters = {
    'objective': 'binary',
    'learning_rate': 0.01,
    'num_threads': 4,
    "metric": 'auc',
    
    # regularization
#     'colsample_bytree': 0.7,
#     'subsample': 0.9,
    'subsample_freq': 1,
    'min_data_in_leaf': 15,
    'num_boost_rounds': 10000
}

In [122]:
xgb_clf = XGBClassifier(num_boost_round=10000, early_stopping_rounds=30, **xgb_parameters)

In [123]:
ctb_clf = CatBoostClassifier(**catboost_parameters)

In [124]:
lgbm_clf = LGBMClassifier(num_boost_round=10000, early_stopping_rounds=50, **lgbm_parameters)

In [130]:
trains = []
tests = []

In [144]:
for x in range(3):
    i = 0
    
    test_df = et.copy()
    train_df = ensemble_train.copy()
    
    for tr, val in skf.split(ensemble_train, target):
        print(i)

        X = ensemble_train.iloc[tr]
        y = target[tr]
        X_val = ensemble_train.iloc[val]
        y_val = target[val]

        lgbm_clf.fit(X, y, eval_set=[(X, y), (X_val, y_val)], eval_metric='auc')
        xgb_clf.fit(X, y, eval_set=[(X, y), (X_val, y_val)], eval_metric='auc')
        ctb_clf.fit(X, y, eval_set=(X_val, y_val))

        train_df.loc[val, 'lgbm'] = lgbm_clf.predict_proba(X_val)[:,1]
        train_df.loc[val, 'xgb'] = xgb_clf.predict_proba(X_val)[:,1]
        train_df.loc[val, 'ctb'] = ctb_clf.predict_proba(X_val)[:,1]


        test_df['lgbm_{}'.format(i)] = lgbm_clf.predict_proba(et)[:,1]
        test_df['xgb_{}'.format(i)] = xgb_clf.predict_proba(et)[:,1]
        test_df['ctb_{}'.format(i)] = ctb_clf.predict_proba(et)[:,1]

        i+=1

    trains.append(train_df)
    tests.append(test_df)
    
    ensemble_train = train_df.copy()
    
    et['lgbm'] = 0
    et['xgb'] = 0
    et['ctb'] = 0
    for x in range(3):
        et['lgbm'] += test_df['lgbm_{}'.format(x)]/3
        et['xgb'] += test_df['xgb_{}'.format(x)]/3
        et['ctb'] += test_df['ctb_{}'.format(x)]/3
        
    et = et[['lgbm', 'xgb', 'ctb']]
    
    
    print(roc_auc_score(target, ensemble_train['lgbm']))
    print(roc_auc_score(target, ensemble_train['xgb']))
    print(roc_auc_score(target, ensemble_train['ctb']))
    print(roc_auc_score(target, (ensemble_train['lgbm']+ensemble_train['xgb']+ensemble_train['ctb'])/3))
    print(roc_auc_score(target, (rankdata(ensemble_train['lgbm'])+rankdata(ensemble_train['xgb'])+rankdata(ensemble_train['ctb']))))
    print(roc_auc_score(target, (ensemble_train['lgbm']*ensemble_train['xgb']*ensemble_train['ctb'])**(1/3)))
    
    ww = optimize_weights(ensemble_train.T.values, target, method="Powell")
    print(roc_auc_score(target, ensemble_train.values.dot(ww)))
    
    print('----')

0
[1]	training's auc: 0.748146	valid_1's auc: 0.734315
Training until validation scores don't improve for 50 rounds.
[2]	training's auc: 0.748678	valid_1's auc: 0.734918
[3]	training's auc: 0.748673	valid_1's auc: 0.734935
[4]	training's auc: 0.74868	valid_1's auc: 0.734935
[5]	training's auc: 0.74869	valid_1's auc: 0.734981
[6]	training's auc: 0.748817	valid_1's auc: 0.734834
[7]	training's auc: 0.748804	valid_1's auc: 0.734727
[8]	training's auc: 0.749173	valid_1's auc: 0.735038
[9]	training's auc: 0.749239	valid_1's auc: 0.734856
[10]	training's auc: 0.749181	valid_1's auc: 0.735034
[11]	training's auc: 0.749495	valid_1's auc: 0.735009
[12]	training's auc: 0.749487	valid_1's auc: 0.735025




[13]	training's auc: 0.74975	valid_1's auc: 0.735529
[14]	training's auc: 0.749935	valid_1's auc: 0.735456
[15]	training's auc: 0.74995	valid_1's auc: 0.735429
[16]	training's auc: 0.749909	valid_1's auc: 0.735994
[17]	training's auc: 0.75051	valid_1's auc: 0.73741
[18]	training's auc: 0.75052	valid_1's auc: 0.737397
[19]	training's auc: 0.750666	valid_1's auc: 0.737352
[20]	training's auc: 0.751213	valid_1's auc: 0.737697
[21]	training's auc: 0.751444	valid_1's auc: 0.737613
[22]	training's auc: 0.751427	valid_1's auc: 0.737593
[23]	training's auc: 0.751831	valid_1's auc: 0.737315
[24]	training's auc: 0.751958	valid_1's auc: 0.73732
[25]	training's auc: 0.752288	valid_1's auc: 0.737433
[26]	training's auc: 0.752583	valid_1's auc: 0.737567
[27]	training's auc: 0.752669	valid_1's auc: 0.737579
[28]	training's auc: 0.752648	valid_1's auc: 0.737454
[29]	training's auc: 0.752692	valid_1's auc: 0.737406
[30]	training's auc: 0.752919	valid_1's auc: 0.737382
[31]	training's auc: 0.75295	valid



[39]	training's auc: 0.752936	valid_1's auc: 0.735733
[40]	training's auc: 0.753027	valid_1's auc: 0.735683
[41]	training's auc: 0.753087	valid_1's auc: 0.735702
[42]	training's auc: 0.75319	valid_1's auc: 0.735696
[43]	training's auc: 0.753363	valid_1's auc: 0.735677
[44]	training's auc: 0.753456	valid_1's auc: 0.735629
[45]	training's auc: 0.753498	valid_1's auc: 0.735729
[46]	training's auc: 0.753544	valid_1's auc: 0.735674
[47]	training's auc: 0.753438	valid_1's auc: 0.735603
[48]	training's auc: 0.753579	valid_1's auc: 0.735695
[49]	training's auc: 0.753641	valid_1's auc: 0.735681
[50]	training's auc: 0.753676	valid_1's auc: 0.735777
[51]	training's auc: 0.753682	valid_1's auc: 0.735794
[52]	training's auc: 0.753719	valid_1's auc: 0.73578
[53]	training's auc: 0.753741	valid_1's auc: 0.735718
[54]	training's auc: 0.753761	valid_1's auc: 0.735713
[55]	training's auc: 0.753794	valid_1's auc: 0.735742
[56]	training's auc: 0.754971	valid_1's auc: 0.737055
[57]	training's auc: 0.754972	




[51]	training's auc: 0.7514	valid_1's auc: 0.744218
[52]	training's auc: 0.751511	valid_1's auc: 0.744056
[53]	training's auc: 0.751552	valid_1's auc: 0.744019
[54]	training's auc: 0.751559	valid_1's auc: 0.744057
[55]	training's auc: 0.751613	valid_1's auc: 0.744049
[56]	training's auc: 0.751567	valid_1's auc: 0.744128
[57]	training's auc: 0.751652	valid_1's auc: 0.744052
[58]	training's auc: 0.751732	valid_1's auc: 0.744096
[59]	training's auc: 0.751879	valid_1's auc: 0.744062
[60]	training's auc: 0.751833	valid_1's auc: 0.74408
[61]	training's auc: 0.751918	valid_1's auc: 0.744109
[62]	training's auc: 0.751968	valid_1's auc: 0.744149
[63]	training's auc: 0.751972	valid_1's auc: 0.744128
[64]	training's auc: 0.751927	valid_1's auc: 0.744042
[65]	training's auc: 0.751974	valid_1's auc: 0.744025
[66]	training's auc: 0.752006	valid_1's auc: 0.744042
[67]	training's auc: 0.752042	valid_1's auc: 0.744162
[68]	training's auc: 0.75196	valid_1's auc: 0.744281
[69]	training's auc: 0.751998	v

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

0.7084190503318009
0.7382711111476817
0.74268629718016
0.7349542277570441
0.7374922891927678
0.7395345441332968
0.7426872825281022
----
0
[1]	training's auc: 0.746397	valid_1's auc: 0.612032
Training until validation scores don't improve for 50 rounds.
[2]	training's auc: 0.746969	valid_1's auc: 0.616402
[3]	training's auc: 0.746978	valid_1's auc: 0.615677
[4]	training's auc: 0.747007	valid_1's auc: 0.616426
[5]	training's auc: 0.747061	valid_1's auc: 0.617223
[6]	training's auc: 0.747368	valid_1's auc: 0.619409
[7]	training's auc: 0.747535	valid_1's auc: 0.625564
[8]	training's auc: 0.747777	valid_1's auc: 0.625358
[9]	training's auc: 0.747819	valid_1's auc: 0.627597
[10]	training's auc: 0.747845	valid_1's auc: 0.625995
[11]	training's auc: 0.747949	valid_1's auc: 0.627603
[12]	training's auc: 0.747932	valid_1's auc: 0.626702
[13]	training's auc: 0.747858	valid_1's auc: 0.623932
[14]	training's auc: 0.747828	valid_1's auc: 0.640775
[15]	training's auc: 0.74799	valid_1's auc: 0.637964




[25]	training's auc: 0.749074	valid_1's auc: 0.681049
[26]	training's auc: 0.749191	valid_1's auc: 0.683383
[27]	training's auc: 0.749617	valid_1's auc: 0.685204
[28]	training's auc: 0.749685	valid_1's auc: 0.690995
[29]	training's auc: 0.749634	valid_1's auc: 0.691554
[30]	training's auc: 0.749617	valid_1's auc: 0.691631
[31]	training's auc: 0.749707	valid_1's auc: 0.699613
[32]	training's auc: 0.749779	valid_1's auc: 0.703517
[33]	training's auc: 0.750164	valid_1's auc: 0.704352
[34]	training's auc: 0.750324	valid_1's auc: 0.704879
[35]	training's auc: 0.750281	valid_1's auc: 0.704851
[36]	training's auc: 0.750414	valid_1's auc: 0.707309
[37]	training's auc: 0.750972	valid_1's auc: 0.707931
[38]	training's auc: 0.751109	valid_1's auc: 0.709022
[39]	training's auc: 0.751141	valid_1's auc: 0.708646
[40]	training's auc: 0.75113	valid_1's auc: 0.70973
[41]	training's auc: 0.751128	valid_1's auc: 0.71088
[42]	training's auc: 0.75124	valid_1's auc: 0.711316
[43]	training's auc: 0.751237	va



[54]	training's auc: 0.752613	valid_1's auc: 0.735199
[55]	training's auc: 0.752636	valid_1's auc: 0.735169
[56]	training's auc: 0.752666	valid_1's auc: 0.735174
[57]	training's auc: 0.752675	valid_1's auc: 0.735217
[58]	training's auc: 0.752651	valid_1's auc: 0.735258
[59]	training's auc: 0.75269	valid_1's auc: 0.735266
[60]	training's auc: 0.752707	valid_1's auc: 0.735272
[61]	training's auc: 0.752718	valid_1's auc: 0.735307
[62]	training's auc: 0.752798	valid_1's auc: 0.735276
[63]	training's auc: 0.752896	valid_1's auc: 0.735316
[64]	training's auc: 0.752965	valid_1's auc: 0.735333
[65]	training's auc: 0.753287	valid_1's auc: 0.735607
[66]	training's auc: 0.753304	valid_1's auc: 0.735466
[67]	training's auc: 0.753271	valid_1's auc: 0.73552
[68]	training's auc: 0.753263	valid_1's auc: 0.735527
[69]	training's auc: 0.75334	valid_1's auc: 0.73554
[70]	training's auc: 0.753346	valid_1's auc: 0.735538
[71]	training's auc: 0.753375	valid_1's auc: 0.735544
[72]	training's auc: 0.753387	va



[64]	training's auc: 0.750479	valid_1's auc: 0.743758
[65]	training's auc: 0.75047	valid_1's auc: 0.74374
[66]	training's auc: 0.750489	valid_1's auc: 0.743745
[67]	training's auc: 0.7505	valid_1's auc: 0.743681
[68]	training's auc: 0.750547	valid_1's auc: 0.743714
[69]	training's auc: 0.750597	valid_1's auc: 0.743748
[70]	training's auc: 0.750641	valid_1's auc: 0.743737
[71]	training's auc: 0.750694	valid_1's auc: 0.743715
[72]	training's auc: 0.750705	valid_1's auc: 0.743683
[73]	training's auc: 0.750771	valid_1's auc: 0.743657
[74]	training's auc: 0.750801	valid_1's auc: 0.743631
[75]	training's auc: 0.750819	valid_1's auc: 0.743649
[76]	training's auc: 0.750886	valid_1's auc: 0.743615
[77]	training's auc: 0.750919	valid_1's auc: 0.743597
[78]	training's auc: 0.751025	valid_1's auc: 0.743614
[79]	training's auc: 0.751045	valid_1's auc: 0.743548
[80]	training's auc: 0.751092	valid_1's auc: 0.743606
[81]	training's auc: 0.75112	valid_1's auc: 0.743592
[82]	training's auc: 0.751158	val



[31]	training's auc: 0.749224	valid_1's auc: 0.667033
[32]	training's auc: 0.749243	valid_1's auc: 0.667171
[33]	training's auc: 0.749285	valid_1's auc: 0.667093
[34]	training's auc: 0.749557	valid_1's auc: 0.666259
[35]	training's auc: 0.74964	valid_1's auc: 0.672662
[36]	training's auc: 0.749394	valid_1's auc: 0.673604
[37]	training's auc: 0.749409	valid_1's auc: 0.674368
[38]	training's auc: 0.749364	valid_1's auc: 0.675975
[39]	training's auc: 0.74966	valid_1's auc: 0.6772
[40]	training's auc: 0.749719	valid_1's auc: 0.677365
[41]	training's auc: 0.749714	valid_1's auc: 0.679552
[42]	training's auc: 0.749973	valid_1's auc: 0.677622
[43]	training's auc: 0.750028	valid_1's auc: 0.681667
[44]	training's auc: 0.750037	valid_1's auc: 0.686621
[45]	training's auc: 0.750084	valid_1's auc: 0.68686
[46]	training's auc: 0.750159	valid_1's auc: 0.687074
[47]	training's auc: 0.750159	valid_1's auc: 0.689305
[48]	training's auc: 0.750191	valid_1's auc: 0.689494
[49]	training's auc: 0.750205	val



[18]	training's auc: 0.747856	valid_1's auc: 0.729898
[19]	training's auc: 0.747877	valid_1's auc: 0.729923
[20]	training's auc: 0.747876	valid_1's auc: 0.729907
[21]	training's auc: 0.747865	valid_1's auc: 0.73084
[22]	training's auc: 0.747782	valid_1's auc: 0.730931
[23]	training's auc: 0.747823	valid_1's auc: 0.730965
[24]	training's auc: 0.747728	valid_1's auc: 0.731029
[25]	training's auc: 0.748281	valid_1's auc: 0.731139
[26]	training's auc: 0.748352	valid_1's auc: 0.7312
[27]	training's auc: 0.748386	valid_1's auc: 0.731248
[28]	training's auc: 0.748447	valid_1's auc: 0.73127
[29]	training's auc: 0.748461	valid_1's auc: 0.731316
[30]	training's auc: 0.748462	valid_1's auc: 0.731316
[31]	training's auc: 0.748588	valid_1's auc: 0.731249
[32]	training's auc: 0.748618	valid_1's auc: 0.731248
[33]	training's auc: 0.748715	valid_1's auc: 0.731231
[34]	training's auc: 0.748716	valid_1's auc: 0.731146
[35]	training's auc: 0.74869	valid_1's auc: 0.731247
[36]	training's auc: 0.74873	vali



[62]	training's auc: 0.750016	valid_1's auc: 0.736016
[63]	training's auc: 0.749997	valid_1's auc: 0.736036
[64]	training's auc: 0.750007	valid_1's auc: 0.736049
[65]	training's auc: 0.749987	valid_1's auc: 0.736014
[66]	training's auc: 0.75012	valid_1's auc: 0.736006
[67]	training's auc: 0.750137	valid_1's auc: 0.736095
[68]	training's auc: 0.750203	valid_1's auc: 0.736077
[69]	training's auc: 0.75018	valid_1's auc: 0.736008
[70]	training's auc: 0.750177	valid_1's auc: 0.736085
[71]	training's auc: 0.750267	valid_1's auc: 0.736133
[72]	training's auc: 0.750303	valid_1's auc: 0.736187
[73]	training's auc: 0.750359	valid_1's auc: 0.736182
[74]	training's auc: 0.75047	valid_1's auc: 0.736457
[75]	training's auc: 0.750562	valid_1's auc: 0.736474
[76]	training's auc: 0.750492	valid_1's auc: 0.736459
[77]	training's auc: 0.750512	valid_1's auc: 0.736479
[78]	training's auc: 0.750672	valid_1's auc: 0.736418
[79]	training's auc: 0.750731	valid_1's auc: 0.736465
[80]	training's auc: 0.751747	v

In [147]:
# 0.7084190503318009
# 0.7382711111476817
# 0.74268629718016
# 0.7349542277570441
# 0.7374922891927678
# 0.7395345441332968
# 0.7426872825281022
# ----

# 0.7249111423888001
# 0.7072078224037694
# 0.724188003622639
# 0.7232958629776917
# 0.7297328065074966
# 0.7219977773638826
# 0.73539175901013
# ----

# 0.6472419805931899
# 0.7098353740220119
# 0.7123062847308034
# 0.723701649849682
# 0.7239245002581796
# 0.7242684455167775
# 0.7277345760306274
# ----

In [149]:
pred2 = rankdata(tests[0]['lgbm']) + rankdata(tests[0]['xgb']) + rankdata(tests[0]['ctb'])

In [150]:
submit = test.copy()
submit = submit.drop(test.columns, axis=1)
submit['_ID_'] = submit.index
submit['_VAL_'] = pred2

In [151]:
submit.to_csv('submissions/stacking2_rankdata.csv', index=False)

In [113]:
# stacking + scores(ensembles)