In [96]:
# Render our plots inline
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

matplotlib.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 5)

train = pd.read_csv('/Users/Kseniya/study/4_term/ml/train.csv')
print train.shape
test= pd.read_csv('/Users/Kseniya/study/4_term/ml/test.csv')
print test.shape

(76020, 371)
(75818, 370)


In [97]:
def duplicate_columns(frame):
    groups = frame.columns.to_series().groupby(frame.dtypes).groups
    dups = []
    for t, v in groups.items():
        dcols = frame[v].to_dict(orient="list")

        vs = dcols.values()
        ks = dcols.keys()
        lvs = len(vs)

        for i in range(lvs):
            for j in range(i+1,lvs):
                if vs[i] == vs[j]: 
                    dups.append(ks[i])
                    break

    return dups   


def data_preprocess(data):
    # Drop constants columns
    data = data.loc[:, data.apply(pd.Series.nunique) != 1]
    
    # Drop columns duplicate
    dups = duplicate_columns(data)
    data = data.drop(dups, axis=1)
    
    # Drop row duplicates 
    data.drop_duplicates(keep='first', inplace = True)
    
    data = pd.get_dummies(data)
    
    # Findout NaN values (no such values)
    # train_data.isnull().sum().sum()
    #empty_arr = pd.isnull(data).any(1).nonzero()[0]
    #print empty_arr
    return data

def leave_same_columns(data, test):
    modified_test = pd.DataFrame()
    for col in data:
        if col in test.columns:
            modified_test[col] = test[col]
    return modified_test

print 'Before processing: '
print train.shape
print test.shape

train = data_preprocess(train)

# Drop Target column 
target = train['TARGET']
train.drop('TARGET', axis=1, inplace=True)
test = leave_same_columns(train, test)

print 'After processing: '
print train.shape
print test.shape

train_orig = train.copy()
test_orig = test.copy()

Before processing: 
(76020, 371)
(75818, 370)
After processing: 
(76020, 307)
(75818, 307)


In [98]:
%%time
import numpy as np
# categories encoding
def dummy_columns(data, category):
    for c in category:
        dummies = pd.get_dummies(data[c], prefix="{}_xxx_".format(c))
        data = pd.concat([data, dummies], axis=1)
    print data.shape
    data.drop(category, axis=1, inplace=True)
    return data

def categories_encode(data, test, n):
    # examine if there are categorical columns
    res = data.dtypes == 'object'
    print 'Train data types cnt : '
    print res.value_counts()
    res = test.dtypes == 'object'
    print 'Test data types cnt : '
    print res.value_counts()
    
    # no str data types found
    # find value sets of N elements
    category = []
    for col in data.loc[:, data.dtypes == np.int64]:
        if (data[col].value_counts().count() <= n and test[col].value_counts().count() <= n):
            category.append(col)
            
    if len(category) == 0:
        print "No category columns for n = {}".format(n)
        return None
    
    return category




CPU times: user 62 µs, sys: 248 µs, total: 310 µs
Wall time: 181 µs


In [61]:
train = train_orig.copy()
test = test_orig.copy()

In [99]:
# categories encoding
category_train = None
category_test = None

category = categories_encode(train, test, 10)
if category is not None:
    for col in category:
        dummies = pd.get_dummies(train[col], prefix="{}_xxx_".format(col))
        category_train = pd.concat([category_train, dummies], axis=1)
        
        dummies = pd.get_dummies(test[col], prefix="{}_xxx_".format(col))
        category_test = pd.concat([category_test, dummies], axis=1)
        
        train.drop(col, axis=1, inplace=True)
        test.drop(col, axis=1, inplace=True)
        
    #train = dummy_columns(train, category)
    #test = dummy_columns(test, category)

print 'After categorisation:\n Data:\n'
print train.shape
print 'Test:\n'
print test.shape
print 'Category train:\n'
print category_train.shape
print 'Category test:\n'
print category_test.shape

Train data types cnt : 
False    307
dtype: int64
Test data types cnt : 
False    307
dtype: int64
After categorisation:
 Data:

(76020, 157)
Test:

(75818, 157)
Category train:

(76020, 516)
Category test:

(75818, 503)


In [100]:
def modify_test_colomns(data, test_from):
    test = pd.DataFrame()
    index = -1
    for col in data.columns:
        index += 1
        if col not in test_from.columns:
            test[col] = pd.Series([0 for x in range(test_from.shape[0])])
        else:
            test[col] = test_from[col]
            
    #print test.shape
    
    for col in data.columns:
        if data.columns.get_loc(col) != test.columns.get_loc(col):
            print "index missmatch " + col
    #print test.shape
    return test

category_test = modify_test_colomns(category_train, category_test)

print category_train.shape
print category_test.shape


(76020, 516)
(75818, 516)


In [101]:
train_orig_1 = train.copy()
test_orig_1 = test.copy()

In [102]:
# feature selection
from sklearn.feature_selection import SelectPercentile, f_classif
# Removing features with low variance
def get_best_features(X, y, perc):
    selectK = SelectPercentile(f_classif, perc)
    selectK.fit(X[:y.shape[0]], y)
    return pd.DataFrame(selectK.transform(X))

train = train_orig_1.copy()
test = test_orig_1.copy()

print 'Before selection best features:\n Data:\n'
print train.shape
print 'Test:\n'
print test.shape

train = get_best_features(train, target, 90)
test = modify_test_colomns(train, test)

print 'After selection best features:\n Data:\n'
print train.shape
print 'Test:\n'
print test.shape

Before selection best features:
 Data:

(76020, 157)
Test:

(75818, 157)
After selection best features:
 Data:

(76020, 141)
Test:

(75818, 141)


In [68]:
train_orig_2 = train.copy()
test_orig_2 = test.copy()

In [70]:
# estimate linear models for category features only
from sklearn import cross_validation
from sklearn.linear_model import BayesianRidge, LinearRegression,LogisticRegression
classes = [
    {'type': "liner regression", 'classifier': LinearRegression, 'params': {
                                                                    'copy_X': True,
                                                                    'normalize': True,
                                                                    'n_jobs': 4}},
    {'type': "logistic regression", 'classifier': LogisticRegression, 'params': {
                                                                        'solver': 'sag',
                                                                        'multi_class': 'ovr',
                                                                        'n_jobs': 4}},
   {'type': "bayes ridge", 'classifier': BayesianRidge, 'params': {}},
    # {'type': "", 'classifier': , 'params': {}},
]

for c in classes:
    params = dict(c.get('params', {}))
    classifier = c['classifier']
    classifier = classifier(**params)
    res = cross_validation.cross_val_score(classifier, category_train, target, n_jobs=4, scoring='roc_auc').mean()
    print "{}: {}\n".format(c['type'], res)
    


liner regression: 0.708772132648





logistic regression: 0.728865553806

bayes ridge: 0.726836607321



In [87]:
# find parametrs for ligistic regression on category train
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

log_train = int(0.7 * category_train.shape[0])

dtrain = category_train[:int(log_train)]
dtarget = target[:int(log_train)]

dtest = category_train[log_train:]
dtest_target = target[log_train:]

def objective(params):
    log_clf = LogisticRegression(dual = False, penalty='l2', random_state=13, multi_class='ovr', verbose=1, 
                                n_jobs=4, solver='sag', C=params['C'], tol=params['tol'])


    log_clf.fit(dtrain, dtarget)
    dpred = log_clf.predict_proba(dtest)[:,1]
    
    auc = roc_auc_score(dtest_target, dpred)
    print "auc {} params {}".format(auc, params)
    return {'loss':1-auc, 'status': STATUS_OK }
    
params = {
    'C': hp.uniform('C', 0.2, 5),
    'tol': hp.uniform('tol', 0, 100),
}    
iteration_number = 100

trials = Trials()
best_res = fmin(fn=objective,
                space=params,
                algo=tpe.suggest,
                max_evals=iteration_number,
                trials=trials)
print "BEST "
print best_res




convergence after 1 epochs took 1 seconds
auc 0.727091568292 params {'C': 1.5734688077618213, 'tol': 69.64691855978616}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727020480314 params {'C': 3.7782631274509426, 'tol': 10.606490595473272}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727010953803 params {'C': 0.4761473368068307, 'tol': 50.672601149956996}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029477574 params {'C': 0.8150915457632018, 'tol': 10.517212721767521}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.726319439783 params {'C': 0.39182227591987606, 'tol': 52.383331995467586}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727065105762 params {'C': 1.4630952780675868, 'tol': 86.66370428503978}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.72706878646 params {'C': 1.4876469239067243, 'tol': 57.05316251454068}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727096259377 params {'C': 1.695412933412432, 'tol': 14.16302690767407}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029621915 params {'C': 4.750923996033295, 'tol': 65.01536054717491}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727127340821 params {'C': 2.0294938007328396, 'tol': 78.09038987924662}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727040423439 params {'C': 0.8223721245668998, 'tol': 41.95086163487056}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727154789682 params {'C': 2.349268375266297, 'tol': 81.14036263085849}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727109129789 params {'C': 1.772934561443143, 'tol': 66.13289424499646}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727075474263 params {'C': 0.911205576627975, 'tol': 15.321593689979663}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.726917733528 params {'C': 0.6250113351847272, 'tol': 94.38502081196458}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727017641606 params {'C': 3.5851045352667894, 'tol': 74.93422613186887}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727021779384 params {'C': 3.8155752358449875, 'tol': 49.09623875237653}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.726995797991 params {'C': 0.721915135524331, 'tol': 74.79055164963538}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727166914332 params {'C': 2.7074955086509696, 'tol': 91.73854839516488}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727175935649 params {'C': 2.8774761717733024, 'tol': 90.20615248125083}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727180722961 params {'C': 3.046220077246333, 'tol': 99.67564752909608}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727182214486 params {'C': 3.0897902517068063, 'tol': 31.26587454558365}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727183056475 params {'C': 3.2588881219092243, 'tol': 28.76677042726056}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029477574 params {'C': 4.700680831949592, 'tol': 28.82995544724941}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.727025291683 params {'C': 4.269249160311455, 'tol': 29.4366873625872}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727183056475 params {'C': 3.2728967407488443, 'tol': 2.051136397182546}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727015981684 params {'C': 3.5466351087678714, 'tol': 2.7018337932107173}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727024666205 params {'C': 4.231252686908773, 'tol': 2.4116873960096825}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727154669398 params {'C': 2.324104050026068, 'tol': 23.155827034379325}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.727024810546 params {'C': 4.259174319263215, 'tol': 39.04787701500794}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727183200816 params {'C': 3.2744792330900463, 'tol': 23.722220307482328}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 10 epochs took 2 seconds
auc 0.745438473295 params {'C': 3.9516364538326187, 'tol': 0.060459326089611576}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    2.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029044551 params {'C': 4.945494058320892, 'tol': 20.808991090945558}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727021875611 params {'C': 3.841319911916065, 'tol': 7.94154084198532}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 1 seconds
auc 0.727028226618 params {'C': 4.519472703787405, 'tol': 40.21955447487945}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.727021707213 params {'C': 3.9868870169947233, 'tol': 21.027291385286034}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.727014249591 params {'C': 3.4580502635281496, 'tol': 11.89415671035603}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727164652988 params {'C': 2.4776517210401696, 'tol': 7.998828938409137}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727143410794 params {'C': 2.1271088779305805, 'tol': 34.77810210590908}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727108648652 params {'C': 1.1133685276086744, 'tol': 57.42864131487434}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.7271671549 params {'C': 2.677977731958729, 'tol': 45.55554525405514}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.72702173127 params {'C': 4.0520941703862805, 'tol': 16.909621504511744}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 6 epochs took 2 seconds
auc 0.743813529618 params {'C': 4.899856008844104, 'tol': 0.14536182267027442}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    1.5s finished


convergence after 2 epochs took 1 seconds
auc 0.740052169675 params {'C': 4.9655081901844165, 'tol': 0.7270356321927285}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.5s finished


convergence after 1 epochs took 1 seconds
auc 0.727028226618 params {'C': 4.518693271152445, 'tol': 59.55296280530575}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727029477574 params {'C': 4.68387104746974, 'tol': 6.363435545274802}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.726111035332 params {'C': 0.2294839919406897, 'tol': 16.38544298503521}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029116722 params {'C': 4.959912014484609, 'tol': 11.42401776796518}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.72702570065 params {'C': 4.401116889669234, 'tol': 51.18843153903623}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.72702173127 params {'C': 4.0290998118977415, 'tol': 34.42477118518352}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029694086 params {'C': 4.7915791424441645, 'tol': 66.5914652710535}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727019133131 params {'C': 3.685242956634383, 'tol': 18.822723087830482}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.727172206838 params {'C': 2.8312184169596377, 'tol': 83.43015484845816}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727021875611 params {'C': 3.8487404891927417, 'tol': 5.15559947883901}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727127172423 params {'C': 1.9149819910532364, 'tol': 24.60550197035271}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727013936852 params {'C': 3.425136795472485, 'tol': 45.04689817406354}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.727037921527 params {'C': 1.4008548898060593, 'tol': 61.26854932270203}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 1 epochs took 0 seconds
auc 0.727180554563 params {'C': 3.011063511040607, 'tol': 14.718535759993244}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029020494 params {'C': 4.533043483481109, 'tol': 71.81539807997831}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029670029 params {'C': 4.85720581936842, 'tol': 26.991370065899748}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727025387911 params {'C': 4.349854517065604, 'tol': 32.985565057280425}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727022958169 params {'C': 4.174472706826127, 'tol': 97.57880565510584}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727028803983 params {'C': 4.6507464669504355, 'tol': 54.71666166073748}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727019253415 params {'C': 3.7002949489750354, 'tol': 37.5304684630874}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727182623452 params {'C': 3.169077770626056, 'tol': 11.37869152721942}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.3s finished


convergence after 4 epochs took 1 seconds
auc 0.742477556768 params {'C': 4.994367052573153, 'tol': 0.26656215638774444}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.9s finished


convergence after 3 epochs took 1 seconds
auc 0.744685734603 params {'C': 4.114228393375134, 'tol': 0.5382487703580214}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.6s finished


convergence after 1 epochs took 0 seconds
auc 0.727021875611 params {'C': 3.906940452130709, 'tol': 4.516558706922588}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727022958169 params {'C': 4.1731665807369165, 'tol': 8.866235044261707}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727014249591 params {'C': 3.4555154528887737, 'tol': 14.00253847741216}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 3 epochs took 1 seconds
auc 0.741296606229 params {'C': 4.413613711305447, 'tol': 0.4073675733740567}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.7s finished


convergence after 1 epochs took 1 seconds
auc 0.727018892562 params {'C': 3.6558416374961404, 'tol': 5.219875424628339}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727177619628 params {'C': 2.9247611412301655, 'tol': 9.335834421405488}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029164835 params {'C': 4.582543369850231, 'tol': 18.724608067206823}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727166745934 params {'C': 2.567257270497474, 'tol': 26.61080357726355}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727029645972 params {'C': 4.851835802603578, 'tol': 13.770807575024751}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727021851554 params {'C': 4.078829401871807, 'tol': 3.204076392610652}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727150820302 params {'C': 2.2427924900603604, 'tol': 22.54197612588039}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727182070144 params {'C': 3.3630329796431013, 'tol': 47.267389712421966}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 4 epochs took 1 seconds
auc 0.742912696986 params {'C': 3.537546384405375, 'tol': 0.29763385917466223}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.9s finished


convergence after 1 epochs took 0 seconds
auc 0.72702531574 params {'C': 4.302864842206838, 'tol': 18.70155363196375}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727020528428 params {'C': 3.757026437764228, 'tol': 7.761593100597345}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727022044009 params {'C': 3.9372802792618735, 'tol': 12.350951024882134}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727021009565 params {'C': 4.130890051810063, 'tol': 77.61662338942583}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727169873324 params {'C': 2.768808151819761, 'tol': 31.91020518058372}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727029958711 params {'C': 4.761853791001071, 'tol': 6.944902653027825}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.72702615773 params {'C': 4.42724401043953, 'tol': 36.801026356697115}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727183200816 params {'C': 3.2800554030383897, 'tol': 42.27875670808998}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727182503168 params {'C': 3.117574172101585, 'tol': 86.5122683536016}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.7270181468 params {'C': 3.6027406060099803, 'tol': 3.2434235013048993}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727093877749 params {'C': 1.5963878377423621, 'tol': 25.47923748921536}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727164701102 params {'C': 2.4865376928969662, 'tol': 20.922874819918498}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727028803983 params {'C': 4.649595221419617, 'tol': 29.794321291027234}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727046413593 params {'C': 1.3264558837820386, 'tol': 41.931340532172015}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 1 seconds
auc 0.727029525688 params {'C': 4.888239694582497, 'tol': 1.6897948977729413}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727178196992 params {'C': 2.9740917073787245, 'tol': 16.83467710923631}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727020552485 params {'C': 3.786335267456784, 'tol': 5.699691062479656}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.72710965904 params {'C': 1.8181445771112226, 'tol': 68.72679308435721}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727021779384 params {'C': 4.017901737520956, 'tol': 9.427472698903635}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


convergence after 1 epochs took 0 seconds
auc 0.727024810546 params {'C': 4.257733439994073, 'tol': 62.246658241353764}
BEST 
{'C': 3.9516364538326187, 'tol': 0.060459326089611576}


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    0.2s finished


In [88]:
# auc 0.745438473295 params {'C': 3.9516364538326187, 'tol': 0.060459326089611576}
category_model = LogisticRegression(dual = False, penalty='l2', random_state=13, multi_class='ovr', verbose=1, 
                                    n_jobs=4, solver='sag', C=3.9516364538326187, tol=0.060459326089611576)
category_model.fit(dtrain, dtarget)
dpred = category_model.predict_proba(dtest)[:,1]
print dpred

convergence after 10 epochs took 2 seconds


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    2.2s finished


[ 0.01835048  0.0436105   0.00032668 ...,  0.02184358  0.01896538
  0.08473333]


In [106]:
# test classifiers on all train data 
import time

from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn import svm
from sklearn.linear_model import BayesianRidge, LinearRegression, LogisticRegression

classes = [
    {'type': "random_forest", 'classifier': RandomForestClassifier, 'params': {'n_estimators': 100, 'n_jobs': 4}},
    {'type': "liner regression", 'classifier': LinearRegression, 'params': {
                                                                    'copy_X': True,
                                                                    'normalize': True,
                                                                    'n_jobs': 4}},
    {'type': "logistic regression", 'classifier': LogisticRegression, 'params': {
                                                                        'solver': 'sag',
                                                                        'multi_class': 'ovr',
                                                                        'n_jobs': 4}},
   {'type': "ada boost", 'classifier': AdaBoostClassifier, 'params': {'n_estimators': 15}},
    # {'type': "", 'classifier': , 'params': {}},
]

for c in classes:
    params = dict(c.get('params', {}))
    classifier = c['classifier']
    classifier = classifier(**params)
    res = cross_validation.cross_val_score(classifier, train, target, n_jobs=4, scoring='roc_auc').mean()
    print "{}: {}\n".format(c['type'], res)
    


random_forest: 0.71659756633

liner regression: 0.777619229636





logistic regression: 0.593684156614

ada boost: 0.820528181998



In [107]:
for col in train.columns:
    if train.columns.get_loc(col) != test.columns.get_loc(col):
        print "index missmatch " + col
print "done"

done


In [104]:
# test xgboost on train data without category data
import xgboost
from sklearn.metrics import roc_auc_score
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

xgb_train = int(0.7 * train.shape[0])

dtrain = train[:int(xgb_train/2)]
dtarget = target[:int(xgb_train/2)]

dtrain_test = train[int(xgb_train/2):xgb_train]
dtrain_test_target = target[int(xgb_train/2):xgb_train]

dtest = train[xgb_train:]
dtest_target = target[xgb_train:]

def objective(params):
    xgb_clf = xgb.XGBClassifier(missing=np.nan, nthread=4, seed=13, 
                                 n_estimators=int(params['n_estimators']),
                                 base_score=params['base_score'],
                                 max_depth=int(params['max_depth']),
                                 max_delta_step=int(params['max_delta_step']),
                                 min_child_weight=params['min_child_weight'],
                                 learning_rate=params['learning_rate'],
                                 subsample=params['subsample'],
                                 scale_pos_weight=params['scale_pos_weight'],
                                 colsample_bytree=params['colsample_bytree'])

    xgb_clf.fit(dtrain, dtarget, eval_metric="auc", eval_set=[(dtrain_test, dtrain_test_target)], early_stopping_rounds=30)
    dpred = xgb_clf.predict_proba(dtest)[:,1]
    
    auc = roc_auc_score(dtest_target, dpred)
    print "auc {} params {}".format(auc, params)
    return {'loss':1-auc, 'status': STATUS_OK }
    
params = {
    'max_depth': hp.quniform("max_depth", 2, 6, 1),
    'max_delta_step': hp.quniform('max_delta_step', 1, 10, 1),
    'base_score': hp.uniform('base_score', 0.4, 1),
    'n_estimators': hp.quniform('n_estimators', 1000, 40000, 1),
    'min_child_weight': hp.quniform('min_child', 1, 15, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.03),
    'subsample': hp.uniform('subsample', 0.6, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.6, 1.0),
    'scale_pos_weight': hp.uniform('scale_pos_weight', 0.2, 1.0)
}    
iteration_number = 50

trials = Trials()
best_res = fmin(fn=objective,
                space=params,
                algo=tpe.suggest,
                max_evals=iteration_number,
                trials=trials)
print best_res




Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.710925
[1]	validation_0-auc:0.710804
[2]	validation_0-auc:0.710804
[3]	validation_0-auc:0.710804
[4]	validation_0-auc:0.760027
[5]	validation_0-auc:0.762722
[6]	validation_0-auc:0.763944
[7]	validation_0-auc:0.780960
[8]	validation_0-auc:0.780025
[9]	validation_0-auc:0.780142
[10]	validation_0-auc:0.780086
[11]	validation_0-auc:0.782400
[12]	validation_0-auc:0.782345
[13]	validation_0-auc:0.780068
[14]	validation_0-auc:0.780019
[15]	validation_0-auc:0.780170
[16]	validation_0-auc:0.786236
[17]	validation_0-auc:0.790499
[18]	validation_0-auc:0.796524
[19]	validation_0-auc:0.796464
[20]	validation_0-auc:0.809441
[21]	validation_0-auc:0.813787
[22]	validation_0-auc:0.813868
[23]	validation_0-auc:0.808868
[24]	validation_0-auc:0.808815
[25]	validation_0-auc:0.808961
[26]	validation_0-auc:0.810352
[27]	validation_0-auc:0.810684
[28]	validation_0-auc:0.811150
[29]	validation_0-auc:0.811120
[30]	validati

auc 0.841002270726 params {'colsample_bytree': 0.8739318954339452, 'scale_pos_weight': 0.4289114679603036, 'learning_rate': 0.029615283967692307, 'max_delta_step': 5.0, 'base_score': 0.6885591408906166, 'n_estimators': 9847.0, 'subsample': 0.8785876742391446, 'min_child_weight': 9.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.759656
[1]	validation_0-auc:0.783988
[2]	validation_0-auc:0.799326
[3]	validation_0-auc:0.790437
[4]	validation_0-auc:0.805313
[5]	validation_0-auc:0.796000
[6]	validation_0-auc:0.809592
[7]	validation_0-auc:0.810758
[8]	validation_0-auc:0.814087
[9]	validation_0-auc:0.816589
[10]	validation_0-auc:0.816379
[11]	validation_0-auc:0.815186
[12]	validation_0-auc:0.810310
[13]	validation_0-auc:0.811100
[14]	validation_0-auc:0.809947
[15]	validation_0-auc:0.811472
[16]	validation_0-auc:0.815408
[17]	validation_0-auc:0.813356
[18]	validation_0-auc:0.815632
[19]	validation_0-auc:0.818082
[20]	validation_0-auc:0.818709
[21]	validation_0-auc:0.819466
[22]	validation_0-auc:0.819784
[23]	validation_0-auc:0.821356
[24]	validation_0-auc:0.821772
[25]	validation_0-auc:0.821773
[26]	validation_0-auc:0.820656
[27]	validation_0-auc:0.819876
[28]	validation_0-auc:0.820964
[29]	validation_0-auc:0.821499
[30]	validati

auc 0.841122603066 params {'colsample_bytree': 0.7581900043129112, 'scale_pos_weight': 0.7963771879084904, 'learning_rate': 0.023359248188161465, 'max_delta_step': 3.0, 'base_score': 0.5689437364024854, 'n_estimators': 23320.0, 'subsample': 0.6424259623818931, 'min_child_weight': 7.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.710333
[14]	validation_0-auc:0.710333
[15]	validation_0-auc:0.710333
[16]	validation_0-auc:0.710333
[17]	validation_0-auc:0.710333
[18]	validation_0-auc:0.711043
[19]	validation_0-auc:0.711042
[20]	validation_0-auc:0.711042
[21]	validation_0-auc:0.711563
[22]	validation_0-auc:0.711564
[23]	validation_0-auc:0.760938
[24]	validation_0-auc:0.762086
[25]	validation_0-auc:0.770642
[26]	validation_0-auc:0.757085
[27]	validation_0-auc:0.758386
[28]	validation_0-auc:0.757085
[29]	validation_0-auc:0.759038
[30]	validati

auc 0.82593130664 params {'colsample_bytree': 0.7651304304997187, 'scale_pos_weight': 0.2460245561344718, 'learning_rate': 0.020133722218798455, 'max_delta_step': 8.0, 'base_score': 0.7468052181573694, 'n_estimators': 25476.0, 'subsample': 0.802690404599828, 'min_child_weight': 3.0, 'max_depth': 3.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.704912
[6]	validation_0-auc:0.704912
[7]	validation_0-auc:0.758977
[8]	validation_0-auc:0.753610
[9]	validation_0-auc:0.753880
[10]	validation_0-auc:0.756737
[11]	validation_0-auc:0.756737
[12]	validation_0-auc:0.756605
[13]	validation_0-auc:0.756602
[14]	validation_0-auc:0.753987
[15]	validation_0-auc:0.758389
[16]	validation_0-auc:0.760716
[17]	validation_0-auc:0.764817
[18]	validation_0-auc:0.764835
[19]	validation_0-auc:0.764831
[20]	validation_0-auc:0.766268
[21]	validation_0-auc:0.767419
[22]	validation_0-auc:0.767325
[23]	validation_0-auc:0.767525
[24]	validation_0-auc:0.767430
[25]	validation_0-auc:0.769579
[26]	validation_0-auc:0.769537
[27]	validation_0-auc:0.770513
[28]	validation_0-auc:0.770305
[29]	validation_0-auc:0.770283
[30]	validati

auc 0.842189572272 params {'colsample_bytree': 0.6199986650131059, 'scale_pos_weight': 0.30251525762720033, 'learning_rate': 0.021600903484051248, 'max_delta_step': 4.0, 'base_score': 0.6592170905114092, 'n_estimators': 4409.0, 'subsample': 0.6420688508870701, 'min_child_weight': 3.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.500000
[23]	validation_0-auc:0.500000
[24]	validation_0-auc:0.500000
[25]	validation_0-auc:0.500000
[26]	validation_0-auc:0.500000
[27]	validation_0-auc:0.500000
[28]	validation_0-auc:0.500000
[29]	validation_0-auc:0.500000
[30]	validati

auc 0.5 params {'colsample_bytree': 0.8862196605882628, 'scale_pos_weight': 0.23197037931997935, 'learning_rate': 0.01882789514186331, 'max_delta_step': 2.0, 'base_score': 0.8023318918047861, 'n_estimators': 8253.0, 'subsample': 0.8095333279818704, 'min_child_weight': 12.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.710925
[1]	validation_0-auc:0.710924
[2]	validation_0-auc:0.710924
[3]	validation_0-auc:0.716565
[4]	validation_0-auc:0.764610
[5]	validation_0-auc:0.777948
[6]	validation_0-auc:0.778221
[7]	validation_0-auc:0.778529
[8]	validation_0-auc:0.778582
[9]	validation_0-auc:0.780851
[10]	validation_0-auc:0.780442
[11]	validation_0-auc:0.781040
[12]	validation_0-auc:0.781988
[13]	validation_0-auc:0.782046
[14]	validation_0-auc:0.776812
[15]	validation_0-auc:0.791026
[16]	validation_0-auc:0.790855
[17]	validation_0-auc:0.794083
[18]	validation_0-auc:0.793865
[19]	validation_0-auc:0.794203
[20]	validation_0-auc:0.794987
[21]	validation_0-auc:0.797395
[22]	validation_0-auc:0.797311
[23]	validation_0-auc:0.802687
[24]	validation_0-auc:0.803481
[25]	validation_0-auc:0.804546
[26]	validation_0-auc:0.802927
[27]	validation_0-auc:0.800490
[28]	validation_0-auc:0.802623
[29]	validation_0-auc:0.804434
[30]	validati

auc 0.839463041589 params {'colsample_bytree': 0.8405769752769505, 'scale_pos_weight': 0.41051587967793113, 'learning_rate': 0.025811987040444663, 'max_delta_step': 7.0, 'base_score': 0.6005796228930116, 'n_estimators': 6125.0, 'subsample': 0.9466548171401592, 'min_child_weight': 2.0, 'max_depth': 3.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.678514
[1]	validation_0-auc:0.761375
[2]	validation_0-auc:0.764560
[3]	validation_0-auc:0.763858
[4]	validation_0-auc:0.764726
[5]	validation_0-auc:0.784749
[6]	validation_0-auc:0.785559
[7]	validation_0-auc:0.785802
[8]	validation_0-auc:0.786358
[9]	validation_0-auc:0.796755
[10]	validation_0-auc:0.796792
[11]	validation_0-auc:0.799025
[12]	validation_0-auc:0.798146
[13]	validation_0-auc:0.801925
[14]	validation_0-auc:0.798481
[15]	validation_0-auc:0.799210
[16]	validation_0-auc:0.803676
[17]	validation_0-auc:0.806816
[18]	validation_0-auc:0.811545
[19]	validation_0-auc:0.809743
[20]	validation_0-auc:0.808976
[21]	validation_0-auc:0.810423
[22]	validation_0-auc:0.810010
[23]	validation_0-auc:0.811858
[24]	validation_0-auc:0.810474
[25]	validation_0-auc:0.813064
[26]	validation_0-auc:0.811681
[27]	validation_0-auc:0.811412
[28]	validation_0-auc:0.812958
[29]	validation_0-auc:0.812794
[30]	validati

auc 0.837086513949 params {'colsample_bytree': 0.6389569596049459, 'scale_pos_weight': 0.4146078206511208, 'learning_rate': 0.027779786593098342, 'max_delta_step': 6.0, 'base_score': 0.4534542233813451, 'n_estimators': 24607.0, 'subsample': 0.8282126500581627, 'min_child_weight': 2.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.500000
[23]	validation_0-auc:0.500000
[24]	validation_0-auc:0.500000
[25]	validation_0-auc:0.500000
[26]	validation_0-auc:0.500000
[27]	validation_0-auc:0.500000
[28]	validation_0-auc:0.500000
[29]	validation_0-auc:0.500000
[30]	validati

auc 0.5 params {'colsample_bytree': 0.6505798605773329, 'scale_pos_weight': 0.449235488902072, 'learning_rate': 0.017596900682887225, 'max_delta_step': 9.0, 'base_score': 0.9992505228742916, 'n_estimators': 22637.0, 'subsample': 0.6566521076306963, 'min_child_weight': 3.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.504242
[9]	validation_0-auc:0.504242
[10]	validation_0-auc:0.504242
[11]	validation_0-auc:0.504242
[12]	validation_0-auc:0.505965
[13]	validation_0-auc:0.693461
[14]	validation_0-auc:0.716103
[15]	validation_0-auc:0.726761
[16]	validation_0-auc:0.727297
[17]	validation_0-auc:0.730967
[18]	validation_0-auc:0.746318
[19]	validation_0-auc:0.745200
[20]	validation_0-auc:0.803325
[21]	validation_0-auc:0.809380
[22]	validation_0-auc:0.807436
[23]	validation_0-auc:0.811702
[24]	validation_0-auc:0.810292
[25]	validation_0-auc:0.812760
[26]	validation_0-auc:0.810431
[27]	validation_0-auc:0.810009
[28]	validation_0-auc:0.811852
[29]	validation_0-auc:0.813976
[30]	validati

auc 0.839275542536 params {'colsample_bytree': 0.701662826904287, 'scale_pos_weight': 0.9584873326722159, 'learning_rate': 0.01478933551943857, 'max_delta_step': 2.0, 'base_score': 0.6558649875889305, 'n_estimators': 16133.0, 'subsample': 0.8600614421886996, 'min_child_weight': 10.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.710925
[10]	validation_0-auc:0.710925
[11]	validation_0-auc:0.714950
[12]	validation_0-auc:0.718301
[13]	validation_0-auc:0.766533
[14]	validation_0-auc:0.763239
[15]	validation_0-auc:0.765158
[16]	validation_0-auc:0.765833
[17]	validation_0-auc:0.774356
[18]	validation_0-auc:0.775561
[19]	validation_0-auc:0.775265
[20]	validation_0-auc:0.775634
[21]	validation_0-auc:0.783527
[22]	validation_0-auc:0.786472
[23]	validation_0-auc:0.786765
[24]	validation_0-auc:0.798935
[25]	validation_0-auc:0.800064
[26]	validation_0-auc:0.800332
[27]	validation_0-auc:0.800176
[28]	validation_0-auc:0.800247
[29]	validation_0-auc:0.805031
[30]	validati

auc 0.840484543357 params {'colsample_bytree': 0.7424959418874832, 'scale_pos_weight': 0.5049156334554732, 'learning_rate': 0.029242438675174773, 'max_delta_step': 4.0, 'base_score': 0.8911482425726851, 'n_estimators': 33317.0, 'subsample': 0.9123615595169865, 'min_child_weight': 12.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.674601
[9]	validation_0-auc:0.708541
[10]	validation_0-auc:0.710229
[11]	validation_0-auc:0.710229
[12]	validation_0-auc:0.710229
[13]	validation_0-auc:0.710734
[14]	validation_0-auc:0.710734
[15]	validation_0-auc:0.759826
[16]	validation_0-auc:0.759812
[17]	validation_0-auc:0.759812
[18]	validation_0-auc:0.760700
[19]	validation_0-auc:0.760700
[20]	validation_0-auc:0.760835
[21]	validation_0-auc:0.760583
[22]	validation_0-auc:0.762347
[23]	validation_0-auc:0.762893
[24]	validation_0-auc:0.764703
[25]	validation_0-auc:0.770005
[26]	validation_0-auc:0.769996
[27]	validation_0-auc:0.766395
[28]	validation_0-auc:0.766405
[29]	validation_0-auc:0.769981
[30]	validati

auc 0.841079060176 params {'colsample_bytree': 0.7244628764583512, 'scale_pos_weight': 0.30372868742781667, 'learning_rate': 0.014051278037298562, 'max_delta_step': 8.0, 'base_score': 0.7348784226656926, 'n_estimators': 5273.0, 'subsample': 0.7678034465394823, 'min_child_weight': 3.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.785989
[1]	validation_0-auc:0.798225
[2]	validation_0-auc:0.798948
[3]	validation_0-auc:0.802549
[4]	validation_0-auc:0.804028
[5]	validation_0-auc:0.809750
[6]	validation_0-auc:0.811143
[7]	validation_0-auc:0.815193
[8]	validation_0-auc:0.815538
[9]	validation_0-auc:0.817158
[10]	validation_0-auc:0.816998
[11]	validation_0-auc:0.818513
[12]	validation_0-auc:0.819989
[13]	validation_0-auc:0.820726
[14]	validation_0-auc:0.819951
[15]	validation_0-auc:0.820253
[16]	validation_0-auc:0.820617
[17]	validation_0-auc:0.821181
[18]	validation_0-auc:0.821322
[19]	validation_0-auc:0.821135
[20]	validation_0-auc:0.821152
[21]	validation_0-auc:0.822417
[22]	validation_0-auc:0.822492
[23]	validation_0-auc:0.822834
[24]	validation_0-auc:0.822891
[25]	validation_0-auc:0.822641
[26]	validation_0-auc:0.822681
[27]	validation_0-auc:0.824149
[28]	validation_0-auc:0.824193
[29]	validation_0-auc:0.824541
[30]	validati

auc 0.840795983277 params {'colsample_bytree': 0.8661086606561865, 'scale_pos_weight': 0.5582113958777162, 'learning_rate': 0.024233401548082067, 'max_delta_step': 9.0, 'base_score': 0.47459502245197577, 'n_estimators': 25482.0, 'subsample': 0.924561450523434, 'min_child_weight': 13.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.727343
[1]	validation_0-auc:0.767503
[2]	validation_0-auc:0.791609
[3]	validation_0-auc:0.786828
[4]	validation_0-auc:0.788141
[5]	validation_0-auc:0.801595
[6]	validation_0-auc:0.808724
[7]	validation_0-auc:0.810033
[8]	validation_0-auc:0.806576
[9]	validation_0-auc:0.807698
[10]	validation_0-auc:0.808814
[11]	validation_0-auc:0.814935
[12]	validation_0-auc:0.816609
[13]	validation_0-auc:0.816418
[14]	validation_0-auc:0.817689
[15]	validation_0-auc:0.817822
[16]	validation_0-auc:0.817896
[17]	validation_0-auc:0.817565
[18]	validation_0-auc:0.819250
[19]	validation_0-auc:0.818674
[20]	validation_0-auc:0.819462
[21]	validation_0-auc:0.819220
[22]	validation_0-auc:0.818861
[23]	validation_0-auc:0.819914
[24]	validation_0-auc:0.819919
[25]	validation_0-auc:0.819354
[26]	validation_0-auc:0.820568
[27]	validation_0-auc:0.820943
[28]	validation_0-auc:0.821862
[29]	validation_0-auc:0.821716
[30]	validati

auc 0.839771161664 params {'colsample_bytree': 0.9855822460533539, 'scale_pos_weight': 0.4621557602405239, 'learning_rate': 0.020412113540178228, 'max_delta_step': 2.0, 'base_score': 0.4315335694347931, 'n_estimators': 8621.0, 'subsample': 0.8645315769799858, 'min_child_weight': 13.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.692812
[19]	validation_0-auc:0.710596
[20]	validation_0-auc:0.711333
[21]	validation_0-auc:0.711333
[22]	validation_0-auc:0.711333
[23]	validation_0-auc:0.761154
[24]	validation_0-auc:0.762303
[25]	validation_0-auc:0.762303
[26]	validation_0-auc:0.762303
[27]	validation_0-auc:0.761155
[28]	validation_0-auc:0.761182
[29]	validation_0-auc:0.762328
[30]	validati

auc 0.839506103342 params {'colsample_bytree': 0.7918581576358503, 'scale_pos_weight': 0.3185342627713292, 'learning_rate': 0.01736500664939674, 'max_delta_step': 4.0, 'base_score': 0.8412470012913114, 'n_estimators': 9325.0, 'subsample': 0.6612863747599186, 'min_child_weight': 3.0, 'max_depth': 3.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.678362
[13]	validation_0-auc:0.678362
[14]	validation_0-auc:0.678362
[15]	validation_0-auc:0.759398
[16]	validation_0-auc:0.760366
[17]	validation_0-auc:0.760366
[18]	validation_0-auc:0.760357
[19]	validation_0-auc:0.760384
[20]	validation_0-auc:0.760384
[21]	validation_0-auc:0.761140
[22]	validation_0-auc:0.761140
[23]	validation_0-auc:0.761159
[24]	validation_0-auc:0.761159
[25]	validation_0-auc:0.773766
[26]	validation_0-auc:0.773758
[27]	validation_0-auc:0.773758
[28]	validation_0-auc:0.773697
[29]	validation_0-auc:0.773697
[30]	validati

auc 0.839145635571 params {'colsample_bytree': 0.6355814559156833, 'scale_pos_weight': 0.2708352225307879, 'learning_rate': 0.027836063384428983, 'max_delta_step': 6.0, 'base_score': 0.9084978228610991, 'n_estimators': 28119.0, 'subsample': 0.9775400832478583, 'min_child_weight': 11.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.500000
[23]	validation_0-auc:0.500000
[24]	validation_0-auc:0.500000
[25]	validation_0-auc:0.500000
[26]	validation_0-auc:0.500000
[27]	validation_0-auc:0.500000
[28]	validation_0-auc:0.500000
[29]	validation_0-auc:0.500000
[30]	validati

auc 0.5 params {'colsample_bytree': 0.6780497847395665, 'scale_pos_weight': 0.7641840892111316, 'learning_rate': 0.01769128180219213, 'max_delta_step': 2.0, 'base_score': 0.9537753965557723, 'n_estimators': 13637.0, 'subsample': 0.8997369045274755, 'min_child_weight': 14.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.704237
[1]	validation_0-auc:0.770558
[2]	validation_0-auc:0.800464
[3]	validation_0-auc:0.792751
[4]	validation_0-auc:0.793657
[5]	validation_0-auc:0.811002
[6]	validation_0-auc:0.810541
[7]	validation_0-auc:0.810926
[8]	validation_0-auc:0.815078
[9]	validation_0-auc:0.816454
[10]	validation_0-auc:0.817038
[11]	validation_0-auc:0.811968
[12]	validation_0-auc:0.809951
[13]	validation_0-auc:0.811507
[14]	validation_0-auc:0.810079
[15]	validation_0-auc:0.811062
[16]	validation_0-auc:0.816326
[17]	validation_0-auc:0.813693
[18]	validation_0-auc:0.814151
[19]	validation_0-auc:0.812980
[20]	validation_0-auc:0.814112
[21]	validation_0-auc:0.817242
[22]	validation_0-auc:0.813832
[23]	validation_0-auc:0.814467
[24]	validation_0-auc:0.813006
[25]	validation_0-auc:0.814613
[26]	validation_0-auc:0.813320
[27]	validation_0-auc:0.812366
[28]	validation_0-auc:0.813559
[29]	validation_0-auc:0.813941
[30]	validati

auc 0.839259159824 params {'colsample_bytree': 0.7001048751899173, 'scale_pos_weight': 0.8025958726408313, 'learning_rate': 0.01583361301447416, 'max_delta_step': 2.0, 'base_score': 0.5233972426323402, 'n_estimators': 37435.0, 'subsample': 0.7963849550095061, 'min_child_weight': 14.0, 'max_depth': 3.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.710333
[1]	validation_0-auc:0.710226
[2]	validation_0-auc:0.710226
[3]	validation_0-auc:0.710226
[4]	validation_0-auc:0.710226
[5]	validation_0-auc:0.710226
[6]	validation_0-auc:0.760355
[7]	validation_0-auc:0.760355
[8]	validation_0-auc:0.760225
[9]	validation_0-auc:0.760225
[10]	validation_0-auc:0.760225
[11]	validation_0-auc:0.759672
[12]	validation_0-auc:0.759672
[13]	validation_0-auc:0.759672
[14]	validation_0-auc:0.759672
[15]	validation_0-auc:0.759672
[16]	validation_0-auc:0.760291
[17]	validation_0-auc:0.760451
[18]	validation_0-auc:0.760451
[19]	validation_0-auc:0.760449
[20]	validation_0-auc:0.760449
[21]	validation_0-auc:0.760447
[22]	validation_0-auc:0.761870
[23]	validation_0-auc:0.761870
[24]	validation_0-auc:0.762211
[25]	validation_0-auc:0.762384
[26]	validation_0-auc:0.763209
[27]	validation_0-auc:0.764767
[28]	validation_0-auc:0.764852
[29]	validation_0-auc:0.764862
[30]	validati

auc 0.815855505926 params {'colsample_bytree': 0.8365112438437619, 'scale_pos_weight': 0.28698585592072184, 'learning_rate': 0.011849556612446778, 'max_delta_step': 4.0, 'base_score': 0.5151679776257312, 'n_estimators': 1038.0, 'subsample': 0.8991622065985414, 'min_child_weight': 5.0, 'max_depth': 2.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.500000
[23]	validation_0-auc:0.500000
[24]	validation_0-auc:0.500000
[25]	validation_0-auc:0.500000
[26]	validation_0-auc:0.500000
[27]	validation_0-auc:0.500000
[28]	validation_0-auc:0.500000
[29]	validation_0-auc:0.500000
[30]	validati

auc 0.5 params {'colsample_bytree': 0.8784986580589766, 'scale_pos_weight': 0.617915918108495, 'learning_rate': 0.01100828162027363, 'max_delta_step': 2.0, 'base_score': 0.9259968378064084, 'n_estimators': 2678.0, 'subsample': 0.9669541935806596, 'min_child_weight': 7.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.685409
[9]	validation_0-auc:0.733703
[10]	validation_0-auc:0.796822
[11]	validation_0-auc:0.800329
[12]	validation_0-auc:0.789554
[13]	validation_0-auc:0.806686
[14]	validation_0-auc:0.797564
[15]	validation_0-auc:0.805240
[16]	validation_0-auc:0.807326
[17]	validation_0-auc:0.806409
[18]	validation_0-auc:0.806784
[19]	validation_0-auc:0.807304
[20]	validation_0-auc:0.807340
[21]	validation_0-auc:0.814577
[22]	validation_0-auc:0.816316
[23]	validation_0-auc:0.817332
[24]	validation_0-auc:0.817828
[25]	validation_0-auc:0.817363
[26]	validation_0-auc:0.816330
[27]	validation_0-auc:0.815237
[28]	validation_0-auc:0.816360
[29]	validation_0-auc:0.816740
[30]	validati

auc 0.838487969532 params {'colsample_bytree': 0.7729154208697534, 'scale_pos_weight': 0.6462460286288838, 'learning_rate': 0.025428782145635202, 'max_delta_step': 2.0, 'base_score': 0.6311693385309614, 'n_estimators': 26583.0, 'subsample': 0.9608246099250033, 'min_child_weight': 13.0, 'max_depth': 3.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.785457
[1]	validation_0-auc:0.807668
[2]	validation_0-auc:0.818928
[3]	validation_0-auc:0.824009
[4]	validation_0-auc:0.825791
[5]	validation_0-auc:0.827211
[6]	validation_0-auc:0.826451
[7]	validation_0-auc:0.827227
[8]	validation_0-auc:0.826177
[9]	validation_0-auc:0.825735
[10]	validation_0-auc:0.825648
[11]	validation_0-auc:0.820691
[12]	validation_0-auc:0.820384
[13]	validation_0-auc:0.820850
[14]	validation_0-auc:0.822889
[15]	validation_0-auc:0.823305
[16]	validation_0-auc:0.823672
[17]	validation_0-auc:0.823105
[18]	validation_0-auc:0.820041
[19]	validation_0-auc:0.818342
[20]	validation_0-auc:0.819269
[21]	validation_0-auc:0.820150
[22]	validation_0-auc:0.818994
[23]	validation_0-auc:0.819667
[24]	validation_0-auc:0.820458
[25]	validation_0-auc:0.821442
[26]	validation_0-auc:0.822008
[27]	validation_0-auc:0.822892
[28]	validation_0-auc:0.823622
[29]	validation_0-auc:0.824198
[30]	validati

auc 0.834581017561 params {'colsample_bytree': 0.9615726791969124, 'scale_pos_weight': 0.9996539913949202, 'learning_rate': 0.022487020336813296, 'max_delta_step': 3.0, 'base_score': 0.5563646225157248, 'n_estimators': 18459.0, 'subsample': 0.6001077822894957, 'min_child_weight': 6.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.692911
[1]	validation_0-auc:0.776221
[2]	validation_0-auc:0.786457
[3]	validation_0-auc:0.784765
[4]	validation_0-auc:0.787368
[5]	validation_0-auc:0.802246
[6]	validation_0-auc:0.807011
[7]	validation_0-auc:0.810990
[8]	validation_0-auc:0.813637
[9]	validation_0-auc:0.816343
[10]	validation_0-auc:0.815948
[11]	validation_0-auc:0.816437
[12]	validation_0-auc:0.814800
[13]	validation_0-auc:0.818563
[14]	validation_0-auc:0.815244
[15]	validation_0-auc:0.818554
[16]	validation_0-auc:0.818957
[17]	validation_0-auc:0.819575
[18]	validation_0-auc:0.820509
[19]	validation_0-auc:0.819308
[20]	validation_0-auc:0.818297
[21]	validation_0-auc:0.820200
[22]	validation_0-auc:0.818763
[23]	validation_0-auc:0.819939
[24]	validation_0-auc:0.819764
[25]	validation_0-auc:0.821646
[26]	validation_0-auc:0.816805
[27]	validation_0-auc:0.814067
[28]	validation_0-auc:0.812716
[29]	validation_0-auc:0.812253
[30]	validati

auc 0.836949582385 params {'colsample_bytree': 0.6096953556765353, 'scale_pos_weight': 0.8803573526737914, 'learning_rate': 0.022334563262755947, 'max_delta_step': 5.0, 'base_score': 0.5885866935613888, 'n_estimators': 30900.0, 'subsample': 0.7207196547868879, 'min_child_weight': 5.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.692933
[1]	validation_0-auc:0.781155
[2]	validation_0-auc:0.793661
[3]	validation_0-auc:0.793463
[4]	validation_0-auc:0.795478
[5]	validation_0-auc:0.805919
[6]	validation_0-auc:0.809202
[7]	validation_0-auc:0.810703
[8]	validation_0-auc:0.813411
[9]	validation_0-auc:0.816294
[10]	validation_0-auc:0.816101
[11]	validation_0-auc:0.811852
[12]	validation_0-auc:0.810409
[13]	validation_0-auc:0.811626
[14]	validation_0-auc:0.809284
[15]	validation_0-auc:0.810555
[16]	validation_0-auc:0.815735
[17]	validation_0-auc:0.814246
[18]	validation_0-auc:0.812065
[19]	validation_0-auc:0.810943
[20]	validation_0-auc:0.810369
[21]	validation_0-auc:0.811983
[22]	validation_0-auc:0.810952
[23]	validation_0-auc:0.812339
[24]	validation_0-auc:0.812262
[25]	validation_0-auc:0.813558
[26]	validation_0-auc:0.813014
[27]	validation_0-auc:0.812920
[28]	validation_0-auc:0.810575
[29]	validation_0-auc:0.810313
[30]	validati

auc 0.84071611455 params {'colsample_bytree': 0.6132467032849127, 'scale_pos_weight': 0.7089356527699596, 'learning_rate': 0.022273506964779508, 'max_delta_step': 3.0, 'base_score': 0.40109415532405707, 'n_estimators': 18856.0, 'subsample': 0.6134991562979462, 'min_child_weight': 8.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.678728
[1]	validation_0-auc:0.698080
[2]	validation_0-auc:0.792907
[3]	validation_0-auc:0.794505
[4]	validation_0-auc:0.804072
[5]	validation_0-auc:0.814012
[6]	validation_0-auc:0.815550
[7]	validation_0-auc:0.815713
[8]	validation_0-auc:0.816976
[9]	validation_0-auc:0.817783
[10]	validation_0-auc:0.818404
[11]	validation_0-auc:0.814312
[12]	validation_0-auc:0.815709
[13]	validation_0-auc:0.816053
[14]	validation_0-auc:0.816408
[15]	validation_0-auc:0.816385
[16]	validation_0-auc:0.816256
[17]	validation_0-auc:0.818830
[18]	validation_0-auc:0.818982
[19]	validation_0-auc:0.817026
[20]	validation_0-auc:0.817979
[21]	validation_0-auc:0.818822
[22]	validation_0-auc:0.817842
[23]	validation_0-auc:0.820226
[24]	validation_0-auc:0.821161
[25]	validation_0-auc:0.822031
[26]	validation_0-auc:0.822329
[27]	validation_0-auc:0.819663
[28]	validation_0-auc:0.820637
[29]	validation_0-auc:0.820596
[30]	validati

auc 0.837223589854 params {'colsample_bytree': 0.9272992076601769, 'scale_pos_weight': 0.9048531512973906, 'learning_rate': 0.024351361207262213, 'max_delta_step': 3.0, 'base_score': 0.6964748139228987, 'n_estimators': 14158.0, 'subsample': 0.711382912333417, 'min_child_weight': 5.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.713454
[1]	validation_0-auc:0.776587
[2]	validation_0-auc:0.777324
[3]	validation_0-auc:0.763977
[4]	validation_0-auc:0.764045
[5]	validation_0-auc:0.781480
[6]	validation_0-auc:0.774775
[7]	validation_0-auc:0.775161
[8]	validation_0-auc:0.775802
[9]	validation_0-auc:0.779080
[10]	validation_0-auc:0.788814
[11]	validation_0-auc:0.791455
[12]	validation_0-auc:0.793087
[13]	validation_0-auc:0.793215
[14]	validation_0-auc:0.773007
[15]	validation_0-auc:0.776014
[16]	validation_0-auc:0.776670
[17]	validation_0-auc:0.777880
[18]	validation_0-auc:0.778283
[19]	validation_0-auc:0.779213
[20]	validation_0-auc:0.780736
[21]	validation_0-auc:0.782117
[22]	validation_0-auc:0.784054
[23]	validation_0-auc:0.785469
[24]	validation_0-auc:0.787817
[25]	validation_0-auc:0.786721
[26]	validation_0-auc:0.784772
[27]	validation_0-auc:0.785860
[28]	validation_0-auc:0.785500
[29]	validation_0-auc:0.788166
[30]	validati

auc 0.834020420894 params {'colsample_bytree': 0.8225434621155635, 'scale_pos_weight': 0.8108384671214798, 'learning_rate': 0.02130433471397549, 'max_delta_step': 5.0, 'base_score': 0.7994983812287377, 'n_estimators': 38119.0, 'subsample': 0.6452194826186735, 'min_child_weight': 1.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.500000
[23]	validation_0-auc:0.500000
[24]	validation_0-auc:0.500000
[25]	validation_0-auc:0.500000
[26]	validation_0-auc:0.500000
[27]	validation_0-auc:0.500000
[28]	validation_0-auc:0.500000
[29]	validation_0-auc:0.500000
[30]	validati

auc 0.5 params {'colsample_bytree': 0.6712087540076502, 'scale_pos_weight': 0.6786932867010336, 'learning_rate': 0.023697351629738274, 'max_delta_step': 1.0, 'base_score': 0.6446451142901017, 'n_estimators': 21594.0, 'subsample': 0.706774615993358, 'min_child_weight': 7.0, 'max_depth': 2.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.713893
[1]	validation_0-auc:0.779520
[2]	validation_0-auc:0.784101
[3]	validation_0-auc:0.773131
[4]	validation_0-auc:0.772984
[5]	validation_0-auc:0.803960
[6]	validation_0-auc:0.805230
[7]	validation_0-auc:0.806836
[8]	validation_0-auc:0.803608
[9]	validation_0-auc:0.811634
[10]	validation_0-auc:0.815544
[11]	validation_0-auc:0.813584
[12]	validation_0-auc:0.809607
[13]	validation_0-auc:0.816501
[14]	validation_0-auc:0.811478
[15]	validation_0-auc:0.815422
[16]	validation_0-auc:0.817404
[17]	validation_0-auc:0.816242
[18]	validation_0-auc:0.817108
[19]	validation_0-auc:0.815395
[20]	validation_0-auc:0.816759
[21]	validation_0-auc:0.818187
[22]	validation_0-auc:0.818835
[23]	validation_0-auc:0.820590
[24]	validation_0-auc:0.821840
[25]	validation_0-auc:0.821059
[26]	validation_0-auc:0.820153
[27]	validation_0-auc:0.819525
[28]	validation_0-auc:0.820994
[29]	validation_0-auc:0.821644
[30]	validati

auc 0.84168582193 params {'colsample_bytree': 0.740740992747089, 'scale_pos_weight': 0.568791319319883, 'learning_rate': 0.02659416902512807, 'max_delta_step': 4.0, 'base_score': 0.5081971995008678, 'n_estimators': 32628.0, 'subsample': 0.7493135931564708, 'min_child_weight': 9.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.678882
[1]	validation_0-auc:0.759346
[2]	validation_0-auc:0.759700
[3]	validation_0-auc:0.761475
[4]	validation_0-auc:0.767854
[5]	validation_0-auc:0.789187
[6]	validation_0-auc:0.790426
[7]	validation_0-auc:0.801325
[8]	validation_0-auc:0.800186
[9]	validation_0-auc:0.802913
[10]	validation_0-auc:0.803981
[11]	validation_0-auc:0.803272
[12]	validation_0-auc:0.802733
[13]	validation_0-auc:0.807573
[14]	validation_0-auc:0.801732
[15]	validation_0-auc:0.807876
[16]	validation_0-auc:0.812058
[17]	validation_0-auc:0.813195
[18]	validation_0-auc:0.813992
[19]	validation_0-auc:0.812658
[20]	validation_0-auc:0.811852
[21]	validation_0-auc:0.813257
[22]	validation_0-auc:0.812441
[23]	validation_0-auc:0.813937
[24]	validation_0-auc:0.813195
[25]	validation_0-auc:0.814796
[26]	validation_0-auc:0.813869
[27]	validation_0-auc:0.813289
[28]	validation_0-auc:0.811115
[29]	validation_0-auc:0.810491
[30]	validati

auc 0.84103914987 params {'colsample_bytree': 0.6012679715319871, 'scale_pos_weight': 0.5366705774621712, 'learning_rate': 0.027165563035580563, 'max_delta_step': 6.0, 'base_score': 0.5125975401179372, 'n_estimators': 34627.0, 'subsample': 0.7485069892161134, 'min_child_weight': 9.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.713737
[1]	validation_0-auc:0.777785
[2]	validation_0-auc:0.777358
[3]	validation_0-auc:0.763238
[4]	validation_0-auc:0.763302
[5]	validation_0-auc:0.765875
[6]	validation_0-auc:0.768358
[7]	validation_0-auc:0.784399
[8]	validation_0-auc:0.782407
[9]	validation_0-auc:0.803432
[10]	validation_0-auc:0.804293
[11]	validation_0-auc:0.802836
[12]	validation_0-auc:0.794962
[13]	validation_0-auc:0.794927
[14]	validation_0-auc:0.793710
[15]	validation_0-auc:0.797436
[16]	validation_0-auc:0.803612
[17]	validation_0-auc:0.802790
[18]	validation_0-auc:0.803061
[19]	validation_0-auc:0.797083
[20]	validation_0-auc:0.799075
[21]	validation_0-auc:0.807417
[22]	validation_0-auc:0.805170
[23]	validation_0-auc:0.806610
[24]	validation_0-auc:0.806116
[25]	validation_0-auc:0.809090
[26]	validation_0-auc:0.807597
[27]	validation_0-auc:0.808688
[28]	validation_0-auc:0.809745
[29]	validation_0-auc:0.812824
[30]	validati

auc 0.840671681557 params {'colsample_bytree': 0.7139516265366563, 'scale_pos_weight': 0.35388029776890695, 'learning_rate': 0.02920680358141848, 'max_delta_step': 5.0, 'base_score': 0.40670292506643557, 'n_estimators': 29470.0, 'subsample': 0.6886475693367647, 'min_child_weight': 9.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.713893
[1]	validation_0-auc:0.777218
[2]	validation_0-auc:0.777252
[3]	validation_0-auc:0.764493
[4]	validation_0-auc:0.773078
[5]	validation_0-auc:0.786918
[6]	validation_0-auc:0.787334
[7]	validation_0-auc:0.787455
[8]	validation_0-auc:0.787413
[9]	validation_0-auc:0.804806
[10]	validation_0-auc:0.812673
[11]	validation_0-auc:0.807190
[12]	validation_0-auc:0.804348
[13]	validation_0-auc:0.809779
[14]	validation_0-auc:0.808357
[15]	validation_0-auc:0.810397
[16]	validation_0-auc:0.814344
[17]	validation_0-auc:0.812655
[18]	validation_0-auc:0.813969
[19]	validation_0-auc:0.817078
[20]	validation_0-auc:0.818331
[21]	validation_0-auc:0.820088
[22]	validation_0-auc:0.820231
[23]	validation_0-auc:0.820322
[24]	validation_0-auc:0.820224
[25]	validation_0-auc:0.820785
[26]	validation_0-auc:0.820592
[27]	validation_0-auc:0.820410
[28]	validation_0-auc:0.821365
[29]	validation_0-auc:0.821353
[30]	validati

auc 0.841108289243 params {'colsample_bytree': 0.8013138885278842, 'scale_pos_weight': 0.5785432259517088, 'learning_rate': 0.02641645990231272, 'max_delta_step': 4.0, 'base_score': 0.6752429974905398, 'n_estimators': 11895.0, 'subsample': 0.7537093373294113, 'min_child_weight': 10.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.710333
[1]	validation_0-auc:0.773859
[2]	validation_0-auc:0.774161
[3]	validation_0-auc:0.760056
[4]	validation_0-auc:0.760063
[5]	validation_0-auc:0.760400
[6]	validation_0-auc:0.759944
[7]	validation_0-auc:0.774451
[8]	validation_0-auc:0.759736
[9]	validation_0-auc:0.759541
[10]	validation_0-auc:0.759757
[11]	validation_0-auc:0.762267
[12]	validation_0-auc:0.764680
[13]	validation_0-auc:0.764695
[14]	validation_0-auc:0.766448
[15]	validation_0-auc:0.767444
[16]	validation_0-auc:0.767812
[17]	validation_0-auc:0.771930
[18]	validation_0-auc:0.771936
[19]	validation_0-auc:0.772503
[20]	validation_0-auc:0.772738
[21]	validation_0-auc:0.772367
[22]	validation_0-auc:0.772605
[23]	validation_0-auc:0.774034
[24]	validation_0-auc:0.774550
[25]	validation_0-auc:0.778546
[26]	validation_0-auc:0.772493
[27]	validation_0-auc:0.773046
[28]	validation_0-auc:0.774073
[29]	validation_0-auc:0.774325
[30]	validati

auc 0.841593179018 params {'colsample_bytree': 0.7444541832183825, 'scale_pos_weight': 0.37874502415622424, 'learning_rate': 0.019347801002280986, 'max_delta_step': 7.0, 'base_score': 0.6164271800802942, 'n_estimators': 34659.0, 'subsample': 0.61973805701594, 'min_child_weight': 6.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.500000
[23]	validation_0-auc:0.500000
[24]	validation_0-auc:0.500000
[25]	validation_0-auc:0.500000
[26]	validation_0-auc:0.500000
[27]	validation_0-auc:0.500000
[28]	validation_0-auc:0.500000
[29]	validation_0-auc:0.500000
[30]	validati

auc 0.5 params {'colsample_bytree': 0.6779248163947736, 'scale_pos_weight': 0.7208292631490114, 'learning_rate': 0.025285430808625, 'max_delta_step': 1.0, 'base_score': 0.759278748405368, 'n_estimators': 19455.0, 'subsample': 0.6878537479116554, 'min_child_weight': 8.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.713893
[1]	validation_0-auc:0.714332
[2]	validation_0-auc:0.758945
[3]	validation_0-auc:0.763777
[4]	validation_0-auc:0.764765
[5]	validation_0-auc:0.794016
[6]	validation_0-auc:0.794053
[7]	validation_0-auc:0.800146
[8]	validation_0-auc:0.799446
[9]	validation_0-auc:0.803843
[10]	validation_0-auc:0.807034
[11]	validation_0-auc:0.809101
[12]	validation_0-auc:0.811945
[13]	validation_0-auc:0.817998
[14]	validation_0-auc:0.818208
[15]	validation_0-auc:0.818173
[16]	validation_0-auc:0.818427
[17]	validation_0-auc:0.819622
[18]	validation_0-auc:0.820942
[19]	validation_0-auc:0.821996
[20]	validation_0-auc:0.821797
[21]	validation_0-auc:0.821313
[22]	validation_0-auc:0.822097
[23]	validation_0-auc:0.822901
[24]	validation_0-auc:0.822667
[25]	validation_0-auc:0.822598
[26]	validation_0-auc:0.822494
[27]	validation_0-auc:0.820835
[28]	validation_0-auc:0.821531
[29]	validation_0-auc:0.821727
[30]	validati

auc 0.841436857637 params {'colsample_bytree': 0.9118994496931756, 'scale_pos_weight': 0.4982360057996215, 'learning_rate': 0.02088031665916088, 'max_delta_step': 4.0, 'base_score': 0.482026749169325, 'n_estimators': 39453.0, 'subsample': 0.7770482938332124, 'min_child_weight': 4.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.692812
[8]	validation_0-auc:0.703163
[9]	validation_0-auc:0.711020
[10]	validation_0-auc:0.711020
[11]	validation_0-auc:0.711020
[12]	validation_0-auc:0.711020
[13]	validation_0-auc:0.711559
[14]	validation_0-auc:0.711559
[15]	validation_0-auc:0.711546
[16]	validation_0-auc:0.711546
[17]	validation_0-auc:0.708911
[18]	validation_0-auc:0.758245
[19]	validation_0-auc:0.758245
[20]	validation_0-auc:0.760659
[21]	validation_0-auc:0.760657
[22]	validation_0-auc:0.760648
[23]	validation_0-auc:0.760778
[24]	validation_0-auc:0.761260
[25]	validation_0-auc:0.761048
[26]	validation_0-auc:0.761062
[27]	validation_0-auc:0.761062
[28]	validation_0-auc:0.761078
[29]	validation_0-auc:0.763271
[30]	validati

auc 0.839250691815 params {'colsample_bytree': 0.7858868331441188, 'scale_pos_weight': 0.2005557339605228, 'learning_rate': 0.029600507766719943, 'max_delta_step': 7.0, 'base_score': 0.5658996013293515, 'n_estimators': 32313.0, 'subsample': 0.8334176367093558, 'min_child_weight': 10.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.710333
[14]	validation_0-auc:0.710333
[15]	validation_0-auc:0.710333
[16]	validation_0-auc:0.710333
[17]	validation_0-auc:0.710333
[18]	validation_0-auc:0.711043
[19]	validation_0-auc:0.711042
[20]	validation_0-auc:0.711042
[21]	validation_0-auc:0.711042
[22]	validation_0-auc:0.711042
[23]	validation_0-auc:0.760967
[24]	validation_0-auc:0.760967
[25]	validation_0-auc:0.762841
[26]	validation_0-auc:0.762841
[27]	validation_0-auc:0.762841
[28]	validation_0-auc:0.762834
[29]	validation_0-auc:0.762834
[30]	validati

auc 0.83878966643 params {'colsample_bytree': 0.6531685019880271, 'scale_pos_weight': 0.21626074370529416, 'learning_rate': 0.028123635974835335, 'max_delta_step': 3.0, 'base_score': 0.7300390821931141, 'n_estimators': 4949.0, 'subsample': 0.7375061980722962, 'min_child_weight': 11.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.678362
[1]	validation_0-auc:0.756602
[2]	validation_0-auc:0.758877
[3]	validation_0-auc:0.757994
[4]	validation_0-auc:0.758365
[5]	validation_0-auc:0.761756
[6]	validation_0-auc:0.765948
[7]	validation_0-auc:0.765914
[8]	validation_0-auc:0.769300
[9]	validation_0-auc:0.770050
[10]	validation_0-auc:0.770175
[11]	validation_0-auc:0.770619
[12]	validation_0-auc:0.773820
[13]	validation_0-auc:0.773838
[14]	validation_0-auc:0.768720
[15]	validation_0-auc:0.769012
[16]	validation_0-auc:0.769532
[17]	validation_0-auc:0.773949
[18]	validation_0-auc:0.774199
[19]	validation_0-auc:0.773751
[20]	validation_0-auc:0.773955
[21]	validation_0-auc:0.774261
[22]	validation_0-auc:0.773960
[23]	validation_0-auc:0.773937
[24]	validation_0-auc:0.773860
[25]	validation_0-auc:0.779378
[26]	validation_0-auc:0.778713
[27]	validation_0-auc:0.778530
[28]	validation_0-auc:0.778654
[29]	validation_0-auc:0.778436
[30]	validati

auc 0.841899278319 params {'colsample_bytree': 0.6282202431696802, 'scale_pos_weight': 0.3533146298639731, 'learning_rate': 0.01846048545406044, 'max_delta_step': 5.0, 'base_score': 0.5421922988771722, 'n_estimators': 16130.0, 'subsample': 0.6769578142966699, 'min_child_weight': 7.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.678362
[1]	validation_0-auc:0.756602
[2]	validation_0-auc:0.758877
[3]	validation_0-auc:0.757994
[4]	validation_0-auc:0.758365
[5]	validation_0-auc:0.761756
[6]	validation_0-auc:0.765806
[7]	validation_0-auc:0.765831
[8]	validation_0-auc:0.764722
[9]	validation_0-auc:0.765885
[10]	validation_0-auc:0.765862
[11]	validation_0-auc:0.765878
[12]	validation_0-auc:0.769078
[13]	validation_0-auc:0.768709
[14]	validation_0-auc:0.763715
[15]	validation_0-auc:0.765679
[16]	validation_0-auc:0.766166
[17]	validation_0-auc:0.771470
[18]	validation_0-auc:0.771391
[19]	validation_0-auc:0.771463
[20]	validation_0-auc:0.771830
[21]	validation_0-auc:0.772877
[22]	validation_0-auc:0.772721
[23]	validation_0-auc:0.772754
[24]	validation_0-auc:0.773072
[25]	validation_0-auc:0.774748
[26]	validation_0-auc:0.775092
[27]	validation_0-auc:0.775319
[28]	validation_0-auc:0.775205
[29]	validation_0-auc:0.775282
[30]	validati

auc 0.830223553047 params {'colsample_bytree': 0.6320162653092456, 'scale_pos_weight': 0.35230266383874764, 'learning_rate': 0.012993701546203572, 'max_delta_step': 10.0, 'base_score': 0.546126727815881, 'n_estimators': 10859.0, 'subsample': 0.6774994818208557, 'min_child_weight': 2.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.678024
[20]	validation_0-auc:0.678024
[21]	validation_0-auc:0.678024
[22]	validation_0-auc:0.678335
[23]	validation_0-auc:0.678335
[24]	validation_0-auc:0.678335
[25]	validation_0-auc:0.678335
[26]	validation_0-auc:0.678335
[27]	validation_0-auc:0.678335
[28]	validation_0-auc:0.678335
[29]	validation_0-auc:0.678535
[30]	validati

auc 0.83715803495 params {'colsample_bytree': 0.6013061510336535, 'scale_pos_weight': 0.2500538822908879, 'learning_rate': 0.018421194526332726, 'max_delta_step': 5.0, 'base_score': 0.788494198920454, 'n_estimators': 15775.0, 'subsample': 0.6291732026693703, 'min_child_weight': 1.0, 'max_depth': 6.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.678362
[1]	validation_0-auc:0.756602
[2]	validation_0-auc:0.759193
[3]	validation_0-auc:0.758231
[4]	validation_0-auc:0.758231
[5]	validation_0-auc:0.763207
[6]	validation_0-auc:0.759750
[7]	validation_0-auc:0.759622
[8]	validation_0-auc:0.758530
[9]	validation_0-auc:0.759960
[10]	validation_0-auc:0.760134
[11]	validation_0-auc:0.761041
[12]	validation_0-auc:0.762652
[13]	validation_0-auc:0.762679
[14]	validation_0-auc:0.762275
[15]	validation_0-auc:0.764761
[16]	validation_0-auc:0.766007
[17]	validation_0-auc:0.770215
[18]	validation_0-auc:0.769984
[19]	validation_0-auc:0.770721
[20]	validation_0-auc:0.770884
[21]	validation_0-auc:0.770897
[22]	validation_0-auc:0.770758
[23]	validation_0-auc:0.771183
[24]	validation_0-auc:0.771518
[25]	validation_0-auc:0.773969
[26]	validation_0-auc:0.773746
[27]	validation_0-auc:0.773964
[28]	validation_0-auc:0.774049
[29]	validation_0-auc:0.773935
[30]	validati

auc 0.841130830507 params {'colsample_bytree': 0.624837424930562, 'scale_pos_weight': 0.40959129667620925, 'learning_rate': 0.01578604858561505, 'max_delta_step': 7.0, 'base_score': 0.6704410792356681, 'n_estimators': 7105.0, 'subsample': 0.6653050265555432, 'min_child_weight': 4.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.710333
[14]	validation_0-auc:0.710333
[15]	validation_0-auc:0.710333
[16]	validation_0-auc:0.710333
[17]	validation_0-auc:0.710333
[18]	validation_0-auc:0.713334
[19]	validation_0-auc:0.713269
[20]	validation_0-auc:0.713536
[21]	validation_0-auc:0.756586
[22]	validation_0-auc:0.756891
[23]	validation_0-auc:0.761358
[24]	validation_0-auc:0.761358
[25]	validation_0-auc:0.761358
[26]	validation_0-auc:0.761358
[27]	validation_0-auc:0.761358
[28]	validation_0-auc:0.761314
[29]	validation_0-auc:0.761314
[30]	validati

auc 0.841900505218 params {'colsample_bytree': 0.6540753858746919, 'scale_pos_weight': 0.3266676833570749, 'learning_rate': 0.016504623601821845, 'max_delta_step': 8.0, 'base_score': 0.8425173778578414, 'n_estimators': 2514.0, 'subsample': 0.6001806378112017, 'min_child_weight': 6.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.737804
[8]	validation_0-auc:0.752747
[9]	validation_0-auc:0.753409
[10]	validation_0-auc:0.756001
[11]	validation_0-auc:0.756001
[12]	validation_0-auc:0.755870
[13]	validation_0-auc:0.756359
[14]	validation_0-auc:0.756359
[15]	validation_0-auc:0.760634
[16]	validation_0-auc:0.760875
[17]	validation_0-auc:0.760875
[18]	validation_0-auc:0.763289
[19]	validation_0-auc:0.763278
[20]	validation_0-auc:0.767172
[21]	validation_0-auc:0.767357
[22]	validation_0-auc:0.767884
[23]	validation_0-auc:0.768705
[24]	validation_0-auc:0.768580
[25]	validation_0-auc:0.776523
[26]	validation_0-auc:0.776556
[27]	validation_0-auc:0.776326
[28]	validation_0-auc:0.776344
[29]	validation_0-auc:0.776341
[30]	validati

auc 0.838048499081 params {'colsample_bytree': 0.6558959052694197, 'scale_pos_weight': 0.46452152467349317, 'learning_rate': 0.01660387567611707, 'max_delta_step': 10.0, 'base_score': 0.8531238785219676, 'n_estimators': 1013.0, 'subsample': 0.6007804067754831, 'min_child_weight': 4.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.678362
[23]	validation_0-auc:0.678362
[24]	validation_0-auc:0.678362
[25]	validation_0-auc:0.678362
[26]	validation_0-auc:0.677726
[27]	validation_0-auc:0.677726
[28]	validation_0-auc:0.757080
[29]	validation_0-auc:0.757959
[30]	validati

auc 0.838731015841 params {'colsample_bytree': 0.6951190035027264, 'scale_pos_weight': 0.5041973204354026, 'learning_rate': 0.013927606547856188, 'max_delta_step': 8.0, 'base_score': 0.9774356932599612, 'n_estimators': 4368.0, 'subsample': 0.627463723935481, 'min_child_weight': 2.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.500000
[20]	validation_0-auc:0.500000
[21]	validation_0-auc:0.500000
[22]	validation_0-auc:0.500000
[23]	validation_0-auc:0.500000
[24]	validation_0-auc:0.500000
[25]	validation_0-auc:0.500000
[26]	validation_0-auc:0.500000
[27]	validation_0-auc:0.500000
[28]	validation_0-auc:0.500000
[29]	validation_0-auc:0.500000
[30]	validati

auc 0.5 params {'colsample_bytree': 0.7232359264853074, 'scale_pos_weight': 0.20023306729541457, 'learning_rate': 0.019212303542967074, 'max_delta_step': 9.0, 'base_score': 0.8610848318953102, 'n_estimators': 3673.0, 'subsample': 0.6431526847516491, 'min_child_weight': 6.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.678362
[13]	validation_0-auc:0.710333
[14]	validation_0-auc:0.710333
[15]	validation_0-auc:0.710333
[16]	validation_0-auc:0.710333
[17]	validation_0-auc:0.710333
[18]	validation_0-auc:0.760366
[19]	validation_0-auc:0.760366
[20]	validation_0-auc:0.760366
[21]	validation_0-auc:0.760890
[22]	validation_0-auc:0.760890
[23]	validation_0-auc:0.760869
[24]	validation_0-auc:0.763008
[25]	validation_0-auc:0.759154
[26]	validation_0-auc:0.759154
[27]	validation_0-auc:0.760524
[28]	validation_0-auc:0.760524
[29]	validation_0-auc:0.761036
[30]	validati

auc 0.841526709955 params {'colsample_bytree': 0.7632241409835525, 'scale_pos_weight': 0.24755812495739435, 'learning_rate': 0.01992240645055632, 'max_delta_step': 9.0, 'base_score': 0.7176229027599674, 'n_estimators': 6964.0, 'subsample': 0.8186005249599753, 'min_child_weight': 2.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.678362
[9]	validation_0-auc:0.710333
[10]	validation_0-auc:0.710333
[11]	validation_0-auc:0.710333
[12]	validation_0-auc:0.710333
[13]	validation_0-auc:0.759440
[14]	validation_0-auc:0.759440
[15]	validation_0-auc:0.759440
[16]	validation_0-auc:0.759390
[17]	validation_0-auc:0.759390
[18]	validation_0-auc:0.760862
[19]	validation_0-auc:0.760862
[20]	validation_0-auc:0.760862
[21]	validation_0-auc:0.760862
[22]	validation_0-auc:0.760862
[23]	validation_0-auc:0.760862
[24]	validation_0-auc:0.760862
[25]	validation_0-auc:0.776767
[26]	validation_0-auc:0.773469
[27]	validation_0-auc:0.773469
[28]	validation_0-auc:0.773526
[29]	validation_0-auc:0.773646
[30]	validati

auc 0.840263460948 params {'colsample_bytree': 0.6898255455287783, 'scale_pos_weight': 0.30369954001524035, 'learning_rate': 0.01516605613853247, 'max_delta_step': 8.0, 'base_score': 0.7823897073363554, 'n_estimators': 2251.0, 'subsample': 0.9945741049469491, 'min_child_weight': 3.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.710333
[3]	validation_0-auc:0.710333
[4]	validation_0-auc:0.711043
[5]	validation_0-auc:0.772623
[6]	validation_0-auc:0.770069
[7]	validation_0-auc:0.770069
[8]	validation_0-auc:0.756591
[9]	validation_0-auc:0.760880
[10]	validation_0-auc:0.775010
[11]	validation_0-auc:0.775010
[12]	validation_0-auc:0.759370
[13]	validation_0-auc:0.759369
[14]	validation_0-auc:0.759369
[15]	validation_0-auc:0.761643
[16]	validation_0-auc:0.778448
[17]	validation_0-auc:0.774189
[18]	validation_0-auc:0.774024
[19]	validation_0-auc:0.767154
[20]	validation_0-auc:0.768272
[21]	validation_0-auc:0.774660
[22]	validation_0-auc:0.767420
[23]	validation_0-auc:0.768675
[24]	validation_0-auc:0.768827
[25]	validation_0-auc:0.793761
[26]	validation_0-auc:0.786197
[27]	validation_0-auc:0.784407
[28]	validation_0-auc:0.784677
[29]	validation_0-auc:0.784710
[30]	validati

auc 0.837531421247 params {'colsample_bytree': 0.6540822876823836, 'scale_pos_weight': 0.42680168533990537, 'learning_rate': 0.01677781208762188, 'max_delta_step': 6.0, 'base_score': 0.8136843883338478, 'n_estimators': 11705.0, 'subsample': 0.8524977185655622, 'min_child_weight': 4.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.710333
[14]	validation_0-auc:0.710333
[15]	validation_0-auc:0.710333
[16]	validation_0-auc:0.710333
[17]	validation_0-auc:0.710333
[18]	validation_0-auc:0.713334
[19]	validation_0-auc:0.713311
[20]	validation_0-auc:0.716589
[21]	validation_0-auc:0.758474
[22]	validation_0-auc:0.758741
[23]	validation_0-auc:0.759057
[24]	validation_0-auc:0.759890
[25]	validation_0-auc:0.759890
[26]	validation_0-auc:0.759603
[27]	validation_0-auc:0.759603
[28]	validation_0-auc:0.759603
[29]	validation_0-auc:0.762819
[30]	validati

auc 0.809146340437 params {'colsample_bytree': 0.8548155504783141, 'scale_pos_weight': 0.39754159496791563, 'learning_rate': 0.012578770849364236, 'max_delta_step': 8.0, 'base_score': 0.876523480465641, 'n_estimators': 8760.0, 'subsample': 0.6019667107424682, 'min_child_weight': 1.0, 'max_depth': 4.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.710333
[10]	validation_0-auc:0.710333
[11]	validation_0-auc:0.710333
[12]	validation_0-auc:0.710925
[13]	validation_0-auc:0.710925
[14]	validation_0-auc:0.710925
[15]	validation_0-auc:0.760018
[16]	validation_0-auc:0.760018
[17]	validation_0-auc:0.760018
[18]	validation_0-auc:0.775389
[19]	validation_0-auc:0.763198
[20]	validation_0-auc:0.763214
[21]	validation_0-auc:0.763213
[22]	validation_0-auc:0.779912
[23]	validation_0-auc:0.779622
[24]	validation_0-auc:0.780024
[25]	validation_0-auc:0.780064
[26]	validation_0-auc:0.766966
[27]	validation_0-auc:0.765381
[28]	validation_0-auc:0.765349
[29]	validation_0-auc:0.767068
[30]	validati

auc 0.830903640065 params {'colsample_bytree': 0.8160651622834605, 'scale_pos_weight': 0.3194680632155796, 'learning_rate': 0.02158798199520957, 'max_delta_step': 7.0, 'base_score': 0.8262470471168736, 'n_estimators': 2278.0, 'subsample': 0.7952934916304413, 'min_child_weight': 5.0, 'max_depth': 3.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.500000
[6]	validation_0-auc:0.500000
[7]	validation_0-auc:0.500000
[8]	validation_0-auc:0.500000
[9]	validation_0-auc:0.500000
[10]	validation_0-auc:0.500000
[11]	validation_0-auc:0.500000
[12]	validation_0-auc:0.500000
[13]	validation_0-auc:0.500000
[14]	validation_0-auc:0.500000
[15]	validation_0-auc:0.500000
[16]	validation_0-auc:0.500000
[17]	validation_0-auc:0.500000
[18]	validation_0-auc:0.500000
[19]	validation_0-auc:0.678024
[20]	validation_0-auc:0.710344
[21]	validation_0-auc:0.710344
[22]	validation_0-auc:0.710344
[23]	validation_0-auc:0.760944
[24]	validation_0-auc:0.760944
[25]	validation_0-auc:0.760944
[26]	validation_0-auc:0.760944
[27]	validation_0-auc:0.760944
[28]	validation_0-auc:0.760944
[29]	validation_0-auc:0.760944
[30]	validati

auc 0.841272429099 params {'colsample_bytree': 0.6699839087191768, 'scale_pos_weight': 0.45038079775921824, 'learning_rate': 0.010731457848557755, 'max_delta_step': 6.0, 'base_score': 0.9281751280068482, 'n_estimators': 5839.0, 'subsample': 0.7214679401811748, 'min_child_weight': 6.0, 'max_depth': 5.0}


Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.710925
[1]	validation_0-auc:0.773915
[2]	validation_0-auc:0.773915
[3]	validation_0-auc:0.759795
[4]	validation_0-auc:0.762509
[5]	validation_0-auc:0.777442
[6]	validation_0-auc:0.779798
[7]	validation_0-auc:0.781874
[8]	validation_0-auc:0.781229
[9]	validation_0-auc:0.785056
[10]	validation_0-auc:0.786142
[11]	validation_0-auc:0.784548
[12]	validation_0-auc:0.784865
[13]	validation_0-auc:0.785908
[14]	validation_0-auc:0.779184
[15]	validation_0-auc:0.780084
[16]	validation_0-auc:0.801126
[17]	validation_0-auc:0.798882
[18]	validation_0-auc:0.799202
[19]	validation_0-auc:0.797024
[20]	validation_0-auc:0.797774
[21]	validation_0-auc:0.805068
[22]	validation_0-auc:0.801713
[23]	validation_0-auc:0.801645
[24]	validation_0-auc:0.799726
[25]	validation_0-auc:0.804806
[26]	validation_0-auc:0.803492
[27]	validation_0-auc:0.802391
[28]	validation_0-auc:0.798440
[29]	validation_0-auc:0.803496
[30]	validati

auc 0.840439364601 params {'colsample_bytree': 0.710955267817827, 'scale_pos_weight': 0.5297986988018918, 'learning_rate': 0.020020473914413705, 'max_delta_step': 9.0, 'base_score': 0.7637105054651051, 'n_estimators': 10199.0, 'subsample': 0.7817688404200562, 'min_child_weight': 3.0, 'max_depth': 4.0}
{'colsample_bytree': 0.6199986650131059, 'scale_pos_weight': 0.30251525762720033, 'learning_rate': 0.021600903484051248, 'max_delta_step': 4.0, 'base_score': 0.6592170905114092, 'n_estimators': 4409.0, 'subsample': 0.6420688508870701, 'min_child': 3.0, 'max_depth': 5.0}


In [None]:
#best params 
auc 0.842189572272 params {'colsample_bytree': 0.6199986650131059, 'scale_pos_weight': 0.30251525762720033, 'learning_rate': 0.021600903484051248, 'max_delta_step': 4.0, 'base_score': 0.6592170905114092, 'n_estimators': 4409.0, 'subsample': 0.6420688508870701, 'min_child_weight': 3.0, 'max_depth': 5.0}

In [106]:
# try to blend two classifiers: logistic regression for category data and xgboost for (train - category) data 
from sklearn.linear_model import LogisticRegression
import xgboost
from sklearn.metrics import roc_auc_score
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials

model_train = int(0.7 * category_train.shape[0])
# for xgb
dtrain = train[:int(model_train/2)]
dtarget = target[:int(model_train/2)]

dtrain_test = train[int(model_train/2):model_train]
dtrain_test_target = target[int(model_train/2):model_train]

# for log regression
dcat_train = category_train[:int(model_train)]
dcat_target = target[:int(model_train)]

dtest = train[model_train:]
dcat_test = category_train[model_train:]
dtest_target = target[model_train:]

xgb_clf = xgb.XGBClassifier(missing=np.nan, nthread=4, seed=13, 
                            scale_pos_weight =  0.30251525762720033,
                            learning_rate = 0.021600903484051248,
                            max_delta_step = 9.0,
                            base_score = 0.6592170905114092,
                            n_estimators = 4409,
                            subsample = 0.6420688508870701,
                            min_child_weight = 3.0,
                            max_depth = 5.0,
                            colsample_bytree=0.6199986650131059)
xgb_clf.fit(dtrain, dtarget, eval_metric="auc", eval_set=[(dtrain_test, dtrain_test_target)], early_stopping_rounds=30)

category_clf = LogisticRegression(dual = False, penalty='l2', random_state=13, multi_class='ovr', verbose=1, 
                                    n_jobs=4, solver='sag', C=3.9516364538326187, tol=0.060459326089611576)
category_clf.fit(dcat_train, dcat_target)

def objective(params):
    xgb_pred = xgb_clf.predict_proba(dtest)[:,1]
    log_pred = category_clf.predict_proba(dcat_test)[:,1]
    
    pred = xgb_pred + params['cat_k'] * log_pred
    for i in range(len(pred)):
        if pred[i] >= params['threshold']:
            pred[i] = 1
        else:
            pred[i] = 0
    
    auc = roc_auc_score(dtest_target, pred)
    print "auc {} params {}".format(auc, params)
    return {'loss':1-auc, 'status': STATUS_OK }
    
params = {
    'cat_k': hp.uniform('cat_k', 0.1, 1),
    'threshold': hp.uniform('threshold', 0, 1),
}    
iteration_number = 1000

trials = Trials()
best_res = fmin(fn=objective,
                space=params,
                algo=tpe.suggest,
                max_evals=iteration_number,
                trials=trials)
print "BEST "
print best_res

Will train until validation_0 error hasn't decreased in 30 rounds.
[0]	validation_0-auc:0.500000
[1]	validation_0-auc:0.500000
[2]	validation_0-auc:0.500000
[3]	validation_0-auc:0.500000
[4]	validation_0-auc:0.500000
[5]	validation_0-auc:0.704912
[6]	validation_0-auc:0.704912
[7]	validation_0-auc:0.758977
[8]	validation_0-auc:0.753610
[9]	validation_0-auc:0.753880
[10]	validation_0-auc:0.756737
[11]	validation_0-auc:0.756737
[12]	validation_0-auc:0.756605
[13]	validation_0-auc:0.756602
[14]	validation_0-auc:0.753987
[15]	validation_0-auc:0.758389
[16]	validation_0-auc:0.760716
[17]	validation_0-auc:0.764817
[18]	validation_0-auc:0.764835
[19]	validation_0-auc:0.764831
[20]	validation_0-auc:0.766268
[21]	validation_0-auc:0.767419
[22]	validation_0-auc:0.767325
[23]	validation_0-auc:0.767525
[24]	validation_0-auc:0.767430
[25]	validation_0-auc:0.769579
[26]	validation_0-auc:0.769537
[27]	validation_0-auc:0.770513
[28]	validation_0-auc:0.770305
[29]	validation_0-auc:0.770283
[30]	validati

convergence after 10 epochs took 2 seconds


[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:    2.0s finished


auc 0.5 params {'threshold': 0.6964691855978616, 'cat_k': 0.3575254014553415}
auc 0.706431116471 params {'threshold': 0.10606490595473272, 'cat_k': 0.7709243363970517}
auc 0.5 params {'threshold': 0.50672601149957, 'cat_k': 0.15177762565128078}
auc 0.53113177113 params {'threshold': 0.10517212721767522, 'cat_k': 0.21532966483060034}
auc 0.5 params {'threshold': 0.5238333199546759, 'cat_k': 0.13596667673497678}
auc 0.5 params {'threshold': 0.8666370428503979, 'cat_k': 0.3368303646376725}
auc 0.5 params {'threshold': 0.5705316251454068, 'cat_k': 0.34143379823251085}
auc 0.523337714085 params {'threshold': 0.1416302690767407, 'cat_k': 0.38038992501483104}
auc 0.500525762355 params {'threshold': 0.650153605471749, 'cat_k': 0.9532982492562428}
auc 0.5 params {'threshold': 0.7809038987924661, 'cat_k': 0.44303008763740737}
auc 0.5 params {'threshold': 0.4195086163487056, 'cat_k': 0.21669477335629372}
auc 0.5 params {'threshold': 0.8114036263085849, 'cat_k': 0.5029878203624306}
auc 0.5 params 

In [107]:
# best params 
auc 0.768332723492 params {'threshold': 0.01958767611980896, 'cat_k': 0.10053511401918697}

In [None]:
# try another objective function
def objective(params):
    xgb_pred = xgb_clf.predict_proba(dtest)[:,1]
    log_pred = category_clf.predict_proba(dcat_test)[:,1]
    
    pred = params['xgb_k'] * xgb_pred**params['xgb_n'] + params['cat_k'] * log_pred**params['cat_n']
    for i in range(len(pred)):
        if pred[i] >= params['threshold']:
            pred[i] = 1
        else:
            pred[i] = 0
    
    auc = roc_auc_score(dtest_target, pred)
    print "auc {} params {}".format(auc, params)
    return {'loss':1-auc, 'status': STATUS_OK }
    
params = {
    'cat_k': hp.uniform('cat_k', 0, 1),
    'cat_n': hp.uniform('cat_n', 0, 2),
    'xgb_k': hp.uniform('xgb_k', 0, 1),
    'xgb_n': hp.uniform('xgb_n', 0, 2),
    'threshold': hp.uniform('threshold', 0, 10),
}    
iteration_number = 10000

trials = Trials()
best_res = fmin(fn=objective,
                space=params,
                algo=tpe.suggest,
                max_evals=iteration_number,
                trials=trials)
print "BEST "
print best_res


auc 0.5 params {'threshold': 2.268514535642031, 'xgb_k': 0.28613933495037946, 'cat_n': 1.1026295381657825, 'xgb_n': 1.3929383711957233, 'cat_k': 0.7194689697855631}
auc 0.5 params {'threshold': 5.723135366314988, 'xgb_k': 0.745471484885613, 'cat_n': 0.9164823675217628, 'xgb_n': 0.21212981190946545, 'cat_k': 0.3847059030441362}
auc 0.5 params {'threshold': 6.275799857910759, 'xgb_k': 0.05753069516808973, 'cat_n': 0.26510505440031973, 'xgb_n': 1.01345202299914, 'cat_k': 0.13108514971070295}
auc 0.5 params {'threshold': 0.8740630845952124, 'xgb_k': 0.12814407203400036, 'cat_n': 0.23095712534208368, 'xgb_n': 0.21034425443535043, 'cat_k': 0.7477276197510565}
auc 0.5 params {'threshold': 1.8596531220043622, 'xgb_k': 0.039962974149974184, 'cat_n': 1.545581295905817, 'xgb_n': 1.0476666399093517, 'cat_k': 0.5521488787070138}
auc 0.5 params {'threshold': 1.31408478173487, 'xgb_k': 0.2631448495974139, 'cat_n': 0.08318688749537473, 'xgb_n': 1.7332740857007958, 'cat_k': 0.23892433469051455}
auc 0.5

In [None]:
# best parametrs 
auc 0.767830705243 {'threshold': 0.010177010937287788, 'xgb_k': 0.17652353342254384, 'cat_n': 2.2238509460958156, 'xgb_n': 0.7107432826000818, 'cat_k': 0.7078955901408212}

In [60]:
# best results are just using xgboost
xgb_clf = xgb.XGBClassifier(missing=np.nan, nthread=4, seed=13, 
                            scale_pos_weight = 0.3999090923487273,
                            learning_rate = 0.2674798971406099,
                            max_delta_step = 8.0,
                            base_score = 0.6868410504015201,
                            n_estimators = 34952,
                            subsample = 0.8866035180913567,
                            min_child_weight = 4.0,
                            max_depth = 4.0,
                            colsample_bytree=0.7)

xgb_clf.fit(train, target)
test_pred = xgb_clf.predict_proba(test)[:,1]

print test_pred

KeyboardInterrupt: 

In [105]:
submission = pd.DataFrame({"ID": test.index, "TARGET": test_pred})
submission.to_csv("/Users/Kseniya/study/4_term/ml/my_submission.csv", index=False)

In [None]:
%%time
# previous testing. not importent now
from sklearn import svm

classes = [
  {'type': "svm linear", 'classifier': svm.SVC, 'params': {'kernel': 'linear' }},
  {'type': "svm poly 3", 'classifier': svm.SVC, 'params': {'kernel': 'poly', 'degree': '3' }},
  {'type': "svm poly 2", 'classifier': svm.SVC, 'params': {'kernel': 'poly', 'degree': '2' }}, 
  {'type': "svm linear", 'classifier': svm.SVC, 'params': {'kernel': 'sigmoid', 'coef': '1' }}, 
]

train_el = 500
for c in classes:
    params = dict(c.get('params', {}))
    classifier = c['classifier']
    classifier = classifier(**params)
    res = cross_validation.cross_val_score(classifier, train[:train_el], target[:train_el], n_jobs=4, scoring='roc_auc').mean()
    print "{}: {}\n".format(c['type'], res)

from sklearn.ensemble import AdaBoostClassifier
classes = [
    {'type': "ada boost 15", 'classifier': AdaBoostClassifier, 'params': {'n_estimators': 15}},
    {'type': "ada boost 10", 'classifier': AdaBoostClassifier, 'params': {'n_estimators': 10}},
]

for c in classes:
    params = dict(c.get('params', {}))
    classifier = c['classifier']
    classifier = classifier(**params)
    res = cross_validation.cross_val_score(classifier, train[:train_el], target[:train_el], n_jobs=4, scoring='roc_auc').mean()
    print "{}: {}\n".format(c['type'], res)
    