In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_absolute_error
from sklearn.cross_validation import train_test_split
import matplotlib.pylab as plt
import pandas as pd, numpy as np
import xgboost as xgb

from hyperopt import hp, tpe, STATUS_OK, Trials
from hyperopt.fmin import fmin



In [2]:
# Load the training and test tables from CSV files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ("train_big.csv", "test_big.csv"))

In [3]:
# Offset added to the loss before taking the log; xg_eval_mae subtracts the
# same value again after exponentiating.
shift = 200

# NOTE(review): ids are read from 'data/test.csv' while `test` comes from
# "test_big.csv" -- presumably both files list the same rows in the same
# order; verify before building a submission.
ids = pd.read_csv('data/test.csv')['id']

# Feature matrices: every column except the target and the row identifier.
# NOTE(review): this assumes the test table also carries a 'loss' column --
# TODO confirm.
X_train = train.drop(['loss', 'id'], axis=1)
X_test = test.drop(['loss', 'id'], axis=1)

# Training target on the log scale.
y_train = np.log(train['loss'] + shift)

In [4]:
# Hold out 20% of the training rows for validation; the fixed random_state
# keeps the split identical between runs.
X_train_local, X_val, y_train_local, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=0,
)

In [5]:
def xg_eval_mae(yhat, dtrain):
    """Custom XGBoost evaluation metric: MAE on the original loss scale.

    Labels and predictions live on the ``log(loss + shift)`` scale, so both
    are mapped back with ``exp(.) - shift`` before the error is computed.

    Parameters
    ----------
    yhat : array-like
        Model predictions on the log scale.
    dtrain : object exposing ``get_label()``
        Data set whose labels are on the same log scale.

    Returns
    -------
    tuple
        ``('mae', value)`` -- the metric name followed by the mean absolute
        error.
    """
    true_loss = np.exp(dtrain.get_label()) - shift
    predicted_loss = np.exp(yhat) - shift
    return 'mae', mean_absolute_error(true_loss, predicted_loss)


In [6]:
# Scale constant of the custom objective defined below.
fair_constant = 0.7
def fair_obj(preds, dtrain):
    """Gradient and hessian of a custom training objective.

    With residual ``x = preds - labels`` and ``c = fair_constant``:

        grad = c * x / (|x| + c)
        hess = c * c / (|x| + c) ** 2

    The training calls in this notebook currently have ``obj=fair_obj``
    commented out, so this function is not in use there.

    Parameters
    ----------
    preds : array-like
        Current model predictions.
    dtrain : object exposing ``get_label()``
        Data set providing the true labels.

    Returns
    -------
    tuple
        ``(grad, hess)``, element-wise over the residuals.
    """
    residual = preds - dtrain.get_label()
    scale = abs(residual) + fair_constant
    gradient = fair_constant * residual / scale
    hessian = fair_constant * fair_constant / (scale * scale)
    return gradient, hessian


def objective(space):
    """Hyperopt objective: train XGBoost and score it on the hold-out split.

    A booster is trained on ``X_train_local`` / ``y_train_local`` with early
    stopping (10 rounds) monitored on ``X_val`` / ``y_val``; the loss reported
    to hyperopt is the hold-out MAE computed by ``xg_eval_mae``.

    Note: a later cell of this notebook defines another function that is also
    named ``objective``; whichever cell ran last is the one in effect.

    Parameters
    ----------
    space : dict
        Sampled hyper-parameters. Only ``'max_depth'`` is consumed (when
        present); every other setting is fixed below.

    Returns
    -------
    dict
        ``{'loss': <float MAE>, 'status': STATUS_OK}``.
    """
    d_train = xgb.DMatrix(X_train_local, label=y_train_local)
    d_valid = xgb.DMatrix(X_val, label=y_val)
    watchlist = [(d_train, 'train'), (d_valid, 'eval')]

    params = {
        'seed': 0,
        'colsample_bytree': 0.8,
        'silent': 1,
        'subsample': 0.8,
        'learning_rate': 1.0,
        'objective': 'reg:linear',
        'min_child_weight': 1,
        'booster': 'gbtree',
        # XGBoost's thread-count parameter is spelled 'nthread'.
        'nthread': 24,
    }

    # Use the sampled depth when the search space provides one; otherwise the
    # XGBoost default applies, as before. Without this every trial would train
    # exactly the same model.
    if isinstance(space, dict) and 'max_depth' in space:
        params['max_depth'] = int(space['max_depth'])

    clf = xgb.train(params,
                    d_train,
                    10000,
                    watchlist,
                    early_stopping_rounds=10,
                    #obj=fair_obj,
                    feval=xg_eval_mae,
                    )

    # Score with the trees up to the best early-stopping iteration only.
    pred = clf.predict(d_valid, ntree_limit=clf.best_ntree_limit)

    # xg_eval_mae returns ('mae', value); hyperopt needs the numeric part.
    _, mae = xg_eval_mae(pred, d_valid)
    return {'loss': mae, 'status': STATUS_OK}




def obj(space):
    """Hyperopt objective: train XGBoost with ``space`` as its parameters.

    The sampled ``space`` dict is handed to ``xgb.train`` unchanged. Training
    runs on ``X_train_local`` / ``y_train_local`` with early stopping
    (10 rounds) monitored on ``X_val`` / ``y_val``; the loss reported to
    hyperopt is the hold-out MAE computed by ``xg_eval_mae``.

    Parameters
    ----------
    space : dict
        XGBoost parameter dictionary sampled by hyperopt.

    Returns
    -------
    dict
        ``{'loss': <float MAE>, 'status': STATUS_OK}``.
    """
    # Single-argument print call: same output under Python 2 and Python 3.
    print("Training with params :  %s" % (space,))

    d_train = xgb.DMatrix(X_train_local, label=y_train_local)
    d_valid = xgb.DMatrix(X_val, label=y_val)
    watchlist = [(d_train, 'train'), (d_valid, 'eval')]

    clf = xgb.train(space,
                    d_train,
                    10000,
                    watchlist,
                    early_stopping_rounds=10,
                    #obj=fair_obj,
                    feval=xg_eval_mae,
                    )

    # Score with the trees up to the best early-stopping iteration only.
    pred = clf.predict(d_valid, ntree_limit=clf.best_ntree_limit)

    # xg_eval_mae returns ('mae', value); hyperopt needs the numeric part.
    _, mae = xg_eval_mae(pred, d_valid)
    return {'loss': mae, 'status': STATUS_OK}

In [7]:
# Search space handed to fmin: only 'max_depth' is sampled, every other entry
# is a fixed XGBoost parameter passed through unchanged.
space = {
        'max_depth': hp.choice('max_depth', [7,8,9,10]),
        # Candidate dimensions kept for later experiments:
        #'min_child_weight': hp.quniform ('min_child', 1, 20, 1),
        #'subsample': hp.uniform ('subsample', 0.8, 1),
        #'n_estimators' : hp.choice('n_estimators', np.arange(1000, 10000, 100, dtype=int)),
        #'learning_rate' : hp.quniform('learning_rate', 0.025, 0.5, 0.025),
        #'gamma' : hp.quniform('gamma', 0.5, 1, 0.05),
        #'colsample_bytree' : hp.quniform('colsample_bytree', 0.5, 1, 0.05),
        'seed': 0,
        'colsample_bytree': 0.8,
        #'silent': 1,
        'subsample': 0.8,
        'learning_rate': 0.7,
        'objective': 'reg:linear',
        'min_child_weight': 1,
        'booster': 'gbtree',
        # XGBoost's thread-count parameter is spelled 'nthread' (was 'nthreads').
        'nthread': 24
    }

In [17]:
# Run 25 TPE evaluations of `obj` over `space`. No Trials object is passed,
# so the per-trial history is not kept.
# NOTE(review): for hp.choice dimensions `best` is expected to hold the index
# of the chosen option rather than its value -- confirm against the hyperopt
# documentation before reading the printed result.
best = fmin(obj, space, algo=tpe.suggest, max_evals=25)

print(best)

Training with params :  {'colsample_bytree': 0.8, 'nthreads': 24, 'learning_rate': 0.7, 'min_child_weight': 1, 'subsample': 0.8, 'seed': 0, 'objective': 'reg:linear', 'max_depth': 9, 'booster': 'gbtree'}
[0]	train-mae:2946.76	eval-mae:2951.32
Multiple eval metrics have been passed: 'eval-mae' will be used for early stopping.

Will train until eval-mae hasn't improved in 10 rounds.
[1]	train-mae:1878.38	eval-mae:1888.13
[2]	train-mae:1316.37	eval-mae:1343.49
[3]	train-mae:1195.94	eval-mae:1247.56
[4]	train-mae:1168.5	eval-mae:1235.38
[5]	train-mae:1146.57	eval-mae:1234.83
[6]	train-mae:1133.87	eval-mae:1235.26
[7]	train-mae:1118.15	eval-mae:1241.8
[8]	train-mae:1110.41	eval-mae:1242.92
[9]	train-mae:1104.59	eval-mae:1243.13
[10]	train-mae:1093.88	eval-mae:1246.12
[11]	train-mae:1088.1	eval-mae:1248.75
[12]	train-mae:1076.82	eval-mae:1251.2
[13]	train-mae:1069.5	eval-mae:1253.6
[14]	train-mae:1063.21	eval-mae:1253.91
[15]	train-mae:1057.68	eval-mae:1254.5
Stopping. Best iteration:
[5]	tr

KeyboardInterrupt: 

Good old brute-force search (старый добрый перебор)

In [33]:
    %%time
    
    seach_res = {}
    
    for g in [i/10.0 for i in range(0,5)]:
        
        space ={
        'max_depth': 6,
        #'min_child_weight': hp.quniform ('min_child', 1, 20, 1),
        #'subsample': hp.uniform ('subsample', 0.8, 1),
        #'n_estimators' : hp.choice('n_estimators', np.arange(1000, 10000, 100, dtype=int)),
        #'learning_rate' : hp.quniform('learning_rate', 0.025, 0.5, 0.025),
        #'gamma' : hp.quniform('gamma', 0.5, 1, 0.05),
        #'colsample_bytree' : hp.quniform('colsample_bytree', 0.5, 1, 0.05),
        'seed': 42,
        'colsample_bytree': 0.8,
        #'silent': 1,
        'subsample': 0.8,
        'learning_rate': 0.03,
        'objective': 'reg:linear',
        'min_child_weight': 97,
        'booster': 'gbtree',
        'nthreads': 24,
    }
    
    
        print "Training with params : ", space
        d_train = xgb.DMatrix(X_train, label=y_train)
        
        xgb.cv(space, d_train, num_boost_round=100000, nfold=3, metrics="mae", 
               feval=xg_eval_mae, early_stopping_rounds=200, verbose_eval=10, seed=42)

        
        #pred = clf.predict(d_valid, ntree_limit=clf.best_ntree_limit)
        #score = xg_eval_mae(pred, d_valid)
        
        #seach_res[mcw] = score[1]
        
        print 'min_child_weight:', mcw
        print '-----------------'

Training with params :  {'subsample': 0.8, 'seed': 42, 'colsample_bytree': 0.8, 'nthreads': 24, 'objective': 'reg:linear', 'learning_rate': 0.1, 'max_depth': 6, 'min_child_weight': 80, 'booster': 'gbtree'}
[0]	train-mae:3233.92+7.12391	test-mae:3233.92+14.2484
[10]	train-mae:2976.5+6.8853	test-mae:2976.55+13.8863
[20]	train-mae:2048.82+5.69911	test-mae:2050.48+12.3487
[30]	train-mae:1401.33+5.67503	test-mae:1410.56+10.3287
[40]	train-mae:1215.95+4.56471	test-mae:1232.31+9.18845
[50]	train-mae:1167.71+4.21425	test-mae:1188.64+7.89961
[60]	train-mae:1149.59+4.02491	test-mae:1173.9+7.24461
[70]	train-mae:1139.86+3.57061	test-mae:1166.66+7.06854
[80]	train-mae:1133.24+3.48597	test-mae:1162.49+7.07632
[90]	train-mae:1127.87+3.54514	test-mae:1159.29+7.05585
[100]	train-mae:1123.26+3.72846	test-mae:1156.98+6.60492
[110]	train-mae:1119.38+3.28646	test-mae:1154.99+6.96383
[120]	train-mae:1115.9+3.58641	test-mae:1153.37+6.80881
[130]	train-mae:1112.58+3.69547	test-mae:1152.12+6.76727
[140]	train

KeyboardInterrupt: 

mcw=1/depth=6 - [340]	train-mae:1025.23+3.91891	test-mae:1146.07+7.37434
mcw=21/depth=6 - [460]	train-mae:1018.09+3.54485	test-mae:1144.44+6.91012
mcw=41/depth=6 - [400]	train-mae:1043.75+3.43271	test-mae:1144.12+7.83714
mcw=61/depth=6 - [510]	train-mae:1033.96+4.0336	test-mae:1144.49+7.20521
mcw=81/depth=6 -
mcw=101/depth=6 -

In [16]:
        # One cross-validated run with a hand-picked parameter set, used for
        # manual one-at-a-time tuning.
        space = {
            'max_depth': 6,
            'gamma' : 0,
            'colsample_bytree': 0.7,
            #'silent': 1,
            'subsample': 0.8,
            'learning_rate': 0.1,
            'objective': 'reg:linear',
            'min_child_weight': 97,
            'booster': 'gbtree',
            # XGBoost's thread-count parameter is spelled 'nthread' (was 'nthreads').
            'nthread': 24,
            'seed': 42
        }


        # Single-argument print call: same output under Python 2 and Python 3.
        print("Training with params :  %s" % (space,))
        d_train = xgb.DMatrix(X_train, label=y_train)

        # NOTE: early stopping is disabled here, so this runs for up to
        # 100000 boosting rounds unless it is interrupted by hand.
        xgb.cv(space, d_train, num_boost_round=100000, nfold=3, metrics="mae",
               feval=xg_eval_mae,
               #early_stopping_rounds=100,
               verbose_eval=10, seed=42)

Training with params :  {'nthreads': 24, 'learning_rate': 0.1, 'seed': 42, 'booster': 'gbtree', 'colsample_bytree': 0.7, 'min_child_weight': 97, 'subsample': 0.8, 'objective': 'reg:linear', 'max_depth': 6, 'gamma': 0, 'lambda': 1.5}
[0]	train-mae:3233.92+7.12354	test-mae:3233.92+14.2487
[10]	train-mae:2976.88+6.88334	test-mae:2976.9+13.8967
[20]	train-mae:2051.11+5.60926	test-mae:2052.55+12.5829
[30]	train-mae:1404.18+5.04386	test-mae:1412.47+10.8077
[40]	train-mae:1219.86+5.10533	test-mae:1234.9+8.81327
[50]	train-mae:1170.82+4.86833	test-mae:1189.76+7.20175
[60]	train-mae:1153.08+4.84655	test-mae:1175.25+6.59042
[70]	train-mae:1143.17+3.65502	test-mae:1167.64+6.64281
[80]	train-mae:1136.04+3.71804	test-mae:1162.9+6.32383
[90]	train-mae:1130.94+3.92217	test-mae:1159.8+6.24067
[100]	train-mae:1126.63+3.66525	test-mae:1157.38+6.27106
[110]	train-mae:1122.7+3.5288	test-mae:1155.15+6.4611
[120]	train-mae:1119.56+4.01812	test-mae:1153.81+6.16824
[130]	train-mae:1116.69+4.31963	test-mae:115

KeyboardInterrupt: 

lambda = 1 - [500]	train-mae:1048.78+2.86435	test-mae:1141.98+7.11705
lambda = 0.5 - [600]	train-mae:1034.63+3.16577	test-mae:1142.85+7.39968
lambda = 1.5 - [500]	train-mae:1049.91+3.3814	test-mae:1142.52+7.43783


In [None]:
colsample = 0.8 - train-mae:1048.9+3.95934	test-mae:1141.69+7.37217 
                  [490]	train-mae:1049.16+4.25344	test-mae:1142.85+6.72124

colsample = 0.7 -  [500]	train-mae:1048.78+2.86435	test-mae:1141.98+7.11705
                   [520]	train-mae:1046.2+4.21008	test-mae:1141.39+7.54647

        


subsample=0.7 - [490]	train-mae:1053.38+3.99966	test-mae:1143.67+7.41941

subsample=0.75 - [420]	train-mae:1060.5+3.95251	test-mae:1143.61+6.66474

subsample=0.8 - [490]	train-mae:1048.9+3.95934	test-mae:1141.69+7.37217 

subsample=0.85 - [690]	train-mae:1022.26+3.56749	test-mae:1143.01+7.99066

subsample=0.9 - [550]	train-mae:1039.67+3.1969	test-mae:1141.81+6.42243


gamma = 0 - [610]	train-mae:1033.81+4.62808	test-mae:1141.75+7.59049 

gamma = 0.01 - [500]	train-mae:1047.51+4.35696	test-mae:1142.13+7.93495

gamma = 0.1 - [490]	train-mae:1049.46+3.51804	test-mae:1142.8+7.95768

gamma = 0.5 - [580]	train-mae:1037.7+4.75016	test-mae:1142.8+7.1516



max_depth = 3: test-mae: 1141.63+6.36582 ?? > 4k iter
max_depth = 5: test-mae: 1138.46 - 2750
max_depth = 6: test-mae: 1138.22 - 1660
max_depth = 7: test-mae: 1138.32 - 1470
max_depth = 8: test-mae: 1139.44 - ??
max_depth = 10: test-mae:1143.01  - 600

In [None]:
1133.57

In [32]:
# Display the collected search results (presumably tuned value -> score; verify against the loop that filled it).
seach_res

{1: 1148.401,
 11: 1147.177,
 21: 1148.1711,
 31: 1146.5968,
 41: 1144.9031,
 51: 1144.2423,
 61: 1146.0494,
 71: 1143.9772,
 81: 1142.9365,
 91: 1145.125}

In [30]:
Min_child_weight: 1 . Score: 1146.88
Min_child_weight: 11 . Score: 1148.94
Min_child_weight: 21 . Score: 1146.91
Min_child_weight: 31 . Score: 1144.52
Min_child_weight: 41 . Score: 1146.99
Min_child_weight: 51 . Score: 1145.58
Min_child_weight: 61 . Score: 1143.5
Min_child_weight: 71 . Score: 1144.04
Min_child_weight: 81 . Score: 1143.76
Min_child_weight: 91 . Score: 1143.75


{14: 1143.7512}

In [27]:
# Display the collected search results (presumably tuned value -> score; verify against the loop that filled it).
seach_res

{5: 1149.339,
 6: 1147.8495,
 7: 1146.8751,
 8: 1150.7089,
 9: 1150.5143,
 10: 1155.672,
 11: 1155.2705,
 12: 1160.9513,
 13: 1169.829,
 14: 1170.7006}

In [43]:
def objective(space):
    """Hyperopt objective built on the scikit-learn style ``XGBRegressor``.

    Fits a regressor on ``X_train_local`` / ``y_train_local`` with early
    stopping (10 rounds, RMSE on the log scale) and reports the hold-out MAE
    after mapping labels and predictions back with ``exp(.) - shift``.

    Note: this definition replaces the earlier function of the same name in
    this notebook once the cell is run.

    Parameters
    ----------
    space : dict
        Sampled hyper-parameters; only ``space['max_depth']`` is read.

    Returns
    -------
    dict
        ``{'loss': <float MAE>, 'status': STATUS_OK}``.
    """
    clf = xgb.XGBRegressor(#n_estimators = space['n_estimators'],
                           max_depth = space['max_depth'],
                           min_child_weight = 1,
                           subsample = 0.8,
                           learning_rate = 0.5,
                           gamma = 0,
                           colsample_bytree = 0.8,
                           objective='reg:linear'
                           )

    # The last entry of eval_set is the one used for early stopping (see the
    # training log: 'validation_1-rmse' will be used for early stopping).
    eval_set = [(X_train_local, y_train_local), (X_val, y_val)]

    clf.fit(X_train_local,
            y_train_local,
            eval_set=eval_set,
            eval_metric = 'rmse',
            #feval=xg_eval_mae,
            early_stopping_rounds=10)

    # Predict with the trees up to the best early-stopping iteration, matching
    # the booster-based scorers in this notebook. Falls back to 0 (all trees)
    # if the attribute is missing.
    pred = clf.predict(X_val, ntree_limit=getattr(clf, 'best_ntree_limit', 0))
    mae = mean_absolute_error(np.exp(y_val)-shift, np.exp(pred)-shift)

    return {'loss': mae, 'status': STATUS_OK}


# Search space: only the tree depth is sampled, from the values in range(7, 9).
# Other dimensions tried earlier and currently disabled:
#   'min_child_weight': hp.quniform ('min_child', 1, 20, 1)
#   'subsample': hp.uniform ('subsample', 0.8, 1)
#   'n_estimators' : hp.choice('n_estimators', np.arange(1000, 10000, 100, dtype=int))
#   'learning_rate' : hp.quniform('learning_rate', 0.025, 0.5, 0.025)
#   'gamma' : hp.quniform('gamma', 0.5, 1, 0.05)
#   'colsample_bytree' : hp.quniform('colsample_bytree', 0.5, 1, 0.05)
space = {'max_depth': hp.choice('max_depth', range(7, 9))}

# Record every evaluation in `trials` and run three TPE evaluations of
# `objective`.
# NOTE(review): for hp.choice dimensions `best` is expected to hold the index
# of the chosen option rather than its value -- confirm against the hyperopt
# documentation.
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=3, trials=trials)

print(best)

[0]	validation_0-rmse:3.70067	validation_1-rmse:3.70299
Multiple eval metrics have been passed: 'validation_1-rmse' will be used for early stopping.

Will train until validation_1-rmse hasn't improved in 10 rounds.
[1]	validation_0-rmse:1.9075	validation_1-rmse:1.90984
[2]	validation_0-rmse:1.05408	validation_1-rmse:1.05784
[3]	validation_0-rmse:0.685975	validation_1-rmse:0.691611
[4]	validation_0-rmse:0.552334	validation_1-rmse:0.559991
[5]	validation_0-rmse:0.509469	validation_1-rmse:0.518744
[6]	validation_0-rmse:0.494861	validation_1-rmse:0.505511
[7]	validation_0-rmse:0.48855	validation_1-rmse:0.50111
[8]	validation_0-rmse:0.485102	validation_1-rmse:0.49908
[9]	validation_0-rmse:0.482205	validation_1-rmse:0.497057
[10]	validation_0-rmse:0.479963	validation_1-rmse:0.495868
[11]	validation_0-rmse:0.478588	validation_1-rmse:0.495255
[12]	validation_0-rmse:0.477024	validation_1-rmse:0.4948
[13]	validation_0-rmse:0.475906	validation_1-rmse:0.494369
[14]	validation_0-rmse:0.474923	valid