## XGBoost tuning using HYPEROPT

Adapted from notebook: https://www.kaggle.com/prashant111/a-guide-on-xgboost-hyperparameters-tuning/comments 

In [1]:
import xgboost as xgb

In [2]:
# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

%run Data_Preprocessing.ipynb
X_train_, X_validation, y_train_, y_validation = train_test_split(X_train, y_train, test_size = 0.3, random_state = 0)

In [3]:
space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0
    }

In [4]:
def objective(space):
    clf=xgb.XGBClassifier(
                    n_estimators =space['n_estimators'], max_depth = int(space['max_depth']), gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']))
    
    evaluation = [( X_train_, y_train_), ( X_validation, y_validation)]
    
    clf.fit(X_train_, y_train_,
            eval_set=evaluation, eval_metric="auc",
            early_stopping_rounds=10,verbose=False)
    

    pred = clf.predict(X_validation)
    accuracy = accuracy_score(y_validation, pred>0.5)
    print ("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK }

In [5]:
trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 50,
                        trials = trials)

  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]




SCORE:                                                 
0.8068055555555556                                     
SCORE:                                                                            
0.8084722222222223                                                                
SCORE:                                                                            
0.8079166666666666                                                                
SCORE:                                                                            
0.805                                                                             
SCORE:                                                                            
0.8056944444444445                                                                
SCORE:                                                                            
0.8047222222222222                                                                
SCORE:                                                    

SCORE:                                                                             
0.8070833333333334                                                                 
SCORE:                                                                             
0.8063888888888889                                                                 
SCORE:                                                                             
0.8073611111111111                                                                 
SCORE:                                                                             
0.8065277777777777                                                                 
SCORE:                                                                             
0.8079166666666666                                                                 
SCORE:                                                                             
0.8091666666666667                                                          

0.8063888888888889                                                                 
SCORE:                                                                             
0.8068055555555556                                                                 
SCORE:                                                                             
0.8056944444444445                                                                 
100%|██████████| 100/100 [00:50<00:00,  1.99trial/s, best loss: -0.8094444444444444]


In [6]:
print("The best hyperparameters are : ","\n")
print(best_hyperparams)

The best hyperparameters are :  

{'colsample_bytree': 0.768055220786191, 'gamma': 6.790872680743958, 'max_depth': 15.0, 'min_child_weight': 2.0, 'reg_alpha': 64.0, 'reg_lambda': 0.6576130023631855}


In [7]:
xgb_best = xgb.XGBClassifier(best_hyperparams)



In [16]:
xgb_best = xgb.XGBClassifier(silent=False, 
                      scale_pos_weight=1,
                      learning_rate=0.05,  
                      colsample_bytree = 0.9267151796260045,
                      min_child_weight = 9,
                      reg_lambda = 0.583387,
                      subsample = 0.8,
                      objective='binary:logistic', 
                      n_estimators=180, 
                      reg_alpha = 61.0,
                      max_depth=18, 
                      gamma=1.01854676464289)

In [17]:
xgb_best.fit(X_train,y_train)
y_prediction = xgb_best.predict(X_test)
print(accuracy_score(y_test,y_prediction))

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


0.8338333333333333
