## Introduction

Testing {shap-hypertune} package for doing hyperparameter tuning and feature selection at the same time. 

In [4]:
import numpy as np
import pandas as pd
from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification, make_regression
from sklearn.metrics import roc_auc_score

from hyperopt import hp
from hyperopt import Trials

from xgboost import *

try:
    from shaphypetune import BoostSearch, BoostBoruta, BoostRFE, BoostRFA
except:
    !pip install --upgrade shap-hypetune
    from shaphypetune import BoostSearch, BoostBoruta, BoostRFE, BoostRFA

import warnings
warnings.simplefilter('ignore')

### Dummy Data

In [5]:
# dummmy data for classification and regression problems respectively, both with 20 features
X_clf, y_clf = make_classification(n_samples=6000, n_features=20, n_classes=2, 
                                   n_informative=4, n_redundant=6, random_state=0)

X_clf_train, X_clf_valid, y_clf_train, y_clf_valid = train_test_split(
    X_clf, y_clf, test_size=0.3, shuffle=False)

X_regr, y_regr = make_classification(n_samples=6000, n_features=20,
                                     n_informative=7, random_state=0)

X_regr_train, X_regr_valid, y_regr_train, y_regr_valid = train_test_split(
    X_regr, y_regr, test_size=0.3, shuffle=False)

### Search Space & Model

In [6]:
param_grid = {
    'learning_rate': [0.2, 0.1],
    'num_leaves': [25, 35],
    'max_depth': [10, 12]
}

param_dist = {
    'learning_rate': stats.uniform(0.09, 0.25),
    'num_leaves': stats.randint(20,40),
    'max_depth': [10, 12]
}

param_dist_hyperopt = {
    'max_depth': 15 + hp.randint('num_leaves', 5), 
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0)
}


regr_xgb = XGBRegressor(n_estimators=150, random_state=0, verbosity=0, n_jobs=-1)
clf_xgb = XGBClassifier(n_estimators=150, random_state=0, verbosity=0, n_jobs=-1)

### Hyperparameter Tuning

In [None]:
%%time
### HYPERPARAM TUNING WITH GRID-SEARCH ###

model = BoostSearch(clf_xgb, param_grid=param_grid)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)


8 trials detected for ('learning_rate', 'num_leaves', 'max_depth')



trial: 0001 ### iterations: 00021 ### eval_score: 0.2045
trial: 0002 ### iterations: 00026 ### eval_score: 0.19472
trial: 0003 ### iterations: 00021 ### eval_score: 0.2045
trial: 0004 ### iterations: 00026 ### eval_score: 0.19472
trial: 0005 ### iterations: 00045 ### eval_score: 0.19964
trial: 0006 ### iterations: 00050 ### eval_score: 0.20157
trial: 0007 ### iterations: 00045 ### eval_score: 0.19964
trial: 0008 ### iterations: 00050 ### eval_score: 0.20157
CPU times: total: 18 s
Wall time: 2.42 s


In [None]:
model.best_params_, model.best_score_

({'learning_rate': 0.2, 'num_leaves': 25, 'max_depth': 12},
 0.19471907359224538)

In [None]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.9138888888888889, (1800,), (1800, 2))

### Feature Selection

In [7]:
%%time
### BORUTA ###

model = BoostBoruta(clf_xgb, max_iter=200, perc=100)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)

CPU times: total: 42.3 s
Wall time: 6.61 s


In [None]:
model.n_features_

11

In [None]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.9161111111111111, (1800,), (1800, 11), (1800, 2))

In [None]:
%%time
### RECURSIVE FEATURE ELIMINATION (RFE) ###

model = BoostRFE(regr_xgb, min_features_to_select=1, step=1)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)

CPU times: total: 26.5 s
Wall time: 3.83 s


In [None]:
model.n_features_

7

In [None]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

(0.7317444492376407, (1800,), (1800, 7))

In [None]:
%%time
### RECURSIVE FEATURE ADDITION (RFA) ###

model = BoostRFA(regr_xgb, min_features_to_select=1, step=1)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)

CPU times: total: 40.7 s
Wall time: 7.39 s


In [None]:
model.n_features_

8

### Feature Selection with SHAP

In [None]:
%%time
### BORUTA SHAP ###

model = BoostBoruta(
    clf_xgb, max_iter=200, perc=100,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)

CPU times: total: 2min 15s
Wall time: 19.8 s


In [None]:
model.n_features_

10

In [None]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.91, (1800,), (1800, 10), (1800, 2))

In [None]:
%%time
### RECURSIVE FEATURE ELIMINATION (RFE) SHAP ###

model = BoostRFE(
    regr_xgb, min_features_to_select=1, step=1,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)

CPU times: total: 1min 9s
Wall time: 10.5 s


In [None]:
model.n_features_

7

In [None]:
(model.score(X_regr_valid, y_regr_valid), 
 model.predict(X_regr_valid).shape, 
 model.transform(X_regr_valid).shape)

(0.7317444492376407, (1800,), (1800, 7))

### Hyperparameters Tuning + Features Selection

In [None]:
%%time
### HYPERPARAM TUNING WITH GRID-SEARCH + BORUTA ###

model = BoostBoruta(clf_xgb, param_grid=param_grid, max_iter=200, perc=100)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)


8 trials detected for ('learning_rate', 'num_leaves', 'max_depth')



trial: 0001 ### iterations: 00026 ### eval_score: 0.20001
trial: 0002 ### iterations: 00022 ### eval_score: 0.20348
trial: 0003 ### iterations: 00026 ### eval_score: 0.20001
trial: 0004 ### iterations: 00022 ### eval_score: 0.20348
trial: 0005 ### iterations: 00048 ### eval_score: 0.19925
trial: 0006 ### iterations: 00052 ### eval_score: 0.20307
trial: 0007 ### iterations: 00048 ### eval_score: 0.19925
trial: 0008 ### iterations: 00052 ### eval_score: 0.20307
CPU times: total: 1h 55min 45s
Wall time: 21min 53s


In [None]:
model.best_params_, model.best_score_, model.n_features_

({'learning_rate': 0.1, 'num_leaves': 25, 'max_depth': 10},
 0.19924825629374635,
 11)

In [None]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.9144444444444444, (1800,), (1800, 11), (1800, 2))

### Hyperparameter Tuning and Feature Selection with SHAP

In [None]:
%%time
### HYPERPARAM TUNING WITH GRID-SEARCH + BORUTA SHAP ###

model = BoostBoruta(
    clf_xgb, param_grid=param_grid, max_iter=200, perc=100,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_clf_train, y_clf_train, eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0)


8 trials detected for ('learning_rate', 'num_leaves', 'max_depth')



trial: 0001 ### iterations: 00024 ### eval_score: 0.20151
trial: 0002 ### iterations: 00020 ### eval_score: 0.20876
trial: 0003 ### iterations: 00024 ### eval_score: 0.20151
trial: 0004 ### iterations: 00020 ### eval_score: 0.20876
trial: 0005 ### iterations: 00048 ### eval_score: 0.20401
trial: 0006 ### iterations: 00048 ### eval_score: 0.20575
trial: 0007 ### iterations: 00048 ### eval_score: 0.20401
trial: 0008 ### iterations: 00048 ### eval_score: 0.20575
CPU times: total: 1h 39min 46s
Wall time: 15min 17s


In [None]:
model.best_params_, model.best_score_, model.n_features_

({'learning_rate': 0.2, 'num_leaves': 25, 'max_depth': 10},
 0.2015091903600842,
 10)

In [None]:
(model.score(X_clf_valid, y_clf_valid), 
 model.predict(X_clf_valid).shape, 
 model.transform(X_clf_valid).shape,
 model.predict_proba(X_clf_valid).shape)

(0.9144444444444444, (1800,), (1800, 10), (1800, 2))

In [None]:
%%time
### HYPERPARAM TUNING WITH RANDOM-SEARCH + RECURSIVE FEATURE ELIMINATION (RFE) SHAP ###

model = BoostRFE(
    regr_xgb, param_grid=param_dist, min_features_to_select=1, step=1,
    n_iter=8, sampling_seed=0,
    importance_type='shap_importances', train_importance=False
)
model.fit(X_regr_train, y_regr_train, eval_set=[(X_regr_valid, y_regr_valid)], early_stopping_rounds=6, verbose=0)


8 trials detected for ('learning_rate', 'num_leaves', 'max_depth')



trial: 0001 ### iterations: 00021 ### eval_score: 0.25941
trial: 0002 ### iterations: 00064 ### eval_score: 0.25075
trial: 0003 ### iterations: 00075 ### eval_score: 0.25493
trial: 0004 ### iterations: 00084 ### eval_score: 0.25002
trial: 0005 ### iterations: 00093 ### eval_score: 0.25609
trial: 0006 ### iterations: 00039 ### eval_score: 0.2573
trial: 0007 ### iterations: 00074 ### eval_score: 0.25348
trial: 0008 ### iterations: 00032 ### eval_score: 0.2583
CPU times: total: 39min 32s
Wall time: 6min 51s


In [None]:
model.best_params_, model.best_score_, model.n_features_

({'learning_rate': 0.1669837381562427, 'num_leaves': 25, 'max_depth': 10},
 0.2500210691702108,
 11)

### Support of Custom Eval Metric

In [None]:
def AUC(y_hat, dtrain):
    y_true = dtrain.get_label()
    return 'auc', roc_auc_score(y_true, y_hat)

In [None]:
%%time
# CUSTOM METRIC (AUC) + HYPERPARAM TUNING WITH RANDOM-SEARCH + RFE ###

model = BoostRFE(
    clf_xgb, 
    param_grid=param_dist, min_features_to_select=1, step=1,
    n_iter=8, sampling_seed=0,
    greater_is_better=True
)
model.fit(
    X_clf_train, y_clf_train, 
    eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0,
    eval_metric=AUC
)


8 trials detected for ('learning_rate', 'num_leaves', 'max_depth')



trial: 0001 ### iterations: 00022 ### eval_score: 0.97585
trial: 0002 ### iterations: 00026 ### eval_score: 0.97603
trial: 0003 ### iterations: 00020 ### eval_score: 0.97539
trial: 0004 ### iterations: 00020 ### eval_score: 0.97626
trial: 0005 ### iterations: 00057 ### eval_score: 0.97603
trial: 0006 ### iterations: 00021 ### eval_score: 0.97611
trial: 0007 ### iterations: 00024 ### eval_score: 0.97619
trial: 0008 ### iterations: 00013 ### eval_score: 0.97562
CPU times: total: 14min 35s
Wall time: 3min 10s


In [None]:
model.best_params_, model.best_score_, model.n_features_

({'learning_rate': 0.1669837381562427, 'num_leaves': 25, 'max_depth': 10},
 0.976256,
 4)

## Issues

The package currently has issue with `hyperopt` hyperparameter tunning. Most likely due to the deprecation of `np.random.RandomState`. More discussions available [here](https://github.com/hyperopt/hyperopt/issues/838). The solution suggested by the developer is to update numpy version, numpy >= 1.21.0 did the trick for me. :) 

In [None]:
%%time
clf_xgb = XGBClassifier(n_estimators=150, random_state=0, verbosity=0, n_jobs=-1)

### HYPERPARAM TUNING WITH HYPEROPT ###

model = BoostSearch(
    clf_xgb, param_grid=param_dist_hyperopt,
    n_iter=8, sampling_seed=42
)
model.fit(
    X_clf_train, y_clf_train, trials=Trials(), 
    eval_set=[(X_clf_valid, y_clf_valid)], early_stopping_rounds=6, verbose=0
)


8 trials detected for ('max_depth', 'learning_rate', 'colsample_bytree')

trial: 0001 ### iterations: 00149 ### eval_score: 0.22727
trial: 0002 ### iterations: 00037 ### eval_score: 0.19728
trial: 0003 ### iterations: 00088 ### eval_score: 0.19878
trial: 0004 ### iterations: 00149 ### eval_score: 0.2297
trial: 0005 ### iterations: 00082 ### eval_score: 0.19436
trial: 0006 ### iterations: 00146 ### eval_score: 0.19585
trial: 0007 ### iterations: 00035 ### eval_score: 0.19797
trial: 0008 ### iterations: 00047 ### eval_score: 0.20141
CPU times: total: 42.7 s
Wall time: 5.77 s
