XGBoost model whose hyperparameters are tuned with the hyperopt library

In [1]:
from wrangling import *

In [2]:
# prediction
import xgboost as xgb

hyperopt

In [48]:
from hyperopt import fmin,tpe,hp,STATUS_OK,Trials
from sklearn.metrics import accuracy_score,roc_auc_score

from typing import Any, Dict, Union

def hyperparameter_tuning(hyperparams: Dict[str,Union[float,int]],
                          kag_X_train:pd.DataFrame,
                          kag_y_train:pd.Series,
                          kag_X_test:pd.DataFrame,
                          kag_y_test:pd.Series,
                          early_stopping_rounds:int=50,
                          metric:callable=accuracy_score)->Dict[str, Any]:
    """Objective for hyperopt: fit one XGBoost classifier and score it.

    Parameters
    ----------
    hyperparams
        One sampled point of the search space, keyed by XGBClassifier
        argument name.  The dict passed in is not modified.
    kag_X_train, kag_y_train
        Features and labels the classifier is fitted on.
    kag_X_test, kag_y_test
        Features and labels used both as the second ``eval_set`` entry
        during fitting and for the final score.
    early_stopping_rounds
        Forwarded to ``XGBClassifier`` as ``early_stopping_rounds``.
    metric
        Callable invoked as ``metric(kag_y_test, predictions)``; higher
        must mean better, because the negated value is returned as the loss.

    Returns
    -------
    dict
        ``{'loss': -score, 'status': STATUS_OK, 'model': fitted_model}``.
    """
    # Only max_depth must be an integer: it is sampled with hp.quniform,
    # which produces floats such as 4.0.  reg_alpha is sampled from a
    # continuous hp.uniform range and is passed through unchanged, so the
    # value hyperopt reports as "best" is the value that was actually trained
    # (truncating it would silently evaluate e.g. 0.46 as 0).
    int_vals = ('max_depth',)
    params = {name: (int(value) if name in int_vals else value)
              for name, value in hyperparams.items()}
    params['early_stopping_rounds'] = early_stopping_rounds

    model = xgb.XGBClassifier(**params)
    # NOTE(review): the test split is both monitored during fitting (via
    # eval_set) and used for the score below, so the score may be optimistic.
    evaluation = [(kag_X_train, kag_y_train), (kag_X_test, kag_y_test)]
    model.fit(kag_X_train, kag_y_train,
              eval_set=evaluation,
              verbose=False)

    pred = model.predict(kag_X_test)
    score = metric(kag_y_test, pred)
    # The score is negated so that a lower loss corresponds to a better model.
    return {'loss': -score, 'status': STATUS_OK, 'model': model}

# Search space explored by hyperopt.  Each key is an XGBClassifier argument;
# the first argument of every hp.* call is the label hyperopt reports it under.
hyperparams = {
    # tree shape
    'max_depth': hp.quniform('max_depth', 1, 8, 1),
    'min_child_weight': hp.loguniform('min_child_weight', -2, 3),
    # row / column sampling
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    # regularisation
    'reg_alpha': hp.uniform('reg_alpha', 0, 10),
    'reg_lambda': hp.uniform('reg_lambda', 1, 10),
    'gamma': hp.loguniform('gamma', -10, 10),
    # step size
    'learning_rate': hp.loguniform('learning_rate', -7, 0),
    # 'random_state': 42   (left disabled, as in the original cell)
}


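Code to run the hyperopt search. The call is commented out; the output shown below is from an earlier run (2,000 trials, about 13 minutes).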

In [50]:
# trials=Trials()
# best=fmin(fn=lambda hyperparams: hyperparameter_tuning(hyperparams,kag_X_train,kag_y_train,kag_X_test,kag_y_test),
#           space=hyperparams,
#           algo=tpe.suggest,
#           max_evals=2_000,
#           trials=trials)

# best

100%|██████████| 2000/2000 [13:07<00:00,  2.54trial/s, best loss: -0.8109062377402904]


{'colsample_bytree': 0.8037808098513046,
 'gamma': 8.519420372081054,
 'learning_rate': 0.2047887166422218,
 'max_depth': 4.0,
 'min_child_weight': 11.28319186084241,
 'reg_alpha': 1.2941127471018976,
 'reg_lambda': 1.976995326435837,
 'subsample': 0.6797060965412611}

In [51]:
# with noisy data
# best={'colsample_bytree': 0.8037808098513046,
#  'gamma': 8.519420372081054,
#  'learning_rate': 0.2047887166422218,
#  'max_depth': 4.0,
#  'min_child_weight': 11.28319186084241,
#  'reg_alpha': 1.2941127471018976,
#  'reg_lambda': 1.976995326435837,
#  'subsample': 0.6797060965412611}

In [35]:
# Hyperparameters pasted in by hand; max_depth is a float here (4.0).
best = dict(
    colsample_bytree=0.7548336147601576,
    gamma=0.07384394880603823,
    learning_rate=0.04687842221296814,
    max_depth=4.0,
    min_child_weight=0.32575523925747074,
    reg_alpha=0.45553764910317607,
    reg_lambda=6.713863245468691,
    subsample=0.8380129565326875,
)

In [52]:
# Refit with the tuned hyperparameters and predict the test set.
# max_depth is stored as a float (e.g. 4.0); convert whatever value `best`
# holds to an integer instead of hard-coding 4, so that swapping in a
# different `best` dict does not silently train at the wrong depth.
best['max_depth']=int(best['max_depth'])
xghpt=xgb.XGBClassifier(**best,n_estimators=500)
xghpt.fit(Xtrain,ytrain)
res=xghpt.predict(Xtest)

In [53]:
# Submission file: the test set's PassengerId column next to the predictions,
# which are passed through res_encoder.inverse_transform before being written.
result = pd.DataFrame(
    data={
        'PassengerId': raw_test.PassengerId,
        'Transported': res_encoder.inverse_transform(res),
    }
)
result.to_csv('model1.csv', index=False)