# XGBoost Classifier for the Breast Cancer Dataset

In [1]:
import pandas as pd
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
import xgboost
import pickle

In [2]:
# Load the breast cancer dataset through scikit-learn's `datasets` module.
# The returned object exposes the features as `.data` and the labels as `.target`.
df = datasets.load_breast_cancer()

In [3]:
# Pull the features (`data`) and the labels (`target`) out of the loaded dataset.
x, y = df.data, df.target

In [4]:
# Hold out a test set; the fixed seed keeps the split identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=1,
)

# Hyperparameter Tuning for XGBoost

In [5]:
# Search space for the randomized search: each key is an XGBClassifier
# hyperparameter and each list holds the candidate values to sample from.
params = dict(
    learning_rate=[0.05, 0.10, 0.15, 0.20, 0.25, 0.30],
    max_depth=[3, 4, 5, 6, 8, 10, 12, 15],
    min_child_weight=[1, 3, 5, 7],
    gamma=[0.0, 0.1, 0.2, 0.3, 0.4],
    colsample_bytree=[0.3, 0.4, 0.5, 0.7],
)

In [6]:
# Untuned XGBoost classifier with library defaults; used as the base estimator
# that the hyperparameter search configures.
clf = xgboost.XGBClassifier()

In [7]:
# Randomized hyperparameter search over `params`: 5 sampled candidates, each
# scored by ROC AUC with 5-fold cross-validation, run in a single job.
# `random_state` pins which candidates are sampled, so a fresh
# "Restart & Run All" evaluates the same configurations instead of a new
# random draw each time.
model = RandomizedSearchCV(
    clf,
    param_distributions=params,
    n_iter=5,
    scoring='roc_auc',
    n_jobs=1,
    cv=5,
    verbose=3,
    random_state=1,
)

In [8]:
# Run the search on the training split only; the test split is not passed in.
model.fit(x_train, y_train)

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5 
[CV]  min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5, score=0.992, total=   0.1s
[CV] min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5 
[CV]  min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5, score=0.989, total=   0.0s
[CV] min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5 
[CV]  min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5, score=0.999, total=   0.0s
[CV] min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5 
[CV]  min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5, score=0.995, total=   0.0s
[CV] min_child_weight=3, max_depth=3, learning_rate=0.2, gamma=0.2, colsample_bytree=0.5 
[CV]  min_child_we

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s



[CV] min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3 
[CV]  min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3, score=0.992, total=   0.1s
[CV] min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3 
[CV]  min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3, score=0.991, total=   0.0s
[CV] min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3 
[CV]  min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3, score=1.000, total=   0.0s
[CV] min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3 
[CV]  min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3, score=0.995, total=   0.1s
[CV] min_child_weight=3, max_depth=10, learning_rate=0.05, gamma=0.3, colsample_bytree=0.3 
[CV]  min_child_weight=3, max_depth=10, learning_rate=0.05,

[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:    1.1s finished


RandomizedSearchCV(cv=5,
                   estimator=XGBClassifier(base_score=None, booster=None,
                                           colsample_bylevel=None,
                                           colsample_bynode=None,
                                           colsample_bytree=None, gamma=None,
                                           gpu_id=None, importance_type='gain',
                                           interaction_constraints=None,
                                           learning_rate=None,
                                           max_delta_step=None, max_depth=None,
                                           min_child_weight=None, missing=nan,
                                           monotone_constraints=None,
                                           n_estimators=100,...
                                           reg_lambda=None,
                                           scale_pos_weight=None,
                                           subsample=No

In [9]:
# Display the best estimator found by the search.
model.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.7, gamma=0.3, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.05, max_delta_step=0, max_depth=4,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [10]:
# Display the hyperparameter combination selected by the search.
model.best_params_

{'min_child_weight': 1,
 'max_depth': 4,
 'learning_rate': 0.05,
 'gamma': 0.3,
 'colsample_bytree': 0.7}

In [11]:
# Display the best score found by the search (scored with 'roc_auc').
model.best_score_

0.9929213582876851

In [14]:
# Build the final classifier directly from the search result rather than from
# a hand-pasted copy of the printed `best_estimator_` repr. A pasted argument
# list goes stale whenever the search is re-run and is easy to get subtly wrong
# (e.g. `missing=None` instead of the estimator's `missing=nan`); it also pins
# internal arguments that are specific to one xgboost version.
# Only the tuned hyperparameters come from the search; the tree count and seed
# are kept explicit, and every other setting is left at the library default.
classifire = xgboost.XGBClassifier(
    n_estimators=100,
    random_state=0,
    **model.best_params_,
)