In [18]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, roc_auc_score,auc

In [19]:
import pandas as pd
import numpy as np

In [20]:
# Hyper-parameter search space for the XGBoost classifier.
# Seven parameters with three candidate values each -> 3**7 = 2187 combinations.
param_grid = dict(
    # number of boosting rounds (trees)
    n_estimators=[100, 200, 300],
    # shrinkage applied to each boosting step
    learning_rate=[0.01, 0.1, 0.2],
    # deepest level a single tree may reach
    max_depth=[3, 4, 5],
    # smallest summed instance weight allowed in a child node
    min_child_weight=[1, 3, 5],
    # minimum loss reduction needed before a leaf is split further
    gamma=[0, 0.1, 0.2],
    # fraction of training rows sampled for each tree
    subsample=[0.6, 0.8, 1.0],
    # fraction of feature columns sampled for each tree
    colsample_bytree=[0.6, 0.8, 1.0],
)

In [21]:
# Load the scikit-learn breast-cancer dataset and unpack the feature
# matrix (X) and the target vector (y) from the returned container.
data = load_breast_cancer()
X, y = data.data, data.target

In [22]:
scaler=StandardScaler()
X_std=scaler.fit_transform(X)

In [23]:
X_train,X_test,y_train,y_test=train_test_split(X_std,y,test_size=0.2,random_state=42)

In [24]:
# Base classifier: fixed seed for reproducible boosting, log-loss as the
# evaluation metric. `use_label_encoder` is deliberately not passed: it is a
# deprecated XGBoost argument that newer releases ignore with an
# "unused parameter" warning on every fit.
xgb=XGBClassifier(random_state=42,eval_metric='logloss')
# Exhaustive search over every combination in param_grid, scored by accuracy
# with 5-fold cross-validation on the training split; n_jobs=-1 runs the
# candidate fits in parallel on all available cores.
grid_search_xgb=GridSearchCV(xgb,param_grid,cv=5,scoring='accuracy',n_jobs=-1)
grid_search_xgb.fit(X_train,y_train)
# Report the parameter combination with the best mean cross-validated accuracy.
print(grid_search_xgb.best_params_)

{'colsample_bytree': 0.8, 'gamma': 0.1, 'learning_rate': 0.2, 'max_depth': 4, 'min_child_weight': 1, 'n_estimators': 100, 'subsample': 0.6}


In [27]:
# Take the estimator that GridSearchCV refit with the winning parameters
# and score it on the held-out test split.
best_xgb = grid_search_xgb.best_estimator_
y_pred = best_xgb.predict(X_test)

# Print overall accuracy first, then the confusion matrix.
for metric in (accuracy_score, confusion_matrix):
    print(metric(y_test, y_pred))

1.0
[[43  0]
 [ 0 71]]
