In [2]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from xgboost import XGBClassifier


In [3]:
data = load_breast_cancer() 
X = pd.DataFrame(data.data, columns=data.feature_names) 
y = pd.Series(data.target) 

In [4]:
X_train,X_test,y_train,y_test = train_test_split( 
    X,y,test_size=0.2,random_state=42,stratify=y) 


In [9]:
model = XGBClassifier( 
    n_estimators = 100, 
    max_depth = 3, 
    learning_rate = 0.3, 
    subsample = 0.8, 
    colsample_bytree=0.8, 
    objective="binary:logistic",
    eval_metric="logloss", 
    random_state=42 
)
model.fit(X_train,y_train) 
y_pred = model.predict(X_test) 
y_prob = model.predict_proba(X_test)[:,1] 

In [11]:
print("Accuracy: ",accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy:  0.9649122807017544
              precision    recall  f1-score   support

           0       1.00      0.90      0.95        42
           1       0.95      1.00      0.97        72

    accuracy                           0.96       114
   macro avg       0.97      0.95      0.96       114
weighted avg       0.97      0.96      0.96       114



In [12]:
confusion_matrix(y_test,y_pred)

array([[38,  4],
       [ 0, 72]])

In [13]:
import  xgboost as xgb 
dtrain = xgb.DMatrix(X_train,label=y_train) 
dtest = xgb.DMatrix(X_test,label=y_test) 
params = {
    "objective":"binary:logistic", 
    "eval_metric":"logloss", 
    "max_depth":3, 
    "eta":0.05, 
    "subsample":0.8, 
    "colsample_bytree":0.8, 
    "seed":42 
} 
modelr = xgb.train( 
    params= params, 
    dtrain=dtrain, 
    num_boost_round=500, 
    evals=[(dtest,"eval")], 
    early_stopping_rounds=20 
) 

[0]	eval-logloss:0.61842
[1]	eval-logloss:0.58321
[2]	eval-logloss:0.55144
[3]	eval-logloss:0.52155
[4]	eval-logloss:0.49551
[5]	eval-logloss:0.47074
[6]	eval-logloss:0.44676
[7]	eval-logloss:0.42594
[8]	eval-logloss:0.40728
[9]	eval-logloss:0.38992
[10]	eval-logloss:0.37171
[11]	eval-logloss:0.35644
[12]	eval-logloss:0.34321
[13]	eval-logloss:0.32993
[14]	eval-logloss:0.31842
[15]	eval-logloss:0.30714
[16]	eval-logloss:0.29586
[17]	eval-logloss:0.28539
[18]	eval-logloss:0.27425
[19]	eval-logloss:0.26469
[20]	eval-logloss:0.25534
[21]	eval-logloss:0.24701
[22]	eval-logloss:0.23988
[23]	eval-logloss:0.23164
[24]	eval-logloss:0.22377
[25]	eval-logloss:0.21790
[26]	eval-logloss:0.21105
[27]	eval-logloss:0.20610
[28]	eval-logloss:0.20120
[29]	eval-logloss:0.19554
[30]	eval-logloss:0.19119
[31]	eval-logloss:0.18637
[32]	eval-logloss:0.18087
[33]	eval-logloss:0.17725
[34]	eval-logloss:0.17299
[35]	eval-logloss:0.16953
[36]	eval-logloss:0.16517
[37]	eval-logloss:0.16134
[38]	eval-logloss:0.15

In [14]:
y_prob = modelr.predict(dtest)

y_pred = (y_prob >= 0.5).astype(int)

acc = accuracy_score(y_test, y_pred)
print("Accuracy: ", acc)

Accuracy:  0.9649122807017544
