In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score

In [2]:
#Loading the dataset
df = load_breast_cancer()
x = df.data
y = df.target

In [3]:
df

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [4]:
#Splitting the data into test and training sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=123)

In [5]:
# Convert data to DMatrix
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [6]:
#Setting the parameters
params = {
    'max_depth': 3,
    'objective': 'binary:logistic',
    'min_child_weight': 1,
    'eta': 0.1,
    'subsample': 0.5,
    'colsample_bytree': 1,
    'eval_metric': 'logloss',
    'seed': 123,
}

In [7]:
#Training the model
num_round = 1000
bst = xgb.train(params, dtrain, num_round, evals=[(dtest, 'test')], early_stopping_rounds=10)

[0]	test-logloss:0.58133
[1]	test-logloss:0.51892
[2]	test-logloss:0.46557
[3]	test-logloss:0.42310
[4]	test-logloss:0.39069
[5]	test-logloss:0.35658


[6]	test-logloss:0.32730
[7]	test-logloss:0.29868
[8]	test-logloss:0.27415
[9]	test-logloss:0.25342
[10]	test-logloss:0.23750
[11]	test-logloss:0.22232
[12]	test-logloss:0.21029
[13]	test-logloss:0.19762
[14]	test-logloss:0.18761
[15]	test-logloss:0.17514
[16]	test-logloss:0.16736
[17]	test-logloss:0.15857
[18]	test-logloss:0.15319
[19]	test-logloss:0.14441
[20]	test-logloss:0.14027
[21]	test-logloss:0.13397
[22]	test-logloss:0.13015
[23]	test-logloss:0.12822
[24]	test-logloss:0.12449
[25]	test-logloss:0.11928
[26]	test-logloss:0.11430
[27]	test-logloss:0.10900
[28]	test-logloss:0.10752
[29]	test-logloss:0.10448
[30]	test-logloss:0.10375
[31]	test-logloss:0.10104
[32]	test-logloss:0.09747
[33]	test-logloss:0.09618
[34]	test-logloss:0.09365
[35]	test-logloss:0.09319
[36]	test-logloss:0.09166
[37]	test-logloss:0.09065
[38]	test-logloss:0.09087
[39]	test-logloss:0.09011
[40]	test-logloss:0.08923
[41]	test-logloss:0.08867
[42]	test-logloss:0.08693
[43]	test-logloss:0.08574
[44]	test-loglos

In [8]:
# Making the predictions
y_pred = bst.predict(dtest)

In [9]:
# Convert continuous predictions to binary values
y_pred_binary = [1 if pred >= 0.5 else 0 for pred in y_pred]

# Calculate accuracy score
accuracy = accuracy_score(y_test, y_pred_binary)

In [10]:
accuracy

0.9824561403508771