In [1]:
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# One seed shared by the train/test split and the booster so a re-run
# reproduces the same partition and the same model.
RANDOM_SEED = 42

# --- Data -------------------------------------------------------------------
# Breast-cancer dataset bundled with scikit-learn: feature matrix + binary target.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the rows for evaluation; the remaining 80% are used to fit.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

# --- Model ------------------------------------------------------------------
# Hyperparameters are gathered in one mapping so they can be inspected or
# tweaked in a single place before being unpacked into the estimator.
xgb_params = {
    # Ensemble size and per-round shrinkage
    "n_estimators": 500,
    "learning_rate": 0.05,
    # Tree shape
    "max_depth": 4,
    "min_child_weight": 1,
    "gamma": 0,
    # Row / column subsampling applied per tree
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    # L1 (alpha) and L2 (lambda) penalties on the leaf weights
    "reg_alpha": 0.01,
    "reg_lambda": 1.0,
    # Task, reproducibility and parallelism
    "objective": "binary:logistic",
    "random_state": RANDOM_SEED,
    "n_jobs": -1,
}
xgb_classifier = xgb.XGBClassifier(**xgb_params)

# Fit on the training partition only.
xgb_classifier.fit(X_train, y_train)

# --- Evaluation -------------------------------------------------------------
# Predict class labels for the held-out rows and report the fraction correct.
y_pred = xgb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.9737
