In [1]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the breast cancer classification dataset.
# The last column is used as the target; every other column is a feature.
dataset = pd.read_csv('Breast Cancer Detection Classification.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to DMatrix, the data container consumed by xgb.train
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# The multi:softmax objective expects integer class ids in [0, num_class).
# Derive the class count from the labels instead of hard-coding it, so the
# model is not configured with classes that never occur in the data.
num_class = int(y.max()) + 1

# Set parameters for XGBoost
params = {
    'objective': 'multi:softmax',  # Predict the class id directly
    'num_class': num_class,        # Number of classes, derived from the target column
    'eval_metric': 'mlogloss',     # Multi-class log loss
    'max_depth': 4,                # Depth of the decision trees
    'eta': 0.1,                    # Learning rate
    'subsample': 0.8,              # Fraction of samples to use for each tree
    'colsample_bytree': 0.8        # Fraction of features to use for each tree
}

# Train the model using the train method
num_round = 100  # Number of boosting rounds
bst = xgb.train(params, dtrain, num_round)

# Make predictions on the test set.
# With multi:softmax, predict() yields the predicted class id for each row.
y_pred = bst.predict(dtest)

# Evaluate the model on the held-out test set
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy using XGBoost train method: {accuracy * 100:.2f}%")


Accuracy using XGBoost train method: 96.49%
