# Breast Cancer Classification using XGBoost

### 1. Libraries

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from xgboost import XGBClassifier

### 2. Loading the dataset

In [5]:
# Read the raw dataset; the relative path is resolved against the kernel's
# current working directory.
df = pd.read_csv(filepath_or_buffer="../Docs/data.csv")

### 3. Deleting unnecessary features

In [6]:
# Discard the "id" column so it does not end up among the model features.
# errors="ignore" keeps the cell safe to re-run: once "id" has been removed,
# executing the cell again is a no-op instead of raising KeyError.
df = df.drop(columns=["id"], errors="ignore")

### 4. Setting target values

In [7]:
# Encode the target as integers: "M" -> 1, "B" -> 0
# (presumably malignant / benign -- confirm against the dataset description).
#
# The dtype guard makes the cell idempotent: mapping a column that is already
# 0/1 would turn every value into NaN. The NaN check surfaces unexpected
# labels here rather than letting them reach training as missing targets.
if not pd.api.types.is_numeric_dtype(df["diagnosis"]):
    diagnosis_encoded = df["diagnosis"].map({"M": 1, "B": 0})
    if diagnosis_encoded.isna().any():
        raise ValueError("'diagnosis' contains values other than 'M' and 'B'")
    df["diagnosis"] = diagnosis_encoded.astype("int64")

### 5. Splitting X and Y

In [8]:
# Target vector: the "diagnosis" column.
y = df["diagnosis"]
# Feature matrix: every column of df except the target.
X = df.drop(columns="diagnosis")

### 6. Feature normalization

In [9]:
# Standardise the features with StandardScaler.
# Note: the scaling statistics are estimated from all rows of X.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

### 7. Train/test split

In [10]:
# Split the *unscaled* features first, then fit the scaler on the training rows
# only. Splitting an array that was standardised over the full dataset lets the
# test rows influence the mean/std applied to the training data (data leakage),
# which makes the held-out metrics optimistic.
#
# Same test_size and random_state as before, so the train/test rows are
# unchanged; only the scaling statistics differ.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# `scaler` is (re)bound to the train-only fit so that any later use of it
# applies the same transformation the model was trained with.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

### 8. XGBoost configuration

In [11]:
# Gradient-boosted tree classifier for the binary target.
model = XGBClassifier(
    # Learning task and the metric reported during training.
    objective="binary:logistic",
    eval_metric="logloss",
    # Ensemble size and step size.
    n_estimators=200,
    learning_rate=0.1,
    # Per-tree complexity.
    max_depth=4,
    # Row / column subsampling per tree.
    subsample=0.9,
    colsample_bytree=0.9,
)

### 9. Train

In [12]:
# Train on the training split; the fitted estimator is the cell's displayed value.
model.fit(X=X_train, y=y_train)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,0.9
,device,
,early_stopping_rounds,
,enable_categorical,False


### 10. Test

In [13]:
# Predicted class labels for the held-out split.
y_pred = model.predict(X=X_test)

### 11. Evaluation

In [14]:
# Compare the held-out labels with the model's predictions.
cm = confusion_matrix(y_test, y_pred)
acc, prec, rec, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report the scalar metrics first, then the confusion matrix.
print("Accuracy:", acc)
print("Precision:", prec)
print("Recall:", rec)
print("F1:", f1)
print("Confusion Matrix:\n", cm)

Accuracy: 0.9649122807017544
Precision: 0.975609756097561
Recall: 0.9302325581395349
F1: 0.9523809523809523
Confusion Matrix:
 [[70  1]
 [ 3 40]]
