In [1]:
from xgboost import XGBClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report
import pandas as pd


In [3]:
# Load the breast-cancer dataset, split it, and standardize the features.
data = load_breast_cancer()

# Wrap the raw arrays in pandas objects so the feature names stay attached.
X = pd.DataFrame(data=data["data"], columns=data["feature_names"])
y = pd.Series(data["target"], name="target")

# Hold out 20% of the rows for testing; stratify=y splits in a stratified
# fashion using the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)


In [5]:
# Train a baseline XGBoost classifier and report its test-set metrics.
xgb_clf = XGBClassifier(
    objective="binary:logistic",
    eval_metric="logloss",
    n_estimators=200,       # number of boosting trees
    max_depth=3,            # depth of each tree
    learning_rate=0.1,      # update step size (eta)
    colsample_bytree=0.8,   # each tree uses 80% of the features
    subsample=0.8,          # each boosting round uses 80% of the samples
    n_jobs=-1,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict labels for the held-out test rows.
y_pred_xgb = xgb_clf.predict(X_test_scaled)

# Overall accuracy and the support-weighted F1 across both classes.
acc = accuracy_score(y_test, y_pred_xgb)
f1 = f1_score(y_test, y_pred_xgb, average="weighted")

print("XGBoost Accuracy:", acc)
print("XGBoost F1-score:", f1)
# Per-class precision / recall / F1, labelled with the dataset's class names.
print(
    classification_report(
        y_test,
        y_pred_xgb,
        target_names=data.target_names,
    )
)


XGBoost Accuracy: 0.956140350877193
XGBoost F1-score: 0.9557756825927252
              precision    recall  f1-score   support

   malignant       0.97      0.90      0.94        42
      benign       0.95      0.99      0.97        72

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [None]:
# Compare a few (learning_rate, max_depth) settings while every other
# hyperparameter is held fixed.
#
# NOTE(review): each config is scored on the held-out test split
# (X_test_scaled / y_test). Picking a "best" row from this table therefore
# gives an optimistic estimate; for real model selection, score the configs on
# a validation split or with cross-validation and keep the test split for the
# final check only.
configs = [
    {"learning_rate": 0.3, "max_depth": 3},
    {"learning_rate": 0.1, "max_depth": 3},
    {"learning_rate": 0.05, "max_depth": 4},
]

# One (lr, max_depth, accuracy, f1) tuple per config, in the order of `configs`.
results = []
for cfg in configs:
    model = XGBClassifier(
        n_estimators=300,
        learning_rate=cfg["learning_rate"],
        max_depth=cfg["max_depth"],
        subsample=0.8,
        colsample_bytree=0.8,
        objective="binary:logistic",
        eval_metric="logloss",
        random_state=42,
        n_jobs=-1
    )
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # Loop-specific names: the notebook-level `acc` / `f1` computed for the
    # baseline model must not be overwritten by the last config's metrics.
    cfg_acc = accuracy_score(y_test, y_pred)
    cfg_f1 = f1_score(y_test, y_pred, average="weighted")
    results.append((cfg["learning_rate"], cfg["max_depth"], cfg_acc, cfg_f1))

# Tabulate the sweep; explicit column names keep the frame well-formed even
# when `configs` is empty.
results_df = pd.DataFrame(results, columns=["lr", "max_depth", "accuracy", "f1"])
print(results_df)
