In [37]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [39]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    classification_report,
)

In [41]:
from sklearn.linear_model import LogisticRegression

In [43]:
from xgboost import XGBClassifier
import os

In [45]:
# Load the breast-cancer dataset via sklearn.datasets. The returned object's
# .data, .feature_names and .target attributes are read by the cells below.
cancer = load_breast_cancer()

In [47]:
# Feature matrix as a DataFrame, with columns labelled by the dataset's
# feature names.
X = pd.DataFrame(
    data=cancer.data,
    columns=cancer.feature_names,
)

In [49]:
# Label vector as a named Series.
y = pd.Series(
    data=cancer.target,
    name="target",
)

In [51]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): no `stratify` argument is passed, so the class balance of the
# two partitions is left to the shuffle -- consider `stratify=y` (this would
# change the reported metrics).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=11,
)

In [57]:
# Baseline classifier: logistic regression with the iteration limit set to 5000.
# NOTE(review): the features are passed to fit() unscaled; the large max_iter is
# presumably there to let the solver converge on them -- confirm, and consider
# standardising the inputs for a fairer baseline.
baseline_model = LogisticRegression(max_iter=5000)

In [61]:
# Train the baseline on the training partition.
baseline_model.fit(X=X_train, y=y_train)

In [63]:
# Class-label predictions of the baseline on the held-out rows.
y_pred_baseline = baseline_model.predict(X=X_test)

In [65]:
# Confusion matrix for the baseline (rows: true class, columns: predicted class).
print(
    confusion_matrix(
        y_true=y_test,
        y_pred=y_pred_baseline,
    )
)

[[ 54   7]
 [  5 105]]


In [67]:
# Per-class precision / recall / F1 summary for the baseline.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_baseline,
    )
)

              precision    recall  f1-score   support

           0       0.92      0.89      0.90        61
           1       0.94      0.95      0.95       110

    accuracy                           0.93       171
   macro avg       0.93      0.92      0.92       171
weighted avg       0.93      0.93      0.93       171



In [69]:
# Gradient-boosted tree classifier to compare against the logistic baseline.
# NOTE(review): the seed here (42) differs from the train/test split seed (11);
# harmless, but a single shared SEED constant would be easier to audit.
xgb_model = XGBClassifier(
    # Task definition and the metric used for evaluation.
    objective="binary:logistic",
    eval_metric="logloss",
    # Ensemble size, tree depth and step size.
    n_estimators=300,
    max_depth=4,
    learning_rate=0.05,
    # Row / column subsampling fractions.
    subsample=0.8,
    colsample_bytree=0.8,
    # Tree construction algorithm, seed and parallelism.
    tree_method="hist",
    random_state=42,
    n_jobs=-1,
)

In [71]:
# Train the boosted model on the same training partition as the baseline.
xgb_model.fit(X=X_train, y=y_train)

In [73]:
# Class-label predictions of the boosted model on the held-out rows.
y_pred_xgb = xgb_model.predict(X=X_test)

In [75]:
# Confusion matrix for the boosted model (rows: true class, columns: predicted class).
print(
    confusion_matrix(
        y_true=y_test,
        y_pred=y_pred_xgb,
    )
)

[[ 58   3]
 [  2 108]]


In [77]:
# Per-class precision / recall / F1 summary for the boosted model.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_xgb,
    )
)

              precision    recall  f1-score   support

           0       0.97      0.95      0.96        61
           1       0.97      0.98      0.98       110

    accuracy                           0.97       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171

