In [5]:
import pandas as pd
import numpy as np
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Notebook-wide settings.
# NOTE: this blanket filter silences every warning, including deprecation
# notices from scikit-learn / XGBoost; narrow it when debugging.
warnings.filterwarnings("ignore")
sns.set(style="whitegrid")
pd.set_option('display.max_columns', None)

# Load the dataset; the path is relative to the current working directory.
data = pd.read_csv("./breast-cancer.csv")

# Remove the 'id' column so it is not used as a feature
# (presumably a row identifier -- confirm against the dataset).
data = data.drop(columns=['id'])

# Encode the target as integers: 'M' -> 1, 'B' -> 0.
# Series.map() converts any label that is not a key of the dict into NaN,
# so check right away instead of failing later inside model.fit().
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})
if data['diagnosis'].isna().any():
    raise ValueError("diagnosis column contains values other than 'M' or 'B'")

# Features are every remaining column; the target is the encoded diagnosis.
X = data.drop(columns=['diagnosis'])
y = data['diagnosis']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply the same
# transformation to the test split so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Classifiers to compare, each seeded for reproducibility.
rfc = RandomForestClassifier(random_state=1)
ada = AdaBoostClassifier(random_state=1)
# `use_label_encoder` is intentionally not passed: it is deprecated in
# current XGBoost releases and the target is already encoded as 0/1 integers.
xgb = XGBClassifier(random_state=1)

def evaluate_model(name, model, X_train_scaled, y_train, X_test_scaled, y_test):
    """Fit ``model`` on the training data and print its test-set metrics.

    The model is fitted on ``(X_train_scaled, y_train)`` and then used to
    predict ``X_test_scaled``.  Two things are printed: a line with ``name``
    and the accuracy formatted to four decimals, and the confusion matrix
    of ``y_test`` versus the predictions.  Nothing is returned.
    """
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, predictions)
    print(f"\n{name} Accuracy: {accuracy:.4f}")

    # Header and matrix are emitted on separate lines via sep="\n".
    print("Confusion Matrix:", confusion_matrix(y_test, predictions), sep="\n")
    
# Compare models: fit and report each classifier on the same scaled split,
# in the order Random Forest, AdaBoost, XGBoost.
for label, classifier in (
    ("Random Forest", rfc),
    ("AdaBoost", ada),
    ("XGBoost", xgb),
):
    evaluate_model(label, classifier, X_train_scaled, y_train, X_test_scaled, y_test)



Random Forest Accuracy: 0.9649
Confusion Matrix:
[[70  1]
 [ 3 40]]

AdaBoost Accuracy: 0.9737
Confusion Matrix:
[[70  1]
 [ 2 41]]

XGBoost Accuracy: 0.9561
Confusion Matrix:
[[69  2]
 [ 3 40]]
