Wisconsin Breast Cancer Exercise - Decision Tree

In [7]:
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

# ---------------------------------------------------------------------
# Data: pull UCI repository entry 15 and separate features from target.
# ---------------------------------------------------------------------
dataset = fetch_ucirepo(id=15)
X = dataset.data.features
y = dataset.data.targets.values.ravel()  # flatten the target to 1-D

# Recode the class labels to binary: 2 (benign) -> 0, 4 (malignant) -> 1.
# Any value not listed in the mapping is passed through unchanged.
label_map = {2: 0, 4: 1}
y = pd.Series(y).replace(label_map).values

# ---------------------------------------------------------------------
# Hold-out split: 75% train / 25% test, stratified on the label so both
# partitions keep the same class proportions; fixed seed for repeatability.
# ---------------------------------------------------------------------
split_kwargs = dict(test_size=0.25, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

# ---------------------------------------------------------------------
# Model: mean-impute missing feature values, then fit a decision tree.
# Keeping the imputer inside the pipeline means its statistics are
# learned from the training partition only.
# ---------------------------------------------------------------------
pipeline_steps = [
    ("imputer", SimpleImputer(strategy="mean")),
    ("model", DecisionTreeClassifier(random_state=42)),
]
pipeline = Pipeline(pipeline_steps)
pipeline.fit(X_train, y_train)

# ---------------------------------------------------------------------
# Evaluation on the held-out test partition.
# ---------------------------------------------------------------------
y_pred = pipeline.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)

print("Decision Tree")
print("Accuracy:", test_accuracy)
print("Confusion Matrix:\n", test_confusion)

Decision Tree
Accuracy: 0.9371428571428572
Confusion Matrix:
 [[110   5]
 [  6  54]]
