In [3]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load and preprocess data
# Fetch UCI repository dataset id=15. Any '?' placeholder is turned into NaN,
# every feature column is coerced to a numeric dtype, and rows with a missing
# value are dropped.
breast_cancer = fetch_ucirepo(id=15)
X = breast_cancer.data.features.replace('?', np.nan).apply(pd.to_numeric).dropna()
# Keep only the targets whose rows survived dropna(), then recode the class
# codes 2 -> 0 and 4 -> 1 (reported as benign / malignant further down).
y = breast_cancer.data.targets.loc[X.index].replace({2: 0, 4: 1})

# Split dataset
# 75/25 split with a fixed seed for reproducibility; stratify=y keeps the
# class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

# Feature scaling
# The scaler is fitted on the training partition only, and the test partition
# is transformed with those training statistics, so no test-set information
# leaks into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train KNN model
# ravel() flattens the target values into the 1-D array handed to fit().
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train.values.ravel())

# Predictions
y_pred = knn.predict(X_test_scaled)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
# Pin the label order explicitly: the table below hard-codes a 2x2 layout
# (row/column 0 = benign, 1 = malignant), so the matrix must always be 2x2 in
# that order, even if one class happened to be absent from y_test and y_pred.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Create labeled confusion matrix
# Rows are the true classes and columns the predicted classes.
conf_matrix_df = pd.DataFrame(
    conf_matrix,
    index=['Actual Benign (TN/FP)', 'Actual Malignant (FN/TP)'],
    columns=['Predicted Benign', 'Predicted Malignant']
)

# Display results
print(f"KNN Accuracy: {accuracy:.4f}\n")
print("Confusion Matrix:")
print(conf_matrix_df)

KNN Accuracy: 0.9532

Confusion Matrix:
                          Predicted Benign  Predicted Malignant
Actual Benign (TN/FP)                  106                    5
Actual Malignant (FN/TP)                 3                   57
