In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns


In [3]:
# Load the Dataset
# Fetch scikit-learn's bundled Breast Cancer dataset and pull out the
# feature matrix (X) and the label vector (y) used by the later cells.
print("Loading the Breast Cancer dataset...")
data = load_breast_cancer()
X, y = data.data, data.target
# Report the dataset dimensions and the class names (in label order).
print(f"Dataset loaded. Number of samples: {X.shape[0]}, Number of features: {X.shape[1]}")
print(f"Target classes: {data.target_names}")

Loading the Breast Cancer dataset...
Dataset loaded. Number of samples: 569, Number of features: 30
Target classes: ['malignant' 'benign']


In [4]:
# Split the Data into Training and Testing Sets
print("\nSplitting data into training and testing sets...")
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split repeatable across runs.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class proportions should be preserved in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")



Splitting data into training and testing sets...
Training samples: 398, Testing samples: 171


In [5]:
# Train the K-Nearest Neighbors (KNN) Model
print("\nTraining the K-Nearest Neighbors (KNN) model...")
# n_neighbors is the number of neighbours that vote on each prediction; tune as needed.
# fit() returns the estimator itself, so construction and fitting are chained.
# NOTE(review): the features are passed to this distance-based model unscaled;
# consider standardizing them first — confirm whether that is intended.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
print("Model training complete.")


Training the K-Nearest Neighbors (KNN) model...
Model training complete.


In [6]:
# Make Predictions
# Use the fitted KNN model to predict a class label for every held-out test
# sample; y_pred is consumed by the evaluation cells that follow.
print("\nMaking predictions on the test set...")
y_pred = knn_model.predict(X_test)


Making predictions on the test set...


In [7]:
# Display the predicted labels for the test set. Labels follow the order of
# data.target_names: 0 = malignant, 1 = benign.
y_pred

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1])

In [8]:
# Evaluate the Model Performance

# Accuracy Score: fraction of test samples whose predicted label equals the true label.
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy:.4f}")

# Confusion Matrix: rows are true labels, columns are predicted labels.
# labels=[0, 1] pins the matrix to a fixed 2x2 layout, so the cm[0, 1] /
# cm[1, 0] lookups below stay valid even if a split (or the predictions)
# happens to contain only one of the two classes.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("\nConfusion Matrix:")
print(cm)

# Class convention used below: the "positive" class is label 1, which is
# 'benign' in this dataset (label 0 is 'malignant', per data.target_names).
# Read the counts accordingly: a "false positive" here is a malignant sample
# predicted as benign, and a "false negative" is a benign sample predicted
# as malignant.

# Type 1 Error (False Positive - FP): Predicting positive when it's actually negative.
# cm[0, 1] = true label 0 (malignant), predicted label 1 (benign).
type_1_error = cm[0, 1]
print(f"\nType 1 Error (False Positives): {type_1_error}")

# Type 2 Error (False Negative - FN): Predicting negative when it's actually positive.
# cm[1, 0] = true label 1 (benign), predicted label 0 (malignant).
type_2_error = cm[1, 0]
print(f"Type 2 Error (False Negatives): {type_2_error}")


Accuracy: 0.9591

Confusion Matrix:
[[ 57   6]
 [  1 107]]

Type 1 Error (False Positives): 6
Type 2 Error (False Negatives): 1


In [9]:
# Precision, Recall, and F1-Score
# Per-class metrics, with rows labelled by the dataset's class names
# instead of the raw 0/1 labels.
print("\nClassification Report (Precision, Recall, F1-Score):")
print(
    classification_report(
        y_test,
        y_pred,
        target_names=data.target_names,
    )
)


Classification Report (Precision, Recall, F1-Score):
              precision    recall  f1-score   support

   malignant       0.98      0.90      0.94        63
      benign       0.95      0.99      0.97       108

    accuracy                           0.96       171
   macro avg       0.96      0.95      0.96       171
weighted avg       0.96      0.96      0.96       171

