# Breast Cancer Classification using ML Models

This notebook compares three classification algorithms — Logistic Regression, k-Nearest Neighbors (k-NN), and Decision Tree — on the Breast Cancer Wisconsin dataset. The aim is to evaluate the models based on accuracy, precision, recall, and F1-score using `scikit-learn`.


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Load the Breast Cancer dataset and unpack features / labels
data = load_breast_cancer()
X, y = data.data, data.target

# Feature table with a trailing `target` column, for exploration
df = pd.DataFrame(X, columns=data.feature_names).assign(target=y)
df.head()


In [None]:
# Hold out 20% of the samples for testing. The split is stratified on the
# label so the train and test partitions keep the dataset's class
# proportions, and seeded so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features: the scaler's statistics are learned from the
# training split only and then reused, unchanged, on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Build the three classifiers, then fit each on its training features.
log_reg = LogisticRegression(max_iter=10000)
knn = KNeighborsClassifier(n_neighbors=5)
dtree = DecisionTreeClassifier(random_state=42)

# Logistic regression and k-NN are fitted on the standardized features;
# the decision tree is fitted on the raw (unscaled) features.
for estimator, train_features in (
    (log_reg, X_train_scaled),
    (knn, X_train_scaled),
    (dtree, X_train),
):
    estimator.fit(train_features, y_train)


In [None]:
# Predictions on the held-out test split. Logistic regression and k-NN were
# fitted on standardized features, so they receive the standardized test set;
# the decision tree was fitted on raw features and receives the raw test set.
y_pred_log_reg = log_reg.predict(X_test_scaled)
y_pred_knn = knn.predict(X_test_scaled)
y_pred_dtree = dtree.predict(X_test)

# Classification Reports
# The heading and the report are printed separately: passing both to a single
# print() inserts a space after the newline, which shifts the report's header
# row out of alignment with its data rows. Classes are labelled by name
# (data.target_names) to match the confusion-matrix plots.
for model_name, y_pred in (
    ("Logistic Regression", y_pred_log_reg),
    ("k-NN", y_pred_knn),
    ("Decision Tree", y_pred_dtree),
):
    print(f"{model_name}:")
    print(classification_report(y_test, y_pred, target_names=data.target_names))


In [None]:
# Plot confusion matrices
def plot_cm(cm, title, labels=None):
    """Draw a confusion matrix as an annotated heatmap on its own figure.

    Parameters
    ----------
    cm : 2-D array of int
        Confusion-matrix counts; each cell is annotated using the ``'d'``
        (integer) format. Rows are labelled "Actual", columns "Predicted".
    title : str
        Title shown above the heatmap.
    labels : sequence of str, optional
        Tick labels used for both axes. When omitted, the notebook-level
        ``data.target_names`` is used.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    if labels is None:
        labels = data.target_names

    # A fresh figure/axes per call: drawing on the implicit "current axes"
    # would stack successive heatmaps (and their colorbars) on one another
    # whenever the backend does not close the figure on show().
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=labels,
                yticklabels=labels,
                ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(title)
    plt.show()

# One confusion-matrix heatmap per model, in the same order as the reports.
cm_plots = (
    (y_pred_log_reg, "Confusion Matrix: Logistic Regression"),
    (y_pred_knn, "Confusion Matrix: k-NN"),
    (y_pred_dtree, "Confusion Matrix: Decision Tree"),
)
for predictions, plot_title in cm_plots:
    plot_cm(confusion_matrix(y_test, predictions), plot_title)
