# Breast Cancer Classification using ML
This notebook performs binary classification on scikit-learn's Breast Cancer dataset using:

- Logistic Regression with L1 and L2 regularization
- Support Vector Machines with linear and RBF kernels

Evaluation includes ROC AUC, accuracy, precision, recall, and F1-score.


In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, RocCurveDisplay
import matplotlib.pyplot as plt


In [None]:
# Load scikit-learn's bundled breast cancer dataset into pandas containers.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features with statistics learned from the training split only,
# then apply that same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Both the liblinear solver and SVC(probability=True) shuffle the data
# internally, so a fixed random_state is required for the fitted models (and
# every metric derived from them) to be identical after "Restart & Run All".

# Logistic Regression with L1 and L2 penalties.
# liblinear is used because it supports both penalty types.
log_reg_l1 = LogisticRegression(penalty='l1', solver='liblinear', max_iter=1000, random_state=42)
log_reg_l2 = LogisticRegression(penalty='l2', solver='liblinear', max_iter=1000, random_state=42)

log_reg_l1.fit(X_train_scaled, y_train)
log_reg_l2.fit(X_train_scaled, y_train)

# Support Vector Machines with linear and RBF kernels.
# probability=True makes predict_proba available on the fitted SVCs.
svc_linear = SVC(kernel='linear', probability=True, random_state=42)
svc_rbf = SVC(kernel='rbf', probability=True, random_state=42)

svc_linear.fit(X_train_scaled, y_train)
svc_rbf.fit(X_train_scaled, y_train)


In [None]:
# Hard class predictions on the scaled test set, one array per fitted model
# (same order as the names on the left-hand side).
y_pred_l1, y_pred_l2, y_pred_svm_linear, y_pred_svm_rbf = (
    clf.predict(X_test_scaled)
    for clf in (log_reg_l1, log_reg_l2, svc_linear, svc_rbf)
)

# Evaluation
def evaluate_model(model, X_test, y_test, y_pred, pos_label=1):
    """Summarize a fitted binary classifier's performance on held-out data.

    Parameters
    ----------
    model : fitted classifier
        Must implement ``predict_proba``. If it exposes ``classes_``, that is
        used to locate the probability column of ``pos_label``; otherwise
        column 1 is used.
    X_test : array-like
        Features passed to ``model.predict_proba``.
    y_test : array-like
        True labels for ``X_test``.
    y_pred : array-like
        Hard class predictions for ``X_test``.
    pos_label : label, default=1
        Class whose precision, recall, F1 and ROC AUC are reported. The
        default reproduces the previously hard-coded behavior. Note that in
        scikit-learn's breast cancer dataset label 1 is "benign" and label 0
        is "malignant"; pass ``pos_label=0`` for malignant-class metrics.

    Returns
    -------
    dict
        Keys "Accuracy", "Precision", "Recall", "F1-Score" and "ROC AUC".

    Raises
    ------
    KeyError
        If ``pos_label`` does not appear in the classification report.
    """
    report = classification_report(y_test, y_pred, output_dict=True)
    # classification_report keys per-class entries by the label's string form.
    pos_metrics = report[str(pos_label)]

    # Find the predict_proba column for pos_label instead of assuming column 1.
    classes = list(getattr(model, "classes_", []))
    pos_column = classes.index(pos_label) if classes else 1
    pos_scores = model.predict_proba(X_test)[:, pos_column]

    # Binarize against pos_label so the scores and the "positive" class agree.
    auc = roc_auc_score(np.asarray(y_test) == pos_label, pos_scores)

    return {
        "Accuracy": report["accuracy"],
        "Precision": pos_metrics["precision"],
        "Recall": pos_metrics["recall"],
        "F1-Score": pos_metrics["f1-score"],
        "ROC AUC": auc,
    }

# Evaluate every fitted model on the scaled test set, keyed by a short label.
results = {
    label: evaluate_model(clf, X_test_scaled, y_test, preds)
    for label, clf, preds in (
        ("LogReg_L1", log_reg_l1, y_pred_l1),
        ("LogReg_L2", log_reg_l2, y_pred_l2),
        ("SVM_Linear", svc_linear, y_pred_svm_linear),
        ("SVM_RBF", svc_rbf, y_pred_svm_rbf),
    )
}

# One row per model, one column per metric.
pd.DataFrame(results).transpose()


In [None]:

from sklearn.metrics import ConfusionMatrixDisplay

# Plotting confusion matrices
models = {
    "Logistic Regression L1": (log_reg_l1, y_pred_l1),
    "Logistic Regression L2": (log_reg_l2, y_pred_l2),
    "SVM Linear": (svc_linear, y_pred_svm_linear),
    "SVM RBF": (svc_rbf, y_pred_svm_rbf),
}

# load_breast_cancer encodes 0 = malignant and 1 = benign (see
# data.target_names), so the display names must follow that order. Passing
# `labels` explicitly pins the row/column order the names are matched to.
class_labels = [0, 1]
class_names = ["Malignant", "Benign"]

for name, (_model, y_pred) in models.items():
    disp = ConfusionMatrixDisplay.from_predictions(
        y_test,
        y_pred,
        labels=class_labels,
        display_labels=class_names,
    )
    # Style the axes that was just drawn rather than relying on pyplot's
    # implicit "current axes".
    disp.ax_.set_title(f"Confusion Matrix - {name}")
    disp.ax_.grid(False)
    plt.show()


In [None]:

# L1 coefficient analysis: rank features by the absolute value of their
# coefficient in the L1-regularized logistic regression.
importance = np.abs(log_reg_l1.coef_[0])
features = X.columns
sorted_idx = np.argsort(importance)[::-1]  # largest magnitude first

# Keep only the ten largest coefficients for the chart.
top_idx = sorted_idx[:10]
bar_positions = range(len(top_idx))

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(bar_positions, importance[top_idx], align='center')
ax.set_yticks(bar_positions)
ax.set_yticklabels(features[top_idx])
ax.set_xlabel("Coefficient Magnitude")
ax.set_title("Top 10 Important Features (L1 Regularized Logistic Regression)")
ax.invert_yaxis()  # put the largest coefficient at the top of the chart
plt.show()


In [None]:
# Plotting ROC curves
# RocCurveDisplay.from_estimator creates a brand-new figure whenever `ax` is
# omitted, so every curve must be given the same axes for them to be overlaid
# in a single comparison plot.
fig, ax = plt.subplots(figsize=(10, 7))

roc_models = {
    "LogReg L1": log_reg_l1,
    "LogReg L2": log_reg_l2,
    "SVM Linear": svc_linear,
    "SVM RBF": svc_rbf,
}
for roc_name, roc_model in roc_models.items():
    RocCurveDisplay.from_estimator(roc_model, X_test_scaled, y_test, name=roc_name, ax=ax)

ax.set_title("ROC Curve Comparison")
ax.grid(True)
plt.show()
