## Loading libraries

In [1]:
 # Libraries
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report
)


## Loading Dataset

In [2]:
# Fetch the breast cancer dataset that ships with scikit-learn.
cancer = datasets.load_breast_cancer()

# Feature matrix and label vector (label encoding: 0 = malignant, 1 = benign).
X, y = cancer.data, cancer.target

# Tabular view used for exploration: one column per feature plus the label.
df = pd.DataFrame(X, columns=cancer.feature_names).assign(target=y)


## Explore Dataset

In [3]:
# Quick structural overview of the dataset: column names, dimensions,
# a sample of rows, dtypes / non-null counts, and summary statistics.
print("\n--- Dataset Columns ---")
print(df.columns)

print("\n--- Dataset Shape ---")
print(df.shape)

print("\n--- Dataset Head (first 5 rows) ---")
print(df.head())

print("\n--- Dataset Info ---")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
df.info()

print("\n--- Dataset Description (summary stats) ---")
print(df.describe())



--- Dataset Columns ---
Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension',
       'target'],
      dtype='object')

--- Dataset Shape ---
(569, 31)

--- Dataset Head (first 5 rows) ---
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   

## Train-Test Split

In [4]:
# Hold out 20% of the samples for final testing. Stratifying on y keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

print("\nTraining samples:", len(X_train))
print("Testing samples :", len(X_test))



Training samples: 455
Testing samples : 114


## Pipeline, Grid Search, SVM Model and Cross-Validation

In [5]:
# Two-stage estimator: standardise the features, then classify with an SVM.
# The step names ("scaler", "svc") are what the hyperparameter grid keys
# (e.g. "svc__C") refer to, so they must stay as they are.
scaler_step = ("scaler", StandardScaler())
# probability=True is required for the predict_proba call used at test time.
svc_step = ("svc", SVC(probability=True, random_state=42))

pipeline = Pipeline(steps=[scaler_step, svc_step])


In [6]:
# Hyperparameter grid for the pipeline's "svc" step.
#
# `gamma` is a kernel coefficient that the linear kernel does not use, so
# crossing it with kernel="linear" only re-fits identical models (the
# original single-dict grid evaluated 32 candidates, 12 of them duplicates).
# Splitting the grid per kernel keeps every distinct candidate (16 RBF +
# 4 linear = 20) and drops the redundant fits.
C_VALUES = [0.1, 1, 10, 100]

param_grid = [
    {
        "svc__kernel": ["rbf"],
        "svc__C": C_VALUES,
        "svc__gamma": ["scale", 0.01, 0.1, 1],
    },
    {
        "svc__kernel": ["linear"],
        "svc__C": C_VALUES,
    },
]

# 5-fold stratified splitter with shuffling; the fixed seed keeps the fold
# assignment reproducible between runs.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


## Grid Search Training and Model Evaluation

In [7]:
 # Grid Search Training
# Exhaustive search over param_grid, scoring each candidate by ROC-AUC on the
# folds produced by cv. All CPU cores are used (n_jobs=-1), and refit=True
# retrains the winning configuration on the full training set afterwards.
# fit() returns the fitted search object, so the call can be chained.
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring="roc_auc",
    cv=cv,
    n_jobs=-1,
    refit=True,
).fit(X_train, y_train)

best_cv_auc = round(grid_search.best_score_, 4)

print("\n--- Best Cross-Validation Results ---")
print("Best ROC-AUC:", best_cv_auc)
print("Best Parameters:", grid_search.best_params_)



--- Best Cross-Validation Results ---
Best ROC-AUC: 0.9964
Best Parameters: {'svc__C': 10, 'svc__gamma': 0.01, 'svc__kernel': 'rbf'}


In [10]:
# Evaluate the refit best pipeline on the held-out test set and report
# sensitivity / specificity from both class perspectives.
best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test)
# Column 1 of predict_proba holds the probability of class 1 (benign), which
# is the positive-class score passed to roc_auc_score below.
y_proba = best_model.predict_proba(X_test)[:, 1]

# Pin the label order so the matrix is always 2x2, laid out as
# [[tn, fp], [fn, tp]] with benign (1) as the positive class. Without
# `labels`, the 4-way unpack below would fail if one class were missing
# from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

# Benign as positive (class=1)
sensitivity_benign = tp / (tp + fn)
specificity_benign = tn / (tn + fp)

# Malignant as positive (class=0) - clinical view.
# Swapping which class counts as "positive" swaps the two rates.
sensitivity_malignant = specificity_benign
specificity_malignant = sensitivity_benign

# Precision, recall and F1 use scikit-learn's default positive label (1),
# i.e. they describe the benign class.
print("\n--- Test Set Performance ---")
print("Accuracy     :", accuracy_score(y_test, y_pred))
print("Precision    :", precision_score(y_test, y_pred))
print("Recall       :", recall_score(y_test, y_pred))
print("F1-score     :", f1_score(y_test, y_pred))
print("ROC-AUC      :", roc_auc_score(y_test, y_proba))

print("\nConfusion Matrix:")
print(cm)

print("\n--- Sensitivity & Specificity ---")
print("Sensitivity (Benign as positive)    :", sensitivity_benign)
print("Specificity (Benign as positive)    :", specificity_benign)
print("Sensitivity (Malignant as positive) :", sensitivity_malignant)
print("Specificity (Malignant as positive) :", specificity_malignant)

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=cancer.target_names))



--- Test Set Performance ---
Accuracy     : 0.9824561403508771
Precision    : 0.9861111111111112
Recall       : 0.9861111111111112
F1-score     : 0.9861111111111112
ROC-AUC      : 0.9976851851851851

Confusion Matrix:
[[41  1]
 [ 1 71]]

--- Sensitivity & Specificity ---
Sensitivity (Benign as positive)    : 0.9861111111111112
Specificity (Benign as positive)    : 0.9761904761904762
Sensitivity (Malignant as positive) : 0.9761904761904762
Specificity (Malignant as positive) : 0.9861111111111112

Classification Report:
              precision    recall  f1-score   support

   malignant       0.98      0.98      0.98        42
      benign       0.99      0.99      0.99        72

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

