In [1]:

#  Import Libraries
import warnings

import joblib
import numpy as np
import pandas as pd

# The Intel extension must be activated BEFORE anything is imported from
# sklearn: estimators imported ahead of patch_sklearn() keep pointing at the
# stock implementations and silently run unaccelerated.
from sklearnex import patch_sklearn
patch_sklearn()  # Enable Intel® Extension for Scikit-learn* optimizations

from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# NOTE(review): this hides every warning raised from here on (including
# convergence and deprecation warnings); consider filtering by category.
warnings.filterwarnings("ignore")


Extension for Scikit-learn* enabled (https://github.com/uxlfoundation/scikit-learn-intelex)


In [2]:

#  Load Dataset
# Pull scikit-learn's bundled breast cancer data and wrap it in pandas
# objects: X holds the feature table (one named column per feature) and
# y holds the integer class labels.
data = load_breast_cancer()
X = pd.DataFrame(data["data"], columns=data["feature_names"])
y = pd.Series(data["target"])

# Quick sanity check: table dimensions and how many rows fall in each class.
class_counts = y.value_counts()
print("Dataset Shape:", X.shape)
print("Target Distribution:\n", class_counts)


Dataset Shape: (569, 30)
Target Distribution:
 1    357
0    212
Name: count, dtype: int64


In [3]:

#  Feature Groups & Preprocessing
# Every column of X is treated as numerical here; no categorical group is
# defined in this notebook.
numerical_features = X.columns.to_list()

# Single transformer entry: standardise all listed columns with StandardScaler.
numeric_step = ('num', StandardScaler(), numerical_features)
preprocessor = ColumnTransformer(transformers=[numeric_step])


In [4]:

#  Create ML Pipeline (Preprocessing + Model)
# Chaining the scaler and the classifier in one Pipeline means the
# preprocessing is fitted together with the model whenever fit() is called.
pipeline_steps = [
    ('preprocessing', preprocessor),
    ('model', LogisticRegression(max_iter=10000)),  # generous iteration cap for the solver
]
pipeline = Pipeline(pipeline_steps)


In [5]:

#  Train-Test Split
# Hold out 20% of the rows for evaluation. The fixed seed makes the split
# repeatable, and stratifying on y keeps the class ratio alike in both parts.
split_options = {
    "test_size": 0.2,
    "random_state": 42,
    "stratify": y,
}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [6]:

#  Train Model & Predict
# fit() returns the fitted pipeline itself, so training on the training split
# and predicting labels for the held-out split can be chained in one expression.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)


In [7]:

#  Evaluation Metrics
# Score the held-out predictions. Precision, recall and F1 are computed for
# the class labelled 1 (pos_label=1, which is also scikit-learn's default).
# NOTE(review): scikit-learn documents label 1 of this dataset as "benign" —
# confirm that this is the class the headline metrics should describe.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label=1)
recall = recall_score(y_test, y_pred, pos_label=1)
f1 = f1_score(y_test, y_pred, pos_label=1)

print("\n Evaluation Metrics")
for label, value in (
    ("Accuracy :", accuracy),
    ("Precision:", precision),
    ("Recall   :", recall),
    ("F1 Score :", f1),
):
    print(label, value)

# Per-class breakdown (precision / recall / F1 / support) for both labels.
print("\nClassification Report:\n")
report = classification_report(y_test, y_pred)
print(report)



 Evaluation Metrics
Accuracy : 0.9824561403508771
Precision: 0.9861111111111112
Recall   : 0.9861111111111112
F1 Score : 0.9861111111111112

Classification Report:

              precision    recall  f1-score   support

           0       0.98      0.98      0.98        42
           1       0.99      0.99      0.99        72

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [8]:

#  Save Pipeline Model
# Keep the output filename in one place so the file written and the message
# printed can never drift apart.
MODEL_PATH = "breast_cancer_pipeline.pkl"

# Persists the whole fitted pipeline (preprocessing + model) as one artefact.
# NOTE: joblib files are pickle-based; only joblib.load() files you trust.
joblib.dump(pipeline, MODEL_PATH)

print(f"\n Pipeline model saved as '{MODEL_PATH}'")



 Pipeline model saved as 'breast_cancer_pipeline.pkl'
