# In [None]:
import joblib

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA

from sklearn.svm import SVC

from sklearn.metrics import classification_report, accuracy_score

from sklearn.datasets import load_breast_cancer


# 1. Load the dataset.
# as_frame=True makes the loader hand back pandas objects; the feature table
# and the label column are then pulled off the returned bunch separately.
cancer = load_breast_cancer(as_frame=True)
X = cancer.data
y = cancer.target

# 2. Split the data into a training set and a testing set.
# 20% of the rows are held out for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# 3. Build a pipeline.
# Each stage is created up front so the pipeline definition reads as a plain
# ordered list: standardize the features, project them onto 10 principal
# components, then classify with a support vector machine.
feature_scaler = StandardScaler()
pca_reducer = PCA(n_components=10)
svm_classifier = SVC(gamma='auto')

model_pipeline = Pipeline(
    steps=[
        ('scaler', feature_scaler),
        ('pca', pca_reducer),
        ('classifier', svm_classifier),
    ]
)

# 4. Train the pipeline.
# Fitting the pipeline fits every stage in order on the training split only,
# so the scaler and PCA never see the held-out test rows.
model_pipeline.fit(X_train, y_train)

# 5. Evaluate the model (optional, for verification).
# Predictions are made on the held-out test split so the reported numbers
# reflect data the pipeline was not fitted on.
y_pred = model_pipeline.predict(X_test)

print("Model Accuracy:", accuracy_score(y_test, y_pred))

print("\nClassification Report:")

# Take the class names from the dataset itself instead of hard-coding them:
# `cancer.target_names` is ordered to match the integer labels in `y`, so the
# report rows cannot end up mislabeled if the names or their order ever differ
# from a hand-written list.
class_names = list(cancer.target_names)
print(classification_report(y_test, y_pred, target_names=class_names))


# 6. Save the trained pipeline.
# The whole fitted pipeline (scaler, PCA and classifier together) is written
# as a single artifact, relative to the current working directory.
# NOTE(review): the file is pickle-based — only load it from a trusted source,
# and presumably with a compatible scikit-learn version; confirm for deployment.
MODEL_OUTPUT_PATH = 'breast_cancer_model.pkl'
joblib.dump(model_pipeline, MODEL_OUTPUT_PATH)


