In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)
X_sampled, _, y_sampled, _ = train_test_split(X, y, train_size=0.5, stratify=y, random_state=42)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_sampled)
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X_scaled)
X_train, X_test, y_train, y_test = train_test_split(X_pca, y_sampled, test_size=0.2, random_state=42)
param_grid = {
    'svm__C': [1, 10],
    'svm__gamma': ['scale', 0.01]
}
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVC())
])
grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)
print("Best Parameters from GridSearchCV:", grid_search.best_params_)
print("Best Cross-Validation Score:", grid_search.best_score_)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("\nTest Set Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Best Parameters from GridSearchCV: {'svm__C': 10, 'svm__gamma': 0.01}
Best Cross-Validation Score: 0.9649122807017544

Test Set Accuracy: 0.9824561403508771

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.95      0.98        22
           1       0.97      1.00      0.99        35

    accuracy                           0.98        57
   macro avg       0.99      0.98      0.98        57
weighted avg       0.98      0.98      0.98        57


Confusion Matrix:
 [[21  1]
 [ 0 35]]
