In [1]:
# Import necessary libraries
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Initialize the SVM model
svm = SVC(kernel='rbf', C=1.0)

# 1. Evaluate SVM on original data
original_scores = cross_val_score(svm, X_scaled, y, cv=10)
print("Original Data Accuracy:", np.mean(original_scores))

# 2. Apply PCA and transform data with all components
pca_all = PCA()
X_pca_all = pca_all.fit_transform(X_scaled)
pca_all_scores = cross_val_score(svm, X_pca_all, y, cv=10)
print("PCA (All Components) Data Accuracy:", np.mean(pca_all_scores))

# 3. Apply PCA and transform data with 4 principal components
pca_4 = PCA(n_components=4)
X_pca_4 = pca_4.fit_transform(X_scaled)
pca_4_scores = cross_val_score(svm, X_pca_4, y, cv=10)
print("PCA (4 Components) Data Accuracy:", np.mean(pca_4_scores))

# Compare results
print("\nComparison of Accuracies:")
print(f"Original Data Accuracy: {np.mean(original_scores):.4f}")
print(f"PCA (All Components) Data Accuracy: {np.mean(pca_all_scores):.4f}")
print(f"PCA (4 Components) Data Accuracy: {np.mean(pca_4_scores):.4f}")


Original Data Accuracy: 0.9770989974937342
PCA (All Components) Data Accuracy: 0.9770989974937342
PCA (4 Components) Data Accuracy: 0.9578320802005011

Comparison of Accuracies:
Original Data Accuracy: 0.9771
PCA (All Components) Data Accuracy: 0.9771
PCA (4 Components) Data Accuracy: 0.9578


In [2]:
X_pca_4.shape

(569, 4)

In [3]:
X_pca_all.shape

(569, 30)

In [4]:
X.shape

(569, 30)