# Solution: Optimal Components

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA

# 1. Data
# Feature matrix of the scikit-learn breast-cancer dataset (targets unused).
data = load_breast_cancer()
X = data.data

# 2. PCA Full
# No n_components is given, so every component is kept and the full
# explained-variance spectrum is available below.
# NOTE(review): X is passed to PCA as loaded, with no scaling step. PCA is
# scale-sensitive -- confirm that fitting on unscaled features is intended.
pca = PCA().fit(X)

# Running total of the per-component variance ratios: entry i is the share
# of variance retained by the first i + 1 components.
cumsum = pca.explained_variance_ratio_.cumsum()

# 3. Plot
# Cumulative explained variance against the number of components kept.
# cumsum[i] is the variance retained by the first i + 1 components, so the
# x values are explicit 1-based counts; plotting cumsum alone would use the
# 0-based index and shift the curve one component to the left of its label.
component_counts = np.arange(1, len(cumsum) + 1)

plt.figure(figsize=(10, 5))
plt.plot(component_counts, cumsum, marker='o', linestyle='--')
plt.axhline(y=0.95, color='r', linestyle='-')  # 95% variance target line
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.grid()
plt.show()

# 4. Find N
# Boolean mask of the positions where the running total has reached 95%.
# argmax() returns the index of the first True; adding 1 converts that
# 0-based index into a component count.
reaches_target = cumsum >= 0.95
n_components = reaches_target.argmax() + 1
print(f"Components needed for 95% variance: {n_components}")