# Chapter 8 - Dimensionality Reduction

This notebook covers dimensionality reduction techniques, including:
- The Curse of Dimensionality
- Main Approaches for Dimensionality Reduction
- Principal Component Analysis (PCA)
- Kernel PCA
- Locally Linear Embedding (LLE)
- Other Dimensionality Reduction Techniques

## Setup

In [None]:
# Python ≥3.6 is required (the cells below use f-strings)
import sys
assert sys.version_info >= (3, 6)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) as integers: comparing the raw version strings is
# lexicographic, so e.g. "0.3" >= "0.20" would wrongly pass.
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None
assert tuple(map(int, _sklearn_version.groups())) >= (0, 20)

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "dim_reduction"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure under ``IMAGES_PATH``.

    The file is named ``<fig_id>.<fig_extension>``. When ``tight_layout`` is
    true, ``plt.tight_layout()`` is applied before saving. ``resolution`` is
    passed to ``plt.savefig`` as ``dpi`` and ``fig_extension`` as ``format``.
    """
    filename = fig_id + "." + fig_extension
    target = os.path.join(IMAGES_PATH, filename)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

## The Curse of Dimensionality

In [None]:
# Demonstrate the curse of dimensionality
import numpy as np
import matplotlib.pyplot as plt

# For each dimensionality, draw m uniform random points in the unit hypercube
# and keep the Euclidean distances from the first point to all the others.
np.random.seed(42)
m = 1000
dimensions = [1, 2, 3, 5, 10, 20, 50, 100]
distances = []

for d in dimensions:
    X = np.random.rand(m, d)
    offsets = X - X[0]
    dists = np.sqrt(np.square(offsets).sum(axis=1))
    distances.append(dists[1:])  # skip the first point's distance to itself

# One histogram of distances per dimensionality, laid out on a 2x4 grid
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
axes = axes.ravel()

for ax, d, dist in zip(axes, dimensions, distances):
    ax.hist(dist, bins=50, alpha=0.7, density=True)
    ax.set_title(f'{d}D space')
    ax.set_xlabel('Distance')
    ax.set_ylabel('Density')

plt.suptitle('Curse of Dimensionality: Distance Distributions', fontsize=16)
plt.tight_layout()
save_fig("curse_of_dimensionality")
plt.show()

# Summary table: mean, spread, and min/max ratio of the distances
print("Dimension | Mean Distance | Std Distance | Min/Max Ratio")
print("-" * 60)
for d, dist in zip(dimensions, distances):
    mean_dist, std_dist = dist.mean(), dist.std()
    min_max_ratio = dist.min() / dist.max()
    print(f"{d:9d} | {mean_dist:13.3f} | {std_dist:12.3f} | {min_max_ratio:13.3f}")

## Main Approaches for Dimensionality Reduction

### Projection

In [None]:
# Create a 3D dataset that lies close to a 2D plane
np.random.seed(4)
m = 60
w1, w2 = 0.1, 0.3
noise = 0.1

angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5
# Build the columns one at a time, in this order, so the random draws are
# consumed in a fixed sequence; x3 is a noisy linear combination of x1 and x2.
x1 = np.cos(angles) + np.sin(angles)/2 + noise * np.random.randn(m) / 2
x2 = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2
x3 = x1 * w1 + x2 * w2 + noise * np.random.randn(m)
X = np.column_stack((x1, x2, x3))

# Visualize the 3D data
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(12, 4))

# Left panel: the full 3D point cloud, coloured by the generating angle
ax1 = fig.add_subplot(131, projection='3d')
ax1.scatter(X[:, 0], X[:, 1], X[:, 2], c=angles, cmap=plt.cm.hot)
ax1.set(xlabel='$x_1$', ylabel='$x_2$', zlabel='$x_3$', title='3D Dataset')

# Middle panel: drop x3 (projection onto the x1-x2 plane)
ax2 = fig.add_subplot(132)
ax2.scatter(X[:, 0], X[:, 1], c=angles, cmap=plt.cm.hot)
ax2.set(xlabel='$x_1$', ylabel='$x_2$',
        title='Projection onto $x_1$-$x_2$ plane')

# Right panel: drop x2 (projection onto the x1-x3 plane)
ax3 = fig.add_subplot(133)
ax3.scatter(X[:, 0], X[:, 2], c=angles, cmap=plt.cm.hot)
ax3.set(xlabel='$x_1$', ylabel='$x_3$',
        title='Projection onto $x_1$-$x_3$ plane')

save_fig("projection_example")
plt.show()

### Manifold Learning

In [None]:
# Swiss roll dataset - a classic manifold learning example
from sklearn.datasets import make_swiss_roll

X_swiss, color = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

fig = plt.figure(figsize=(12, 4))

# Left panel: the roll in 3D, coloured by the value returned alongside the points
ax1 = fig.add_subplot(131, projection='3d')
ax1.scatter(X_swiss[:, 0], X_swiss[:, 1], X_swiss[:, 2], c=color, cmap=plt.cm.hot)
ax1.set(xlabel='$x_1$', ylabel='$x_2$', zlabel='$x_3$', title='Swiss Roll (3D)')

# Middle panel: projection onto the x1-x2 plane (labelled "bad")
ax2 = fig.add_subplot(132)
ax2.scatter(X_swiss[:, 0], X_swiss[:, 1], c=color, cmap=plt.cm.hot)
ax2.set(xlabel='$x_1$', ylabel='$x_2$', title='Bad Projection')

# Right panel: projection onto the x1-x3 plane (labelled "better")
ax3 = fig.add_subplot(133)
ax3.scatter(X_swiss[:, 0], X_swiss[:, 2], c=color, cmap=plt.cm.hot)
ax3.set(xlabel='$x_1$', ylabel='$x_3$', title='Better Projection')

save_fig("swiss_roll_projections")
plt.show()

## Principal Component Analysis (PCA)

### Preserving the Variance

In [None]:
# Generate 2D data: a standard normal cloud, scaled by 1.5 along the first
# axis and 0.5 along the second, then multiplied by a pi/6 rotation matrix.
np.random.seed(4)
m = 60
stretch = np.array([[1.5, 0], [0, 0.5]])
theta = np.pi/6
rotation = np.array([[np.cos(theta), -np.sin(theta)],
                     [np.sin(theta), np.cos(theta)]])
X_2d = np.random.randn(m, 2)
X_2d = X_2d.dot(stretch)
X_2d = X_2d.dot(rotation)

# Perform PCA manually: centre the data, then take the SVD. The rows of Vt
# (columns of Vt.T) are used as the principal directions.
center = X_2d.mean(axis=0)
X_centered = X_2d - center
U, s, Vt = np.linalg.svd(X_centered)
c1, c2 = Vt.T[:, 0], Vt.T[:, 1]

# Project data onto principal components
W2 = Vt.T[:, :2]
X2D_pca = X_centered.dot(W2)

# Visualize
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: original data with one arrow per principal direction, drawn
# from the data mean and scaled by 3 for visibility
axes[0].scatter(X_2d[:, 0], X_2d[:, 1], alpha=0.7)
for direction, arrow_color in ((c1, 'red'), (c2, 'blue')):
    axes[0].arrow(center[0], center[1], direction[0]*3, direction[1]*3,
                  head_width=0.1, head_length=0.1, fc=arrow_color, ec=arrow_color)
axes[0].set_xlabel('$x_1$')
axes[0].set_ylabel('$x_2$')
axes[0].set_title('Original Data with Principal Components')
axes[0].grid(True)

# Right panel: the same points expressed in principal-component coordinates
axes[1].scatter(X2D_pca[:, 0], X2D_pca[:, 1], alpha=0.7)
axes[1].set_xlabel('PC1')
axes[1].set_ylabel('PC2')
axes[1].set_title('Data Projected onto Principal Components')
axes[1].grid(True)

save_fig("pca_example")
plt.show()

# Share of the total squared singular values carried by each component
print(f"Explained variance ratio: {s**2 / np.sum(s**2)}")

### Principal Components

In [None]:
from sklearn.decomposition import PCA

# Fit a two-component PCA on the 2D dataset built in the previous cell and
# project that same data in one step.
pca = PCA(n_components=2)
X2D = pca.fit_transform(X_2d)

# Report the fitted attributes exposed by the estimator
print(f"Principal components:\n{pca.components_}")
print(f"Explained variance ratio: {pca.explained_variance_ratio_}")
print(f"Singular values: {pca.singular_values_}")

### Projecting Down to d Dimensions

In [None]:
# Keep a single component, then map the 1D projection back to 2D
pca = PCA(n_components=1)
X1D = pca.fit_transform(X_2d)
print(f"1D projection shape: {X1D.shape}")
print(f"Explained variance ratio: {pca.explained_variance_ratio_}")

# Reconstruct the data
X_recovered = pca.inverse_transform(X1D)

# Visualize projection and reconstruction
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
ax_original, ax_projection, ax_compare = axes

# Original data
ax_original.scatter(X_2d[:, 0], X_2d[:, 1], alpha=0.7, label='Original')
ax_original.set_xlabel('$x_1$')
ax_original.set_ylabel('$x_2$')
ax_original.set_title('Original 2D Data')
ax_original.grid(True)

# 1D projection, drawn along a horizontal line at y = 0
projected = X1D[:, 0]
ax_projection.scatter(projected, np.zeros_like(projected), alpha=0.7)
ax_projection.set_xlabel('PC1')
ax_projection.set_ylabel('')
ax_projection.set_title('1D Projection')
ax_projection.grid(True)

# Reconstructed data, with a dashed segment joining each original point to
# its reconstruction
ax_compare.scatter(X_2d[:, 0], X_2d[:, 1], alpha=0.7, label='Original')
ax_compare.scatter(X_recovered[:, 0], X_recovered[:, 1], alpha=0.7,
                   marker='x', s=50, label='Reconstructed')
for original_pt, recovered_pt in zip(X_2d, X_recovered):
    ax_compare.plot([original_pt[0], recovered_pt[0]],
                    [original_pt[1], recovered_pt[1]], 'k--', alpha=0.3)
ax_compare.set_xlabel('$x_1$')
ax_compare.set_ylabel('$x_2$')
ax_compare.set_title('Original vs Reconstructed')
ax_compare.legend()
ax_compare.grid(True)

save_fig("pca_projection_reconstruction")
plt.show()

### Using Scikit-Learn

In [None]:
from sklearn.datasets import fetch_openml

# Load MNIST dataset (as_frame=False) and convert the labels to uint8
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
mnist.target = mnist.target.astype(np.uint8)

X, y = mnist["data"], mnist["target"]

print(f"MNIST shape: {X.shape}")

# Fit a PCA with no component limit so the whole variance spectrum is available
pca = PCA()
pca.fit(X)

# Running total of the explained variance ratio
cumsum = np.cumsum(pca.explained_variance_ratio_)

plt.figure(figsize=(12, 4))

# Left panel: explained variance ratio of the first 50 components
ax_ratio = plt.subplot(121)
ax_ratio.plot(pca.explained_variance_ratio_[:50])
ax_ratio.set_xlabel('Principal Component')
ax_ratio.set_ylabel('Explained Variance Ratio')
ax_ratio.set_title('Explained Variance per Component')
ax_ratio.grid(True)

# Right panel: cumulative ratio over the first 200 components, with
# reference lines at 95% and 99%
ax_cumulative = plt.subplot(122)
ax_cumulative.plot(cumsum[:200])
ax_cumulative.axhline(y=0.95, color='r', linestyle='--', label='95% variance')
ax_cumulative.axhline(y=0.99, color='g', linestyle='--', label='99% variance')
ax_cumulative.set_xlabel('Number of Components')
ax_cumulative.set_ylabel('Cumulative Explained Variance')
ax_cumulative.set_title('Cumulative Explained Variance')
ax_cumulative.legend()
ax_cumulative.grid(True)

save_fig("mnist_pca_variance")
plt.show()

# Find number of components for different variance levels: argmax gives the
# first index where the threshold is reached, +1 converts it to a count
d, d99 = (np.argmax(cumsum >= level) + 1 for level in (0.95, 0.99))
print(f"Components needed for 95% variance: {d}")
print(f"Components needed for 99% variance: {d99}")

### Choosing the Right Number of Dimensions

In [None]:
# PCA with 95% variance preserved: a float n_components asks PCA to keep
# enough components to reach that share of the variance
pca = PCA(n_components=0.95)
X_reduced = pca.fit_transform(X)

# Reconstruct images (visualized further below)
X_recovered = pca.inverse_transform(X_reduced)

n_original, n_kept = X.shape[1], X_reduced.shape[1]
print(f"Reduced dimensions: {n_kept}")
print(f"Compression ratio: {n_original / n_kept:.1f}")

def plot_digits(instances, images_per_row=10, **options):
    """Draw a grid of 28x28 images on the current Matplotlib axes.

    Each item of ``instances`` is reshaped to 28x28. Images are tiled left to
    right in rows of at most ``images_per_row``; an incomplete last row is
    padded with all-zero images. The grid is shown with the binary colormap
    and the axes are hidden. Extra keyword ``options`` go to ``plt.imshow``.
    """
    side = 28
    n_instances = len(instances)
    images_per_row = min(n_instances, images_per_row)
    tiles = [instance.reshape(side, side) for instance in instances]
    n_rows = (n_instances - 1) // images_per_row + 1
    # Pad so the tile count is an exact multiple of the row length
    tiles += [np.zeros((side, side))] * (n_rows * images_per_row - n_instances)
    strips = [
        np.concatenate(tiles[start:start + images_per_row], axis=1)
        for start in range(0, n_rows * images_per_row, images_per_row)
    ]
    grid = np.concatenate(strips, axis=0)
    plt.imshow(grid, cmap=mpl.cm.binary, **options)
    plt.axis("off")

# Display original vs reconstructed images: first ten digits, one row each
fig, axes = plt.subplots(2, 1, figsize=(12, 6))

panels = (
    (211, X[:10], "Original Images"),
    (212, X_recovered[:10], f"Reconstructed Images ({pca.n_components_} components)"),
)
for position, digits, heading in panels:
    plt.subplot(position)
    plot_digits(digits)
    plt.title(heading)

save_fig("mnist_pca_reconstruction")
plt.show()

### PCA for Compression

In [None]:
# Compare different compression levels on the first digit of the sample
components = [2, 10, 50, 154, 784]
fig, axes = plt.subplots(1, len(components), figsize=(15, 3))

# Use subset of data for faster computation
X_sample = X[:1000]

for panel, n_comp in enumerate(components, start=1):
    if n_comp != 784:
        # Compress to n_comp components and map back to pixel space
        pca = PCA(n_components=n_comp)
        X_reduced = pca.fit_transform(X_sample)
        X_rec = pca.inverse_transform(X_reduced)
        compression_ratio = X_sample.shape[1] / n_comp
        title = f"{n_comp} components\n({compression_ratio:.1f}x compression)"
    else:
        # 784 is treated as "no compression": show the original image
        X_rec = X_sample
        title = "Original"

    plt.subplot(1, len(components), panel)
    plot_digits(X_rec[:1])
    plt.title(title, fontsize=10)

save_fig("pca_compression_comparison")
plt.show()

### Randomized PCA

In [None]:
import time

# Compare regular PCA vs randomized PCA
X_sample = X[:1000]  # Use smaller sample for timing

# Regular PCA (full SVD). perf_counter() is a monotonic, high-resolution
# clock meant for measuring short durations, unlike wall-clock time.time().
start_time = time.perf_counter()
pca_regular = PCA(n_components=154, svd_solver="full")
X_reduced_regular = pca_regular.fit_transform(X_sample)
regular_time = time.perf_counter() - start_time

# Randomized PCA, seeded so that reruns produce the same projection
start_time = time.perf_counter()
pca_randomized = PCA(n_components=154, svd_solver="randomized", random_state=42)
X_reduced_randomized = pca_randomized.fit_transform(X_sample)
randomized_time = time.perf_counter() - start_time

print(f"Regular PCA time: {regular_time:.3f} seconds")
print(f"Randomized PCA time: {randomized_time:.3f} seconds")
print(f"Speedup: {regular_time/randomized_time:.1f}x")

# Check if results are similar (computed once and reused in the message)
results_match = np.allclose(X_reduced_regular, X_reduced_randomized)
print(f"Results are approximately equal: {results_match}")

### Incremental PCA

In [None]:
from sklearn.decomposition import IncrementalPCA

# For large datasets that don't fit in memory
# IncrementalPCA.partial_fit rejects a batch with fewer samples than
# n_components, so the 1000-sample demo set is split into 5 batches of 200
# (>= 154). The previous 100 batches of 10 samples raised a ValueError.
n_batches = 5
inc_pca = IncrementalPCA(n_components=154)

# Simulate batch processing
X_sample = X[:1000]  # Use sample for demonstration
batch_size = len(X_sample) // n_batches

# array_split covers every row even when the sample size is not an exact
# multiple of n_batches
for X_batch in np.array_split(X_sample, n_batches):
    inc_pca.partial_fit(X_batch)

X_reduced_inc = inc_pca.transform(X_sample)
X_recovered_inc = inc_pca.inverse_transform(X_reduced_inc)

print(f"Incremental PCA shape: {X_reduced_inc.shape}")
print(f"Explained variance ratio sum: {inc_pca.explained_variance_ratio_.sum():.3f}")

# Compare with regular PCA fitted on the same sample in one go
pca_regular = PCA(n_components=154)
X_reduced_reg = pca_regular.fit_transform(X_sample)
X_recovered_reg = pca_regular.inverse_transform(X_reduced_reg)

# Visualize comparison: first five digits, one row per method
fig, axes = plt.subplots(3, 1, figsize=(12, 9))

plt.subplot(311)
plot_digits(X_sample[:5])
plt.title("Original")

plt.subplot(312)
plot_digits(X_recovered_reg[:5])
plt.title("Regular PCA")

plt.subplot(313)
plot_digits(X_recovered_inc[:5])
plt.title("Incremental PCA")

save_fig("incremental_pca_comparison")
plt.show()

## Kernel PCA

In [None]:
# Create nonlinear dataset
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import KernelPCA

X_swiss, color = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

# Apply different kernels (each kernel is paired with the gamma at the same index)
kernels = ['linear', 'rbf', 'sigmoid']
gammas = [None, 0.04, 1]

fig, axes = plt.subplots(2, 4, figsize=(16, 8))

# Original data: remove the 2D placeholder first, otherwise the 3D axes is
# drawn on top of it and both sets of ticks/frames show through
axes[0, 0].remove()
ax = fig.add_subplot(2, 4, 1, projection='3d')
ax.scatter(X_swiss[:, 0], X_swiss[:, 1], X_swiss[:, 2], c=color, cmap=plt.cm.hot)
ax.set_title('Original Swiss Roll')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_zlabel('$x_3$')

# Regular PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_swiss)
axes[0, 1].scatter(X_pca[:, 0], X_pca[:, 1], c=color, cmap=plt.cm.hot)
axes[0, 1].set_title('Linear PCA')
axes[0, 1].set_xlabel('PC1')
axes[0, 1].set_ylabel('PC2')

# Kernel PCA with different kernels, placed in the two right-hand columns
kpca_axes = [axes[0, 2], axes[0, 3], axes[1, 2]]
for panel, kernel, gamma in zip(kpca_axes, kernels, gammas):
    kpca = KernelPCA(n_components=2, kernel=kernel, gamma=gamma, random_state=42)
    X_kpca = kpca.fit_transform(X_swiss)

    panel.scatter(X_kpca[:, 0], X_kpca[:, 1], c=color, cmap=plt.cm.hot)
    panel.set_title(f'Kernel PCA ({kernel})')
    panel.set_xlabel('PC1')
    panel.set_ylabel('PC2')

# Hide every grid slot that received no plot
for unused in (axes[1, 0], axes[1, 1], axes[1, 3]):
    unused.axis('off')

plt.tight_layout()
save_fig("kernel_pca_comparison")
plt.show()

### Selecting a Kernel and Tuning Hyperparameters

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Create a classification dataset
from sklearn.datasets import make_moons
X_moons, y_moons = make_moons(n_samples=100, noise=0.15, random_state=42)

# Pipeline with Kernel PCA and Logistic Regression: reduce to two
# components, then classify in the reduced space
pipeline_steps = [
    ("kpca", KernelPCA(n_components=2)),
    ("log_reg", LogisticRegression(solver="lbfgs")),
]
clf = Pipeline(pipeline_steps)

# Search over the Kernel PCA step only: 10 gamma values x 2 kernels
kpca_search_space = {
    "kpca__gamma": np.linspace(0.03, 0.05, 10),
    "kpca__kernel": ["rbf", "sigmoid"],
}
param_grid = [kpca_search_space]

grid_search = GridSearchCV(clf, param_grid, cv=3)
grid_search.fit(X_moons, y_moons)

print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.3f}")

## Locally Linear Embedding (LLE)

In [None]:
from sklearn.manifold import LocallyLinearEmbedding

# Apply LLE to Swiss Roll
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10, random_state=42)
X_lle = lle.fit_transform(X_swiss)

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Original Swiss Roll: remove the 2D placeholder first, otherwise the 3D
# axes is drawn on top of it and both sets of ticks/frames show through
axes[0].remove()
ax = fig.add_subplot(1, 3, 1, projection='3d')
ax.scatter(X_swiss[:, 0], X_swiss[:, 1], X_swiss[:, 2], c=color, cmap=plt.cm.hot)
ax.set_title('Original Swiss Roll')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_zlabel('$x_3$')

# Linear PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_swiss)
axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=color, cmap=plt.cm.hot)
axes[1].set_title('Linear PCA')
axes[1].set_xlabel('PC1')
axes[1].set_ylabel('PC2')

# LLE
axes[2].scatter(X_lle[:, 0], X_lle[:, 1], c=color, cmap=plt.cm.hot)
axes[2].set_title('LLE')
axes[2].set_xlabel('Dimension 1')
axes[2].set_ylabel('Dimension 2')

save_fig("lle_comparison")
plt.show()

## Other Dimensionality Reduction Techniques

In [None]:
from sklearn.manifold import MDS, TSNE, Isomap
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Use a smaller subset for faster computation
X_subset = X_swiss[::2]  # Every other point
color_subset = color[::2]

# Apply different techniques (insertion order decides the panel order).
# LLE is seeded like its siblings that accept a random_state, so reruns
# give the same embedding.
techniques = {
    'PCA': PCA(n_components=2),
    'LLE': LocallyLinearEmbedding(n_components=2, n_neighbors=10, random_state=42),
    'MDS': MDS(n_components=2, random_state=42),
    'Isomap': Isomap(n_components=2),
    't-SNE': TSNE(n_components=2, random_state=42, perplexity=30)
}

fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# Original data: remove the 2D placeholder first, otherwise the 3D axes is
# drawn on top of it and both sets of ticks/frames show through
axes[0, 0].remove()
ax = fig.add_subplot(2, 3, 1, projection='3d')
ax.scatter(X_subset[:, 0], X_subset[:, 1], X_subset[:, 2], c=color_subset, cmap=plt.cm.hot)
ax.set_title('Original Swiss Roll')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_zlabel('$x_3$')

# Apply each technique; slot 0 holds the 3D plot, so numbering starts at 1
for i, (name, technique) in enumerate(techniques.items(), start=1):
    row, col = divmod(i, 3)
    panel = axes[row, col]

    try:
        X_transformed = technique.fit_transform(X_subset)
    except Exception as e:
        # Deliberate best effort: one failing technique should not abort the
        # whole comparison, so its panel shows the (truncated) error instead.
        panel.text(0.5, 0.5, f'Error:\n{str(e)[:50]}...',
                   ha='center', va='center', transform=panel.transAxes)
        panel.set_title(f'{name} (Error)')
    else:
        panel.scatter(X_transformed[:, 0], X_transformed[:, 1],
                      c=color_subset, cmap=plt.cm.hot)
        panel.set_title(name)
        panel.set_xlabel('Dimension 1')
        panel.set_ylabel('Dimension 2')

plt.tight_layout()
save_fig("dimensionality_reduction_comparison")
plt.show()

## Performance Comparison on MNIST

In [None]:
# Performance comparison on MNIST subset
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time

# Use smaller subset for comparison
X_mnist_subset = X[:2000]
y_mnist_subset = y[:2000]

# Split data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_mnist_subset, y_mnist_subset, test_size=0.2, random_state=42)

# Read the feature count from the data instead of hard-coding it
n_features = X_train.shape[1]

# Baseline: No dimensionality reduction.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations, unlike wall-clock time.time().
start_time = time.perf_counter()
clf_baseline = RandomForestClassifier(n_estimators=100, random_state=42)
clf_baseline.fit(X_train, y_train)
y_pred_baseline = clf_baseline.predict(X_test)
baseline_time = time.perf_counter() - start_time
baseline_accuracy = accuracy_score(y_test, y_pred_baseline)

# PCA + Random Forest: the PCA is fitted on the training split only and then
# applied to the test split; its cost is included in the measured time
start_time = time.perf_counter()
pca = PCA(n_components=0.95)  # Keep 95% of variance
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
clf_pca = RandomForestClassifier(n_estimators=100, random_state=42)
clf_pca.fit(X_train_pca, y_train)
y_pred_pca = clf_pca.predict(X_test_pca)
pca_time = time.perf_counter() - start_time
pca_accuracy = accuracy_score(y_test, y_pred_pca)

# Results
print("Performance Comparison:")
print("-" * 50)
print(f"Baseline ({n_features} features):")
print(f"  Accuracy: {baseline_accuracy:.3f}")
print(f"  Time: {baseline_time:.2f} seconds")
print(f"\nPCA ({pca.n_components_} features):")
print(f"  Accuracy: {pca_accuracy:.3f}")
print(f"  Time: {pca_time:.2f} seconds")
print(f"  Speedup: {baseline_time/pca_time:.1f}x")
print(f"  Dimensionality reduction: {n_features/pca.n_components_:.1f}x")

# Visualize results
methods = [f'Baseline\n({n_features} features)', f'PCA\n({pca.n_components_} features)']
accuracies = [baseline_accuracy, pca_accuracy]
times = [baseline_time, pca_time]

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

axes[0].bar(methods, accuracies, color=['skyblue', 'lightgreen'])
axes[0].set_ylabel('Accuracy')
axes[0].set_title('Classification Accuracy')
# Zoom in on [0.8, 1.0] when both bars fit; otherwise lower the floor so a
# bar for an accuracy at or below 0.8 is not clipped out of view
lowest_accuracy = min(accuracies)
y_floor = 0.8 if lowest_accuracy > 0.8 else max(0.0, lowest_accuracy - 0.05)
axes[0].set_ylim([y_floor, 1.0])

axes[1].bar(methods, times, color=['skyblue', 'lightgreen'])
axes[1].set_ylabel('Time (seconds)')
axes[1].set_title('Training + Prediction Time')

save_fig("pca_performance_comparison")
plt.show()

## Summary

In this chapter, we covered dimensionality reduction techniques:

### **Key Concepts:**

1. **Curse of Dimensionality**
   - Pairwise distances concentrate: points become nearly equidistant in high dimensions
   - Data becomes sparse
   - Computational complexity increases

2. **Main Approaches**
   - **Projection**: Project onto lower-dimensional space
   - **Manifold Learning**: Unfold complex structures

### **Techniques Covered:**

1. **Principal Component Analysis (PCA)**
   - Finds directions of maximum variance
   - Linear technique, fast and simple
   - Good for compression and noise reduction
   - Variants: Randomized, Incremental PCA

2. **Kernel PCA**
   - Nonlinear extension of PCA
   - Uses kernel trick for complex manifolds
   - Good for nonlinear dimensionality reduction

3. **Locally Linear Embedding (LLE)**
   - Preserves local relationships
   - Good for unfolding manifolds
   - Nonlinear technique

4. **Other Techniques**
   - **MDS**: Preserves distances
   - **Isomap**: Geodesic distances
   - **t-SNE**: Great for visualization

### **When to Use:**

- **PCA**: First choice, data compression, noise reduction
- **Kernel PCA**: Nonlinear relationships suspected
- **LLE**: Manifold structure, local relationships important
- **t-SNE**: Data visualization, clustering analysis

### **Benefits:**

- Faster training and prediction
- Reduced storage requirements
- Noise reduction
- Visualization of high-dimensional data
- Mitigating the curse of dimensionality

### **Best Practices:**

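- Standardize features before applying PCA when they are measured on different scales (Scikit-Learn's PCA centers the data automatically, but does not rescale it)
- Choose the number of components so as to retain 95–99% of the variance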
- Use cross-validation for hyperparameter tuning
- Consider computational cost vs. performance gain
- Validate that reduced dimensions preserve important information