## Practical Exercises on Dimensionality Reduction

### Exercise 1: PCA with Logistic Regression
Compare the test-set accuracy of a logistic regression classifier with and without a PCA step.

In [None]:
# Load libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Hold out 20% of the digits as a test set; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.2,
    random_state=2024,
)

# Baseline model: standardize the features, then classify on all of them.
pipeline_no_pca = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression(max_iter=1000)),
    ]
)

# Reduced model: same as the baseline, but with a PCA step between scaling
# and classification that keeps 95% of the variance.
pipeline_with_pca = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=0.95)),
        ('clf', LogisticRegression(max_iter=1000)),
    ]
)

# Train both models on the same training split (baseline first).
for _pipe in (pipeline_no_pca, pipeline_with_pca):
    _pipe.fit(X_train, y_train)

# Report held-out accuracy for each model, baseline first.
_reports = (
    ("Accuracy without PCA:", pipeline_no_pca),
    ("Accuracy with PCA:", pipeline_with_pca),
)
for _caption, _pipe in _reports:
    print(_caption, accuracy_score(y_test, _pipe.predict(X_test)))

### Exercise 2: t-SNE for Clustering Visualization
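Visualize how well t-SNE preserves cluster structure by coloring the same t-SNE embedding once with the true digit labels and once with K-means cluster assignments.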

In [None]:
# Load libraries
from sklearn.cluster import KMeans

def _draw_embedding_panel(axis, point_colors, panel_title, legend_title):
    """Scatter the t-SNE embedding on `axis`, colored by `point_colors`.

    Sets the panel title and attaches a legend (one entry per distinct color
    value) anchored to the right edge of the axes.

    Returns the scatter artist and the legend, in that order.
    """
    points = axis.scatter(
        X_tsne[:, 0],
        X_tsne[:, 1],
        c=point_colors,
        cmap='viridis',
    )
    axis.set_title(panel_title)
    handles, labels = points.legend_elements()
    key = axis.legend(
        handles,
        labels,
        title=legend_title,
        loc="center left",
        bbox_to_anchor=(1, 0.5),
    )
    return points, key


# Cluster the raw digit features into 10 groups (fixed seed for
# reproducibility).
kmeans = KMeans(n_clusters=10, random_state=2024)
cluster_labels = kmeans.fit_predict(digits.data)

# One row, two panels: ground truth on the left, K-means on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))

# Left panel: points colored by their true digit class.
scatter1, legend1 = _draw_embedding_panel(
    ax1, digits.target, 't-SNE with True Labels', "Digit Classes"
)

# Right panel: the same points colored by K-means cluster assignment.
scatter2, legend2 = _draw_embedding_panel(
    ax2, cluster_labels, 't-SNE with K-means Clusters', "Clusters"
)

plt.tight_layout()
plt.show()