In [1]:
# 9.1 Reducing Features Using Principal Components

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets

# Load the digits dataset
digits = datasets.load_digits()

# Standardize the feature matrix before projecting it
scaler = StandardScaler()
features = scaler.fit_transform(digits.data)

# A float n_components asks PCA to keep as many components as are needed to
# retain that fraction of the variance (here 99%); whiten=True rescales each
# retained component to unit variance.
pca = PCA(whiten=True, n_components=0.99)
features_pca = pca.fit_transform(features)

# Compare the column counts before and after the projection
print("Original number of features:", features.shape[1])
print("Reduced number of features:", features_pca.shape[1])

Original number of features: 64
Reduced number of features: 54


In [5]:
# 9.2 Reducing Features When Data Is Linearly Inseparable

# Load libraries
from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

# Create linearly inseparable data. make_circles returns a (features, labels)
# pair and only the features are needed here. The result is indexed rather
# than unpacked into `_`, because assigning to `_` in IPython/Jupyter shadows
# the "previous cell output" variable for the rest of the session.
features = make_circles(n_samples=1000, random_state=1, noise=0.1, factor=0.1)[0]

# Apply kernel PCA with a radial basis function (RBF) kernel, keeping one component
kpca = KernelPCA(kernel="rbf", gamma=15, n_components=1)
features_kpca = kpca.fit_transform(features)

# Compare the column counts before and after the projection
print("Original number of features:", features.shape[1])
print("Reduced number of features:", features_kpca.shape[1])

Original number of features: 2
Reduced number of features: 1


In [9]:
# 9.3 Reducing Features by Maximizing Class Separability

# Import everything this recipe uses so the cell also runs on a fresh kernel,
# without relying on `datasets` having been imported by an earlier cell.
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Load the Iris flower dataset
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Create an LDA that keeps a single component, fit it on the labelled data,
# and project the features onto that component
lda = LinearDiscriminantAnalysis(n_components=1)
features_lda = lda.fit_transform(features, target)

# Print the number of features before and after the projection
print("Original number of features:", features.shape[1])
print("Reduced number of features:", features_lda.shape[1])

Original number of features: 4
Reduced number of features: 1


In [14]:
# 9.4 Reducing Features Using Matrix Factorization

# Import everything this recipe uses so the cell also runs on a fresh kernel,
# without relying on `datasets` having been imported by an earlier cell.
from sklearn import datasets
from sklearn.decomposition import NMF

# Load the digits dataset; NMF requires a non-negative feature matrix, so the
# raw data is used without standardization.
digits = datasets.load_digits()
features = digits.data

# Factorize the feature matrix into 10 components (seeded for reproducibility)
nmf = NMF(n_components=10, random_state=1)
# Fit on `features` itself so the matrix being reduced and the matrix whose
# shape is reported below are guaranteed to be the same object.
nmf_features = nmf.fit_transform(features)

# Compare the column counts before and after the factorization
print("Original number of features:", features.shape[1])
print("Reduced number of features:", nmf_features.shape[1])

Original number of features: 64
Reduced number of features: 10




In [15]:
# 9.5 Reducing Features on Sparse Data

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
import numpy as np


# Load the digits dataset here so the cell does not depend on a `digits`
# variable left over from an earlier cell.
digits = datasets.load_digits()

# Standardize feature matrix
# NOTE(review): StandardScaler's default mean-centering makes most entries
# non-zero, so the CSR matrix below is sparse in format only; it presumably
# serves to show that TruncatedSVD accepts sparse input - confirm intent.
features = StandardScaler().fit_transform(digits.data)

# Make sparse matrix
features_sparse = csr_matrix(features)

# Create a TSVD. The default solver is randomized, so a fixed random_state is
# passed to make the decomposition reproducible across runs (matching the
# seeded estimators in the other recipes).
tsvd = TruncatedSVD(n_components=10, random_state=1)

# Conduct TSVD on sparse matrix
features_sparse_tsvd = tsvd.fit(features_sparse).transform(features_sparse)

# Show results
print("Original number of features:", features_sparse.shape[1])
print("Reduced number of features:", features_sparse_tsvd.shape[1])

Original number of features: 64
Reduced number of features: 10
