# Dimensionally Reduction Using Feature Extraction

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets

  return f(*args, **kwds)


## Reducing Features Using Principal Components

In [2]:
digits = datasets.load_digits()
features = StandardScaler().fit_transform(digits.data)
pca = PCA(n_components=0.99, whiten=True)
feature_pca = pca.fit_transform(features)
features.shape[1], feature_pca.shape[1]

(64, 54)

## Reducing Features When Data is Linearly Inseparable 

In [5]:
from sklearn.decomposition import KernelPCA
from sklearn.datasets import make_circles

In [6]:
features,_ = make_circles(n_samples=1000, random_state=1,noise=0.1,factor=0.1)
kpca = KernelPCA(kernel="rbf", gamma=15, n_components=1)
feature_kpca = kpca.fit_transform(features)
features.shape[1], feature_kpca.shape[1]

(2, 1)

## Reducing Features by Maximizing Class Separability

In [7]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [8]:
iris = datasets.load_iris()
features = iris.data
target = iris.target
lda = LinearDiscriminantAnalysis(n_components=1)
features_lda = lda.fit(features, target).transform(features)
features.shape[1], features_lda.shape[1]

(4, 1)

In [9]:
lda.explained_variance_ratio_

array([0.9912126])

In [11]:
lda = LinearDiscriminantAnalysis(n_components=None)
features_lda = lda.fit(features, target)
lda_var_ratios = lda.explained_variance_ratio_
def select_n_components(var_ratio, goal_var: float) -> int:
    total_variance =0.0
    n_components = 0
    for explained_variance in var_ratio:
        total_variance += explained_variance
        n_components += 1
        if total_variance >= goal_var:
            break
    return n_components

select_n_components(lda_var_ratios, 0.95)

1

## Reducing Features Using Matrix Factorization

In [14]:
from sklearn.decomposition import NMF
features = digits.data
nmf = NMF(n_components=10, random_state=1)
feature_nmf = nmf.fit_transform(features)
features.shape[1], feature_nmf.shape[1]

(64, 10)

## Reducing Features on Sparse Data

In [15]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
import numpy as np

In [16]:
features = StandardScaler().fit_transform(digits.data)
features_sparse = csr_matrix(features)
tsvd = TruncatedSVD(n_components=10)
features_sparse_tsvd = tsvd.fit(features_sparse).transform(features_sparse)
features_sparse.shape[1], features_sparse_tsvd.shape[1]

(64, 10)

In [17]:
select_n_components(tsvd.explained_variance_ratio_, 0.95)

10

In [18]:
tsvd.explained_variance_ratio_[0:3].sum()

0.3003938538257617