# Feature extraction methods

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import warnings
warnings.filterwarnings('ignore')

## Importing the MNIST dataset

In [3]:
from keras.datasets import mnist

Using TensorFlow backend.


In [4]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [5]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [6]:
X_train = X_train[:10000, :, :].reshape(10000, 28 * 28)
y_train = y_train[:10000]
X_test = X_test[:5000, :, :].reshape(5000, 28 * 28)
y_test = y_test[:5000]

In [7]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(10000, 784)
(10000,)
(5000, 784)
(5000,)


In [8]:
X_total = np.append(X_train, X_test, axis=0)

In [9]:
X_total.shape

(15000, 784)

## Simple classifier model using Gaussian Naive Bayes

In [10]:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [11]:
accuracy = classifier.score(X_test, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 53.5%


## Principal Components Analysis

In [10]:
from sklearn.decomposition import PCA

In [11]:
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [12]:
classifier.fit(X_train_pca, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [13]:
accuracy = classifier.score(X_test_pca, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 60.8%


## Kernel Principal Components Analysis

In [26]:
from sklearn.decomposition import KernelPCA

In [27]:
k_pca = KernelPCA(n_components=5, kernel='poly')
X_train_kpca = k_pca.fit_transform(X_train)
X_test_kpca = k_pca.transform(X_test)

In [28]:
classifier.fit(X_train_kpca, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [29]:
accuracy = classifier.score(X_test_kpca, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 42.1%


## Multidimensional Scaling

In [30]:
from sklearn.manifold import MDS

In [40]:
mds = MDS(n_components=5)

In [44]:
X_total_mds = mds.fit_transform(X_total)

In [45]:
X_train_mds = X_total_mds[:X_train.shape[0], :]
X_test_mds = X_total_mds[X_train.shape[0]:, :]

In [46]:
classifier.fit(X_train_mds, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [47]:
accuracy = classifier.score(X_test_mds, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 62.96000000000001%


## Isomap

In [32]:
from sklearn.manifold import Isomap

In [33]:
isomap = Isomap(n_components=5)
X_train_isomap = isomap.fit_transform(X_train)
X_test_isomap = isomap.transform(X_test)

In [35]:
classifier.fit(X_train_isomap, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [36]:
accuracy = classifier.score(X_test_isomap, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 75.3%


## Locally Linear Embedding

In [48]:
from sklearn.manifold import LocallyLinearEmbedding

In [49]:
lle = LocallyLinearEmbedding(n_components=5)

In [50]:
X_train_lle = lle.fit_transform(X_train)
X_test_lle = lle.transform(X_test)

In [52]:
classifier.fit(X_train_lle, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [53]:
accuracy = classifier.score(X_test_lle, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 65.53999999999999%


## Laplacian Eigenmap

In [60]:
from sklearn.manifold import SpectralEmbedding

In [61]:
le = SpectralEmbedding(n_components=5)

In [62]:
X_total_le = le.fit_transform(X_total)

In [63]:
X_train_le = X_total_le[:X_train.shape[0], :]
X_test_le = X_total_le[X_train.shape[0]:, :]

In [64]:
classifier.fit(X_train_le, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [65]:
accuracy = classifier.score(X_test_le, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 60.12%


## t-distributed Stochastic Neighbor Embedding

In [54]:
from sklearn.manifold import TSNE

In [55]:
tsne = TSNE(n_components=3)

In [56]:
X_total_tsne = tsne.fit_transform(X_total)

In [57]:
X_train_tsne = X_total_tsne[: X_train.shape[0], :]
X_test_tsne = X_total_tsne[X_train.shape[0]:, :]

In [58]:
classifier.fit(X_train_tsne, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [59]:
accuracy = classifier.score(X_test_tsne, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 88.4%


## Fisher Linear Discriminant Analysis

In [67]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [68]:
lda = LinearDiscriminantAnalysis(n_components=5)

In [85]:
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

In [86]:
classifier.fit(X_train_lda, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [87]:
accuracy = classifier.score(X_test_lda, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 76.03999999999999%


## Kernel Linear Discriminant Analysis

## Supervised Principal Component Analysis

In [12]:
from supervised_pca import SupervisedPCA

In [89]:
spca = SupervisedPCA(n_components=5)

In [90]:
X_train_spca = spca.fit_transform(X_train, y_train)
X_test_spca = spca.transform(X_test)

In [91]:
X_train_spca.shape

(10000, 5)

In [92]:
classifier.fit(X_train_spca, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [93]:
accuracy = classifier.score(X_test_spca, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 51.38%


## Metric learning

In [94]:
from metric_learn import NCA

In [95]:
ml = NCA(n_components=5)

In [97]:
X_train_ml = ml.fit_transform(X_train, y_train)
X_test_ml = ml.transform(X_test)

In [98]:
classifier.fit(X_train_ml, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [99]:
accuracy = classifier.score(X_test_ml, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 80.58%


In [100]:
from metric_learn import LMNN

In [102]:
ml = LMNN(n_components=5)

In [103]:
X_train_ml = ml.fit_transform(X_train, y_train)
X_test_ml = ml.transform(X_test)

In [104]:
classifier.fit(X_train_ml, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [105]:
accuracy = classifier.score(X_test_ml, y_test)
print('Accuracy: {}%'.format(accuracy * 100))

Accuracy: 73.1%
