# Снижение размерности

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline

In [None]:
# Print the built-in help (class docstring and method list) for sklearn's PCA.
help(PCA)

## Данные на прямой
Понижаем размерность. Иногда это получается очень хорошо:

In [None]:
# Synthetic 2-D cloud: 200 standard-normal points pushed through a random
# 2x2 linear map, then transposed so that rows are samples.
X = (np.random.random(size=(2, 2)) @ np.random.normal(size=(2, 200))).T
plt.plot(*X.T, 'o')
plt.axis('equal');

In [None]:
# Fit a 2-component PCA (fit() returns the estimator itself) and show the
# explained-variance ratios followed by the principal axes.
pca = PCA(n_components=2).fit(X)
print(pca.explained_variance_ratio_, pca.components_, sep='\n')

In [None]:
# Scatter the data and draw each principal axis from the origin, scaled by
# 3 * sqrt(explained-variance ratio) so the more important axis is longer.
plt.plot(X[:, 0], X[:, 1], 'o', alpha=0.5)
for ratio, axis in zip(pca.explained_variance_ratio_, pca.components_):
    tip = axis * 3 * np.sqrt(ratio)
    plt.plot([0, tip[0]], [0, tip[1]], '-k', lw=3)
plt.axis('equal');

Число компонент можно выбирать по доле объяснённой дисперсии (`explained_variance_ratio_`):

In [None]:
# A fractional n_components asks PCA to keep enough components to explain
# that share of the variance (here 95%).
clf = PCA(n_components=0.95)
X_trans = clf.fit_transform(X)
# Compare the shapes before and after the reduction.
print(X.shape, X_trans.shape, sep='\n')

Сжатые данные в исходном пространстве:

In [None]:
# Map the compressed points back into the original space and overlay them
# (opaque blue) on the original data (faint).
X_new = clf.inverse_transform(X_trans)
for pts, marker, opacity in ((X, 'o', 0.2), (X_new, 'ob', 0.8)):
    plt.plot(pts[:, 0], pts[:, 1], marker, alpha=opacity)
plt.axis('equal');

## Данные на торе

In [None]:
def torus(n = 300, R = 5, r = 0.2):
    """Sample n random points on a torus-like surface.

    Two angles are drawn uniformly from [0, 2*pi): ``phi`` goes around the
    main ring and ``psi`` around the tube. The ring term uses ``2 * R`` along
    x and ``R`` along y, so the ring is an ellipse rather than a circle.

    Parameters:
        n: number of points to draw.
        R: ring radius along y (the x term is doubled).
        r: tube radius.

    Returns:
        Array of shape (3, n) whose rows are the x, y and z coordinates.
    """
    # Draw phi first, then psi, so the random stream is consumed in a fixed order.
    phi, psi = (2 * np.pi * np.random.rand(n) for _ in range(2))
    cos_phi, sin_phi = np.cos(phi), np.sin(phi)
    cos_psi, sin_psi = np.cos(psi), np.sin(psi)

    xs = 2 * R * cos_phi + r * cos_psi * cos_phi
    ys = R * sin_phi + r * cos_psi * sin_phi
    zs = r * sin_psi
    return np.array([xs, ys, zs])

# Draw one random sample from the torus and show it as a 3-D scatter plot.
ps3 = torus()

fig = plt.figure(figsize = (10, 10))
ax = fig.add_subplot(111, projection = '3d')
# Fixed axis limits with a much narrower z range than x/y.
ax.set_xlim(-15, 15)
ax.set_ylim(-15, 15)
ax.set_zlim(-2, 2)
ax.view_init(30, 40)
ax.scatter(*ps3, c = 'r');

Проводим плоскость (проекция на две главные компоненты):

In [None]:
from sklearn.decomposition import PCA

# Use the torus sample drawn in the plotting cell above (`ps3`); `ps` is not
# assigned until a later cell, so referencing it here raises NameError on a
# top-to-bottom run. Rows of X are points, columns are x/y/z.
X = ps3.transpose()
pca = PCA(n_components=2)
pca.fit(X)

# Project once and reuse: 2-D coordinates in the principal plane...
X_2d = pca.transform(X)
# ...and the same points mapped back into 3-D (shape (3, n), like the sample).
ps2 = pca.inverse_transform(X_2d).transpose()

print(pca.explained_variance_ratio_)
print(pca.components_)
plt.scatter(X_2d[:, 0], X_2d[:, 1], c = 'g');

In [None]:
from sklearn.decomposition import PCA

# Use the torus sample drawn in the plotting cell above (`ps3`); `ps` is not
# assigned until a later cell, so referencing it here raises NameError on a
# top-to-bottom run. Rows of X are points, columns are x/y/z.
X = ps3.transpose()
pca = PCA(n_components=1)
pca.fit(X)
# Points projected onto the first principal axis and mapped back into 3-D
# (shape (3, n), like the sample).
ps1 = pca.inverse_transform(pca.transform(X)).transpose()
print(pca.explained_variance_ratio_)
print(pca.components_)

In [None]:
# Overlay a torus sample (red) with the 2-component (green) and 1-component
# (blue) PCA reconstructions computed in the previous cells.
# NOTE(review): `ps` is a freshly drawn sample, not the one `ps1`/`ps2` were
# computed from — confirm this is intended.
ps = torus()

fig = plt.figure(figsize = (10, 10))
ax = fig.add_subplot(111, projection = '3d')
ax.set_zlim(-2, 2)
ax.view_init(30, 40)
for cloud, colour in ((ps, 'r'), (ps2, 'g'), (ps1, 'b')):
    ax.scatter(cloud[0], cloud[1], cloud[2], c = colour);

Два тора?

# Лица
*Часть кода взята с github.com/ddtm*

In [None]:
import scipy.io

image_h, image_w = 32, 32

data = scipy.io.loadmat('faces_data.mat')


def _faces_to_rows(raw):
    """Rearrange a raw face array into one flattened image per row.

    The array is viewed as (image_w, image_h, n_images), its axes are
    reversed, and each image is flattened to image_h * image_w values.
    """
    stacked = raw.reshape((image_w, image_h, -1))
    per_image = stacked.transpose((2, 1, 0))
    return per_image.reshape((-1, image_h * image_w))


X_train = _faces_to_rows(data['train_faces'])
X_test = _faces_to_rows(data['test_faces'])
# Shift labels down by one (presumably 1-based in the .mat file — verify).
y_train = data['train_labels'] - 1
y_test = data['test_labels'] - 1

n_features = X_train.shape[1]
n_train = len(y_train)
n_test = len(y_test)
n_classes = len(np.unique(y_train))

print('Dataset loaded.')
print('  Image size        : {}x{}'.format(image_h, image_w))
for template, count in (
    ('  Train images      : {}', n_train),
    ('  Test images       : {}', n_test),
    ('  Number of classes : {}', n_classes),
):
    print(template.format(count))

In [None]:
def plot_gallery(images, titles, h, w, n_row=3, n_col=6):
    """Plot a gallery of portraits on an n_row x n_col grid.

    Parameters:
        images: sequence of flattened images; each item must support
            ``.reshape((h, w))``.
        titles: sequence of titles, parallel to ``images``.
        h, w: height and width each image is reshaped to.
        n_row, n_col: grid dimensions.

    At most ``n_row * n_col`` images are drawn. If fewer images or titles
    are supplied, the remaining grid cells are left empty instead of raising
    IndexError.
    """
    plt.figure(figsize=(1.5 * n_col, 1.7 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    n_cells = n_row * n_col
    # zip() stops at the shorter of the two slices, so a short gallery is safe.
    shown = zip(images[:n_cells], titles[:n_cells])
    for cell, (image, title) in enumerate(shown, start=1):
        plt.subplot(n_row, n_col, cell)
        plt.imshow(image.reshape((h, w)), cmap=plt.cm.gray, interpolation='nearest')
        plt.title(title, size=12)
        # Hide axis ticks: they carry no information for images.
        plt.xticks(())
        plt.yticks(())
        
# Title every training portrait with its class label (first column of y_train).
titles = [str(label) for label in y_train[:, 0]]
plot_gallery(X_train, titles, h=image_h, w=image_w)

In [None]:
# Standardise each image (row) on its own: subtract its mean and divide by
# its standard deviation.
X_train_processed = (X_train - X_train.mean(axis = 1)[..., None]) / X_train.std(axis = 1)[..., None]

try:
    from sklearn.decomposition import RandomizedPCA
except ImportError:
    # RandomizedPCA was removed from scikit-learn (0.20+); the equivalent is
    # PCA with the randomized SVD solver. Keep the old name available so the
    # exercise below can be followed as written.
    from functools import partial
    RandomizedPCA = partial(PCA, svd_solver='randomized')

# Target dimensionality for the reduced feature space.
n_components = 64

Воспользуйтесь классом RandomizedPCA (в scikit-learn ≥ 0.20 — `PCA(svd_solver='randomized')`) для снижения размерности картинок и с помощью функции plot_gallery визуализируйте главные компоненты.

Обучите ваш любимый классификатор определению лиц, используя новое уменьшенное пространство признаков