In [1]:
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import svd
from sklearn.preprocessing import StandardScaler


# Load the scikit-learn digits dataset; the notebook uses `digits.data` as the
# feature matrix and `digits.target` as the per-sample class labels.
digits = datasets.load_digits()

In [36]:
# Dimensions of the feature matrix (rows are samples, columns are features).
digits.data.shape

(1797, 64)

In [46]:
def drawDigitsData(x, y, z, angle=0, labels=None):
    """Draw a 3-D scatter plot of embedded samples, one colour per digit class.

    A new 10x10 inch matplotlib figure is created and left as the current
    figure, so the caller can follow up with ``plt.savefig`` / ``plt.show`` /
    ``plt.close``.

    Parameters
    ----------
    x, y, z : array-like
        Coordinates of every sample along the three plotted axes. Both flat
        ``(n,)`` arrays and ``(n, 1)`` column vectors are accepted.
    angle : float, default 0
        Azimuth passed to ``ax.view_init``; the elevation is fixed at -140.
    labels : array-like of int, optional
        Class label (0-9) of every sample, aligned with ``x``/``y``/``z``.
        Defaults to the notebook-level ``digits.target``.

    Returns
    -------
    None
    """
    if labels is None:
        # Backward-compatible default: use the dataset loaded at the top of the notebook.
        labels = digits.target
    labels = np.asarray(labels)

    # Flatten so that the 1-D boolean masks below work for column vectors too.
    x, y, z = (np.asarray(coord).ravel() for coord in (x, y, z))

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(projection='3d')
    scale = 20
    fontsize = 16
    # One fixed colour per digit class 0-9.
    color = [
        '#1f77b4', '#ff7f0e', '#2ca02c',
        '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22',
        '#17becf'
    ]
    for digit in range(10):
        # Build the class mask once and reuse it for all three coordinates.
        mask = labels == digit
        ax.scatter(
            x[mask], y[mask], z[mask],
            c=color[digit], s=scale, alpha=0.8, label=digit
        )

    fig.suptitle("Digits' distribution", fontsize=fontsize + 2, fontweight='bold')
    # Single-row legend stretched across the top of the axes.
    ax.legend(bbox_to_anchor=(0., 1.04, 1., .102), loc='lower left', ncol=10, mode="expand", borderaxespad=0.)
    ax.view_init(-140, angle)

In [47]:
def into_gif(x, y, z, name, n_frames=360, fps=30, frames_dir='./img'):
    """Render a full rotation of the 3-D digit scatter plot and save it as a GIF.

    Every frame is drawn with ``drawDigitsData`` at a different azimuth, saved
    as ``<frames_dir>/img_<i>.png``, then all frames are read back and written
    to ``./<name>``.

    Parameters
    ----------
    x, y, z : array-like
        Sample coordinates, forwarded unchanged to ``drawDigitsData``.
    name : str
        File name of the resulting animation, e.g. ``'svd.gif'``.
    n_frames : int, default 360
        Number of frames in one full 360-degree rotation.
    fps : int, default 30
        Frame rate passed to ``imageio.mimsave``.
    frames_dir : str, default './img'
        Directory for the intermediate PNG frames; created if it is missing.

    Returns
    -------
    None
    """
    # Import everything up front so a missing package fails before the slow
    # rendering loop instead of after it.
    import os

    import imageio
    from tqdm import tqdm

    os.makedirs(frames_dir, exist_ok=True)

    # endpoint=False: 360 degrees is the same view as 0 degrees, so including
    # it would duplicate a frame and make the looping animation stutter.
    angles = np.linspace(0, 360, n_frames, endpoint=False)
    frame_paths = [
        os.path.join(frames_dir, f'img_{i}.png') for i in range(len(angles))
    ]

    # Wrapping the sequence itself (not enumerate) lets tqdm know the total.
    for angle, path in zip(tqdm(angles, desc='rendering'), frame_paths):
        drawDigitsData(x, y, z, angle=angle)
        plt.savefig(path, transparent=False)
        # Close the current figure so figures do not accumulate in memory.
        plt.close()

    frames = [
        imageio.v2.imread(path) for path in tqdm(frame_paths, desc='reading')
    ]

    imageio.mimsave(f'./{name}', frames, fps=fps)

In [48]:
# Standardise the features with StandardScaler; the following cells check that
# the result has (near-)zero mean and unit variance per feature.
data = StandardScaler().fit_transform(digits.data)

In [49]:
np.square(data.mean(axis=0)).sum()  # sum of squared per-feature means; ~0 confirms centring

8.33955492765316e-30

In [50]:
# Per-feature variance: expected to be 1, except for features that are
# constant in the input, which stay at 0 after scaling.
np.var(data, axis=0)

array([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

## SVD version

In [51]:
# Thin SVD of the standardised data; the rows of VT are the right singular vectors.
U, S, VT = svd(data, full_matrices=False)

# First three right singular vectors as column vectors, then the projection
# of the data onto each of them (each result is a column vector).
w1, w2, w3 = (VT[k, :].reshape(-1, 1) for k in range(3))
x, y, z = (data @ direction for direction in (w1, w2, w3))

In [52]:
# Animate the SVD projection as a rotating 3-D scatter plot.
into_gif(x, y, z, name='svd.gif')

360it [01:01,  5.86it/s]
360it [00:05, 64.50it/s]


## t-SNE version

In [53]:
from sklearn.manifold import TSNE


# t-SNE is stochastic (random initialisation here), so pin the seed to make
# the embedding, and the GIF built from it, reproducible across runs.
X_embedded = TSNE(n_components=3, learning_rate='auto',
                  init='random', perplexity=3,
                  random_state=42).fit_transform(data)

In [54]:
# Animate the t-SNE embedding; the rows of the transpose are the three coordinate columns.
into_gif(*X_embedded.T, 'tsne.gif')

360it [01:02,  5.78it/s]
360it [00:05, 65.09it/s]
