# 3-D t-Distributed Stochastic Neighbor Embedding

t-SNE is an unsupervised method: it is never given the class labels, yet clusters emerge from patterns in the high-dimensional data.

In [1]:
import sklearn
from sklearn.manifold import TSNE
from sklearn import datasets
from k3d import plot, points, nice_colors, text2d
import numpy as np

In [2]:
# Load scikit-learn's bundled digits dataset; the cells below read its
# `.data`, `.target` and `.target_names` attributes.
digits = sklearn.datasets.load_digits()

In [3]:
# Embed into 3 dimensions so the result can be drawn as a 3-D point cloud.
# - random_state pins the random number generator so that re-running the
#   notebook yields the same embedding instead of a new one each time.
# - The iteration count is left at the library default of 1000 (the value that
#   used to be passed explicitly): the keyword was renamed from `n_iter` to
#   `max_iter` in scikit-learn 1.5, so spelling out either name breaks on some
#   versions, while the default is the same under both.
tsne = TSNE(n_components=3, verbose=1, perplexity=40, random_state=0)

In [4]:
# Fit t-SNE on the raw feature matrix, then store the embedding as float32.
tsne_results = tsne.fit_transform(digits.data)
tsne_results = tsne_results.astype(np.float32)

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1797 samples in 0.036s...
[t-SNE] Computed neighbors for 1797 samples in 0.665s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1797
[t-SNE] Computed conditional probabilities for sample 1797 / 1797
[t-SNE] Mean sigma: 8.394135
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.873466
[t-SNE] KL divergence after 1000 iterations: 0.582597


In [5]:
def legend(p, axes=None):
    """Create a k3d plot holding ``p`` plus one coloured text label per class.

    Parameters
    ----------
    p
        Drawable added to a freshly created plot (e.g. the result of
        ``points(...)``).
    axes
        Currently unused. An earlier version derived axis labels from it;
        the parameter is kept so existing calls keep working.

    Returns
    -------
    The plot, with ``p`` and one ``text2d`` label for every entry of
    ``digits.target_names``.
    """
    figure = plot()
    figure += p
    for idx, label in enumerate(digits.target_names):
        # Label `idx` uses colour `idx` of the palette and sits 0.1 further
        # along the second screen coordinate than the previous label.
        caption = text2d(
            text=str(label),
            color=nice_colors[idx],
            position=(0, idx / 10),
        )
        figure += caption
    return figure

def point_size(data, resolution=20.):
    """Choose a marker size proportional to the extent of ``data``.

    The widest per-column range (max minus min along axis 0) is divided into
    ``resolution`` steps, and 40% of one step is returned.

    Parameters
    ----------
    data : array-like
        Coordinates with one sample per row. A 1-D sequence is also accepted
        and treated as a single column.
    resolution : float, optional
        Number of steps the widest range is divided into (default 20).

    Returns
    -------
    NumPy floating-point scalar
        ``0.4 * widest_range / resolution``.
    """
    # np.ptp gives max - min along the axis. Reducing with np.max rather than
    # the builtin max() also works when that range is a scalar (1-D input),
    # where the builtin would raise TypeError.
    span = np.max(np.ptp(data, axis=0))
    return (span / resolution) * 0.4

# Keyword arguments shared by the point clouds drawn in this notebook.
# 'point_size' is seeded from the raw feature matrix here and is recomputed
# from the plotted coordinates right before the plot is drawn.
common = {
    'point_size': point_size(digits.data),
    # One palette colour per sample, selected by its class label.
    'colors': [nice_colors[label] for label in digits.target],
}

In [6]:
# Peek at the first five embedded samples (all coordinate columns).
tsne_results[:5, :]

array([[ -1.692848 ,  17.24548  ,   2.342643 ],
       [ -4.292169 ,  -4.6276927,  -6.6231384],
       [ -8.631628 ,  -0.4690918,   2.015494 ],
       [ -1.508995 ,   2.3031564,  10.510045 ],
       [  4.5734644,  -6.5477595, -13.281244 ]], dtype=float32)

In [7]:
# Placement of the embedding in the plot: the coordinates are first multiplied
# by the scale and then shifted by the per-axis offset.
# NOTE(review): these values look hand-tuned for this particular view — confirm.
tsne_results_scale = 0.05
tsne_results_offset = np.asarray((0, 2, -0.75), dtype=np.float32)

In [8]:
# Scale the embedding (computed in float32), then shift it by the offset.
tsne_plotted = np.add(
    np.multiply(tsne_results, tsne_results_scale, dtype=np.float32),
    tsne_results_offset,
)
print(tsne_plotted)

[[-0.0846424   2.862274   -0.6328679 ]
 [-0.21460846  1.7686154  -1.081157  ]
 [-0.4315814   1.9765455  -0.6492253 ]
 ...
 [-0.20286834  1.8362269  -0.7517934 ]
 [ 0.20804654  2.0607526  -0.41300562]
 [-0.20379885  1.904791   -0.5449871 ]]


In [9]:
# Recompute the marker size from the coordinates that are actually drawn,
# then show the point cloud together with the class legend.
common.update(point_size=point_size(tsne_plotted))
legend(points(tsne_plotted, shader='mesh', **common))

Plot(antialias=3, axes=['x', 'y', 'z'], axes_helper=1.0, background_color=16777215, camera=[4.5, 4.5, 4.5, 0.0…