## Dimensionality reduction of the digits dataset via <br>Uniform Manifold Approximation and Projection embedding (UMAP) vs<br>t-distributed Stochastic Neighbor Embedding (t-SNE) ##

In this Jupyter notebook we compare the performance of UMAP with that of t-SNE when computing a 3d embedding of the digits dataset.

python 3.6.4

sklearn version 0.19.1

umap version 0.2.1 (see https://github.com/lmcinnes/umap)

In [5]:
!pip install umap
import numpy as np
import umap
from sklearn.manifold import TSNE
from sklearn.datasets import load_digits



google-cloud 0.33.1 has requirement google-api-core<0.2.0dev,>=0.1.2, but you'll have google-api-core 0.1.1 which is incompatible.
google-cloud 0.33.1 has requirement google-cloud-pubsub<0.31dev,>=0.30.0, but you'll have google-cloud-pubsub 0.35.4 which is incompatible.
google-cloud-pubsub 0.35.4 has requirement google-api-core[grpc]<2.0.0dev,>=0.1.3, but you'll have google-api-core 0.1.1 which is incompatible.


In [6]:
# Load scikit-learn's bundled digits dataset; later cells read `digits.data`
# (features) and `digits.target` (labels) from it.
digits = load_digits()

In [9]:
# Regroup the samples so they are ordered by digit class (all 0s first, then
# all 1s, ..., finally all 9s). Features and labels are selected with the same
# masks, so row i of `my_data` still corresponds to `digit_type[i]`.
my_data = np.concatenate(
    [digits.data[digits.target == label] for label in range(10)], axis=0
)
digit_type = np.concatenate(
    [digits.target[digits.target == label] for label in range(10)]
)

# Sanity check: one feature row per label.
print(my_data.shape, digit_type.shape, sep='\n')

(1797, 64)
(1797,)


In [4]:
# One semi-transparent colour per digit class, indexed by the digit (0-9).
# Every entry must be a well-formed `rgba(r, g, b, a)` string with exactly one
# pair of parentheses; entries 3 and 9 previously had doubled parentheses.
digit_color=['rgba(236,223,189, 0.85)', 'rgba(184, 219, 86, 0.85)',
             'rgba(120,192,168, 0.85)',  'rgba(255,183,50, 0.85)',
             'rgba(219, 174, 86, 0.85)', 'rgba(86, 211, 219, 0.85)',
             'rgba(86, 131, 219, 0.85)', 'rgba(121, 86, 219, 0.85)',
             'rgba(200, 86, 219,0.85)', 'rgba(255,102,102, 0.7)']

# Per-sample marker colour and hover text, both derived from the digit label.
colors = [digit_color[label] for label in digit_type]
tooltips = [str(label) for label in digit_type]

In [5]:
from plotly.offline import download_plotlyjs, init_notebook_mode,  iplot, plot
# Initialise plotly's offline notebook mode. Presumably only needed for the
# offline `iplot(fig)` call that is commented out in a later cell; confirm
# before removing.
init_notebook_mode(connected=True)

In [6]:
def get_trace3d(reduced_data,  cluster_colors=colors, text=tooltips):
    """Build a plotly `scatter3d` trace (as a plain dict) from a 3d embedding.

    Parameters
    ----------
    reduced_data : array indexed as `[:, 0]`, `[:, 1]`, `[:, 2]`
        Columns 0, 1 and 2 become the x, y and z coordinates of the markers.
    cluster_colors : marker colours
        Defaults to the notebook-level `colors` as it existed when this
        function was defined.
    text : hover text
        Defaults to the notebook-level `tooltips` as it existed when this
        function was defined.

    Returns
    -------
    dict
        Trace description with markers only and hover showing just `text`.
    """
    # Thin grey outline around each marker.
    outline = dict(width=0.5, color='rgb(100,100,100)')
    marker_style = dict(size=6, color=cluster_colors, line=outline)

    return {
        'type': 'scatter3d',
        'x': reduced_data[:, 0],
        'y': reduced_data[:, 1],
        'z': reduced_data[:, 2],
        'mode': 'markers',
        'marker': marker_style,
        'text': text,
        'hoverinfo': 'text',
    }

In [7]:
from plotly import tools

In [8]:
# One row with two side-by-side 3d scenes; print_grid=True echoes the grid
# layout so the scene names (scene1, scene2) are visible in the output.
# NOTE(review): the negative horizontal_spacing presumably tightens the gap
# between the two scenes; confirm the installed plotly version accepts it.
fig = tools.make_subplots(
    rows=1,
    cols=2,
    specs=[[dict(is_3d=True), dict(is_3d=True)]],
    horizontal_spacing=-0.05,
    print_grid=True,
)

This is the format of your plot grid:
[ (1,1) scene1 ]  [ (1,2) scene2 ]



In [9]:
#help(umap.UMAP)

In [11]:
# 3d UMAP embedding of the class-ordered digits; the fixed random_state keeps
# the run repeatable and %time reports how long the fit takes.
%time dim_reduced = umap.UMAP(n_neighbors=16, n_components=3, min_dist=0.98, random_state=7654321).fit_transform(my_data)    

Wall time: 7.83 s


In [12]:
# 3d t-SNE embedding of the same data with the same seed, timed with %time so
# it can be compared against the UMAP run.
%time proj_3d = TSNE(n_components=3, perplexity=20, random_state=7654321).fit_transform(my_data)

Wall time: 1min 31s


In [13]:
# One trace per embedding, placed side by side in the single subplot row:
# t-SNE in the left scene (column 1), UMAP in the right scene (column 2).
tsnet = get_trace3d(proj_3d, text=tooltips, cluster_colors=colors)
umapt = get_trace3d(dim_reduced, text=tooltips, cluster_colors=colors)
fig.append_trace(tsnet, row=1, col=1)
fig.append_trace(umapt, row=1, col=2)

In [16]:
# Axis styling shared by the x, y and z axes of both 3d scenes:
# light grey background panes with white grid and zero lines.
axis3d = {
    'showbackground': True,
    'backgroundcolor': "rgb(235, 235, 235)",
    'gridcolor': "rgb(255, 255, 255)",
    'zerolinecolor': "rgb(255, 255, 255)",
}

In [17]:
# Give both scenes the same camera position, axis styling and aspect ratio so
# the two embeddings are viewed from an identical vantage point.
for k in (1, 2):
    fig['layout']['scene%d' % k].update(
        camera={'eye': {'x': 1.6, 'y': 1.2, 'z': 0.9}},
        xaxis=axis3d,
        yaxis=axis3d,
        zaxis=axis3d,
        aspectratio={'x': 1., 'y': 1., 'z': 0.9},
    )

In [18]:
# Figure-level layout: fixed-size canvas, no legend, and a title that says
# which panel shows which method.
title = '3d Embedding of digits dataset via t-sne (left) and  umap (right)'

fig['layout'].update(
    autosize=False,
    width=1000,
    height=550,
    showlegend=False,
    title=title,
)
#offline plot
#iplot(fig)

In [19]:
#online plot
import plotly.plotly as py
py.sign_in('empet', 'api_key')
py.iplot(fig, filename='umap-vs-tsne')