In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from sklearn.manifold import TSNE
import zipfile
import time

NumPy bitmaps: the 28x28 grayscale bitmaps provided by Quick, Draw! can be loaded using np.load().
161666 images are available under the face category.
A subset of just 400 images is used here for a clearer visualisation.

In [2]:
# Unpack the archive into the current working directory. The context manager
# closes the archive even if extraction raises.
with zipfile.ZipFile('smiley face.npy.zip', 'r') as zip_ref:
    zip_ref.extractall()


In [4]:
# Read the extracted bitmap array, then keep only the first 400 entries
# for the visualisations.
faces = np.load('smiley face.npy')
faces_subset = faces[:400]

t-Distributed Stochastic Neighbour Embedding (t-SNE) is a technique for reducing high-dimensional data to a lower-dimensional representation.
Each image has 28x28 = 784 dimensions, which are reduced to 2D here.

In [5]:
# t-SNE: embed the image subset into 2 dimensions and time the fit.
time_start = time.time()
# A fixed random_state makes the embedding reproducible across runs.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, random_state=0)
faces_tsne = tsne.fit_transform(faces_subset)
# A parenthesised single-argument print works on both Python 2 and Python 3.
print('t-SNE completed! {} seconds'.format(time.time() - time_start))

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 400 samples in 0.006s...
[t-SNE] Computed neighbors for 400 samples in 0.194s...
[t-SNE] Computed conditional probabilities for sample 400 / 400
[t-SNE] Mean sigma: 747.429135
[t-SNE] KL divergence after 250 iterations with early exaggeration: 70.620079
[t-SNE] KL divergence after 1000 iterations: 0.857696
t-SNE completed! 2.21873188019 seconds


In [6]:
# for viewing all the images used
def view_all_images(faces, image_shape=(28, 28)):
    """Show the images in ``faces`` tiled into a single square mosaic.

    Parameters
    ----------
    faces : array-like
        Collection of images, one per entry along the first axis. Each
        entry is reshaped to ``image_shape`` before being drawn.
    image_shape : tuple of int, optional
        ``(height, width)`` of a single image. Defaults to ``(28, 28)``.

    Notes
    -----
    The mosaic has ``side x side`` tiles, where
    ``side = int(sqrt(number of images))``, filled row by row. Images
    beyond the first ``side ** 2`` are not shown.
    """
    # Use the argument that was passed in rather than a notebook global.
    faces = np.asarray(faces)
    height, width = image_shape
    side = int(np.sqrt(faces.shape[0]))
    # Start from an all-ones canvas and paste each image into its tile.
    face_image = np.ones((height * side, width * side))
    for index in range(side * side):
        drow, dcol = divmod(index, side)
        face_image[drow * height:(drow + 1) * height,
                   dcol * width:(dcol + 1) * width] = faces[index].reshape(height, width)
    plt.imshow(face_image, cmap='gray')
    plt.show()

This function is used to visualise the t-SNE transformed data.

In [7]:
#scatter plot for visualisation
def scatterPlot_images(tsne2d, faces, figsize=(28,28)):
    """Draw each image at its 2D embedding coordinate.

    Parameters
    ----------
    tsne2d : sequence of (x, y) pairs
        2D coordinates, one per image.
    faces : iterable of 2D arrays
        Images to draw, paired with ``tsne2d`` in order. Pairing stops at
        the shorter of the two inputs.
    figsize : tuple, optional
        Figure size passed to ``plt.subplots``. Defaults to ``(28, 28)``.
    """
    fig, axis = plt.subplots(figsize=figsize)
    for (x, y), image in zip(tsne2d, faces):
        thumbnail = OffsetImage(image, zoom=1, cmap='gray')
        box = AnnotationBbox(thumbnail, (x, y), xycoords='data', frameon=False)
        axis.add_artist(box)
    # Feed the embedding points into the data limits explicitly, then
    # autoscale so the view covers every placed image.
    axis.update_datalim(tsne2d)
    axis.autoscale()
    plt.show()

In [None]:
# faces_tsne was computed from faces_subset, so pair it with that same subset
# (reshaped to 28x28 bitmaps) instead of reshaping every image in `faces`.
scatterPlot_images(faces_tsne, faces=faces_subset.reshape(-1, 28, 28))

In [None]:
# Display the image subset as a tiled overview.
view_all_images(faces=faces_subset)