This notebook converts `fingerprints.npy` into a `.tsv`-formatted list of colors, where each color is assigned by k-means clustering in the high-dimensional fingerprint space.

In [None]:
# Directory that the input files (t-SNE tables, fingerprints.npy) are read
# from and that colors.tsv is written to.
data_root = "data/drums"
# Name stem substituted into the t-SNE file names under `data_root`/tsne/.
tsne_type = "fingerprints.256.64"
# Number of clusters passed to MiniBatchKMeans.
n_clusters = 128

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
from utils import *
from sklearn.cluster import MiniBatchKMeans
from os.path import join
import numpy as np

In [None]:
def load_tsv(fn):
    """Read the numeric table stored in ``fn`` and return it as a NumPy array.

    The file is parsed by ``np.genfromtxt`` with its default settings.
    """
    table = np.genfromtxt(fn)
    return table
def save_tsv(data, fn):
    """Write ``data`` to ``fn`` as tab-separated text.

    Every value is formatted with five digits after the decimal point.
    """
    np.savetxt(fn, data, delimiter='\t', fmt='%.5f')

In [None]:
# Load the two t-SNE tables for the configured `tsne_type`; the file names
# suggest a 2-D and a 3-D embedding of the same samples.
tsne2d, tsne3d = (
    load_tsv(join(data_root, template.format(tsne_type)))
    for template in ('tsne/{}.2d.tsv', 'tsne/{}.3d.tsv')
)

In [None]:
# before: scatter the 2-D t-SNE points, colored by the rows of `tsne3d`
# NOTE(review): this assumes tsne3d is an (N, 3) array with values in
# [0, 1] so that each row is a valid RGB color -- confirm against the t-SNE export.
plt.figure(figsize=(16,16))
# 'none' is matplotlib's documented way to draw markers without an edge;
# the empty string is not a valid color specification.
plt.scatter(tsne2d[:,0], tsne2d[:,1], edgecolor='none', s=2, c=tsne3d)
plt.show()

In [None]:
# Load the fingerprints and collapse every axis after the first, so that
# each entry becomes a single flat row.
fingerprints = np.load(join(data_root, 'fingerprints.npy'))
fingerprints = fingerprints.reshape((fingerprints.shape[0], -1))

In [None]:
import warnings

# Keep DeprecationWarning messages out of the notebook output.
warnings.simplefilter('ignore', category=DeprecationWarning)

# Fit mini-batch k-means with `n_clusters` clusters on the flattened fingerprints.
kmeans = MiniBatchKMeans(n_clusters=n_clusters)
kmeans.fit(fingerprints)

In [None]:
# One cluster label per fingerprint (this is the approach from the largevis paper).
clusters = kmeans.predict(fingerprints)
# clusters = fingerprints.argmax(axis=1) # this is another approach
# A colormap called with integers treats them as raw indices into its lookup
# table, so passing the labels directly would only use the first few table
# entries (and clip any label beyond the table size to a single color).
# Rescale the labels to floats in [0, 1] so they spread over the whole
# colormap; max(..., 1) avoids dividing by zero when only label 0 occurs.
# The [:,:3] slice drops the alpha channel and keeps RGB.
save_tsv(plt.cm.rainbow(clusters / float(max(clusters.max(), 1)))[:,:3], join(data_root, 'colors.tsv'))

In [None]:
# after: the same 2-D t-SNE points, now colored by k-means cluster label
plt.figure(figsize=(16,16))
# 'none' is matplotlib's documented way to draw markers without an edge;
# the empty string is not a valid color specification.
plt.scatter(tsne2d[:,0], tsne2d[:,1], edgecolor='none', s=2, c=clusters, cmap='hsv')
plt.show()