<font size = 5><b>Clustering, Viz, Assessing Reconstruction</b></font><br>
Now that we have learned a dictionary, let's cluster the sparse codes. We load the dictionary V together with the data and its labels, and compute the sparse encodings on the fly. TODO: could several of these steps be combined into a single function?

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerLine2D
import pylab as pl

import numpy as np
import time
import os
#import re
import sys
import math
import pickle
import pandas as pd
import csv
from __future__ import division
import itertools

from skimage import io, filters, util, data, img_as_float
import scipy
import brewer2mpl

from sklearn.cluster import KMeans
from sklearn.decomposition import MiniBatchDictionaryLearning, SparseCoder, sparse_encode, PCA
from sklearn.feature_extraction.image import extract_patches_2d, PatchExtractor, reconstruct_from_patches_2d
from sklearn.manifold import TSNE
from skimage.transform import downscale_local_mean

#from sklearn.grid_search import GridSearchCV, RandomizedSearchCV

import skynet.pipeline
import skynet.utils
import skynet.dl_utils as dl
import skynet.viz as viz
import skynet.sparse_encoding as se

from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import dendrogram, leaves_list

# Directories holding the pickled patch data and the learned dictionaries.
# The defaults are the original local directories; set the PATCH_DATA_PATH /
# DL_PATH environment variables to point elsewhere without editing the notebook.
# os.path.join(..., "") guarantees the trailing separator that the
# `data_path + filename` concatenations in later cells rely on.
data_path = os.path.join(os.environ.get("PATCH_DATA_PATH", "/Users/don/Documents/patch_data/"), "")
dl_path = os.path.join(os.environ.get("DL_PATH", "/Users/don/Documents/DL/"), "")

# Tell numpy not to warn on division by zero / invalid results (e.g. 0/0);
# the min-max normalizations in later cells can hit both.
np.seterr(divide = 'ignore', invalid = 'ignore')

In [None]:
# Load the desired dictionary.
# V_ls[1] is the dictionary V; V_ls[0] is a parameter list whose first entry
# is kept as the dataset filename.
# NOTE(review): V_ls looks like an object array; recent numpy releases
# presumably need np.load(..., allow_pickle=True) here -- confirm.
V_name = 'V46a1e-1'; V_fn = V_name+'.npy'
V_ls = np.load(dl_path+V_fn)
V = V_ls[1]; dataset_fn = V_ls[0][0]
print(V_ls[0])

# Load the two pickled patch sets (d0 = 'p1_tr', d1 = 'p1_te').
# Context managers close the file handles instead of leaking them.
# NOTE(review): pickle.load can execute arbitrary code -- only use trusted files.
with open(data_path+'p1_tr.p', "rb") as f:
    d0 = pickle.load(f)
with open(data_path+'p1_te.p', "rb") as f:
    d1 = pickle.load(f)

In [None]:
# Sparse-encode both patch sets (d0, then d1) against the dictionary V and
# report how long the two encodings took together.
alfa_ls = [1,2,4,8,16,32]
t0 = time.time()

X_dict1 = se.patch_and_sparse_encode(d0, V)
X_dict2 = se.patch_and_sparse_encode(d1, V)

print("")
elapsed = time.time() - t0
print("Done in %.2fs" % elapsed)

In [None]:
# Min-max scale the whole dictionary into [0, 1] so each atom can be shown as an RGB image
W = (V - V.min())/(V.max() - V.min())

# imshow the first 100 atoms of the learnt dictionary on a 10x10 grid
plt.figure(figsize = (8, 8))
for i, comp in enumerate(W[:100]):
    plt.subplot(10, 10, i+1)
    plt.imshow(comp.reshape(30,30,3))
    plt.xticks(())
    plt.yticks(())

# Title and spacing are figure-level settings: apply them once after the grid
# is drawn rather than once per atom.
plt.suptitle('%s\n DL Params: %s(%s), ncols = %s, n_iter = %s ' 
             % (V_name, V_ls[0][2], V_ls[0][3], V_ls[0][1], V_ls[0][4]))
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

<font size = 5><b>Reconstruction Error</b></font><br>First we manually eyeball some reconstructions, then do some calculations for reconstruction error.

In [None]:
# Build the reconstructions X.V for every alpha, for both encodings
# (XV_dict1 from X_dict1, XV_dict2 from X_dict2). Each reconstruction matrix
# is min-max scaled as a whole so it can be compared with normalized data.
def _minmax(arr):
    """Linearly rescale arr so its minimum maps to 0 and its maximum to 1."""
    return (arr - arr.min())/(arr.max() - arr.min())

alfa_ls = [1,2,4,8,16,32]
XV_dict1 = dict.fromkeys(alfa_ls, '')
XV_dict2 = dict.fromkeys(alfa_ls, '')

for alfa in alfa_ls:
    XV_dict1[alfa] = _minmax(np.dot(X_dict1[alfa], V))
    XV_dict2[alfa] = _minmax(np.dot(X_dict2[alfa], V))

In [None]:
# Error computations need the normalized image data, fetched with get_img_data.

# Per-alpha summaries: [mu, sigma, mean loss]
err_tr_dict = {k: [] for k in (1, 2, 4, 8, 16, 32)}
err_te_dict = {k: [] for k in (1, 2, 4, 8, 16, 32)}
# Separate dicts holding the error arrays returned by viz.get_reconstruction_error
err_arr_dict1 = {}
err_arr_dict2 = {}

# Same computation for (d0, X_dict1, XV_dict1) and then (d1, X_dict2, XV_dict2).
# img_data_n is left holding the d1 data afterwards; later cells reuse it.
for raw, X_d, XV_d, arr_store, stat_store in (
        (d0, X_dict1, XV_dict1, err_arr_dict1, err_tr_dict),
        (d1, X_dict2, XV_dict2, err_arr_dict2, err_te_dict)):
    img_data_n = get_img_data(raw)
    for k in list(XV_d.keys()):
        err_arr, mu, sigma = viz.get_reconstruction_error(XV_d[k], img_data_n, k)
        arr_store[k] = err_arr
        loss_arr = viz.get_loss(X_d[k], XV_d[k], img_data_n, k)
        stat_store[k] = [mu, sigma, np.average(loss_arr)]

err_dict = {'tr_err':err_tr_dict, 'te_err':err_te_dict}


In [None]:
# Display the per-alpha summary built from d0: {alpha: [mu, sigma, mean loss]}
err_tr_dict

In [None]:
# Display the per-alpha summary built from d1: {alpha: [mu, sigma, mean loss]}
err_te_dict

In [None]:
# Save the sparse encodings together with the per-patch labels
# sps_dict fn: <dict name>_<encoding algo>
# sps_dict cols: one per key of X_dict, plus img_idx, moa, cpd, cc
def _sparse_codes_with_labels(X_dict, d):
    """Return a column dict: each X_dict entry as a list of rows, plus d's label columns.

    Row i of every encoding is paired with entry i of the label columns, so
    this relies on the patch order being the same in X_dict and d.
    """
    cols = {k: list(v) for k, v in X_dict.items()}
    for label_col in ('img_idx', 'moa', 'cpd', 'cc'):
        cols[label_col] = list(d[label_col])
    return cols

sps_dict1 = _sparse_codes_with_labels(X_dict1, d0)
sps_df1 = pd.DataFrame(sps_dict1)

sps_dict2 = _sparse_codes_with_labels(X_dict2, d1)
sps_df2 = pd.DataFrame(sps_dict2)

save_str = data_path + V_name + '_lars.p'
print(save_str)

# For g6s, because it didn't get a tr/te split
#pickle.dump([sps_df1, err_tr_dict], open(save_str, 'wb'))
# Use a context manager so the file is flushed and closed once the dump finishes.
with open(save_str, 'wb') as f:
    pickle.dump([sps_df1, sps_df2, err_dict], f)

<font size = 5><b>Loading Block</b></font><br>Not working?

In [None]:
# Load a previously saved [train DataFrame, test DataFrame, error dict] list.
# The save cell writes this file with pickle.dump, so read it back with
# pickle.load; np.load is meant for .npy/.npz files.
# NOTE(review): the save cell names its file V_name + '_lars.p'; confirm that
# 'V46a01_lars' is the file you actually want to load.
# NOTE(review): pickle.load can execute arbitrary code -- only use trusted files.
dict_encoding_fn = 'V46a01_lars'
with open(data_path+dict_encoding_fn+'.p', 'rb') as f:
    mydata = pickle.load(f)
sps_tr = mydata[0]
sps_te = mydata[1]
err_dict = mydata[2]
print(sps_tr.shape)
print(sps_te.shape)

In [None]:
# Rebuild the encoding dicts from the loaded DataFrames: each alpha column is
# turned back into a 2-D array. Key 64 keeps its '' placeholder because it is
# not in alfa_ls.
alfa_ls = [1,2,4,8,16,32]
X_dict1 = dict.fromkeys([1, 2, 4, 8, 16, 32, 64], '')
X_dict2 = dict.fromkeys([1, 2, 4, 8, 16, 32, 64], '')

for alfa in alfa_ls:
    for target, frame in ((X_dict1, sps_tr), (X_dict2, sps_te)):
        target[alfa] = np.array(list(frame[alfa]))

# Re-run the reconstruction cell afterwards to get XV_dict1 / XV_dict2 back

<font size = 5><b>Histogram of Testing Reconstruction Error</b></font>

In [None]:
# Plot histograms of the reconstruction errors in err_arr_dict2,
# one panel per alpha on a 3x2 grid (all six alphas at once).
# You'll need the err_arr_dicts{} from the RECONSTRUCTION block above
alfa_ls = [1,2,4,8,16,32]

plt.figure(figsize=(12,7))
plt.suptitle("D = %s\n\nx-axis = Test set Reconstruction error (euc dist)\ny-axis = ???" % V_name, fontsize=14)
for i, alfa_i in enumerate(alfa_ls):
    plt.subplot(3, 2, i+1)
    plt.title('Alpha = %s' % alfa_i)
    # density=True replaces the removed `normed` argument: bar areas sum to 1
    plt.hist(err_arr_dict2[alfa_i], 30, density=True)
    plt.xticks(np.arange(1,8))
    plt.yticks(np.arange(7))
# Figure-level spacing only needs to be applied once
plt.subplots_adjust(0.07, 0.02, 0.92, 0.73, 0.3, 0.23)

<font size = 5><b>Quilt Viz</b></font>

In [None]:
# Quilt parameters
i0 = 10          # first patch index shown
n_patches = 140  # how many patches, counted from i0
ht = 14          # quilt height
p_len = 30       # patch side length
a = 1            # which alpha's reconstructions to show

x_ax = np.arange(ht)
y_ax = np.arange(int(n_patches/ht))

# View both the originals and the XV_dict2 reconstructions as stacks of
# p_len x p_len RGB patches (img_data_n is rebound to its reshaped form).
patch_shape = (p_len, p_len, 3)
img_data_n = img_data_n.reshape((len(img_data_n),) + patch_shape)
XV = XV_dict2[a]
XV = XV.reshape((len(XV),) + patch_shape)
print(XV.shape)

quilt_o = create_quilt(i0, n_patches, img_data_n, ht)
quilt_XV = create_quilt(i0, n_patches, XV, ht)

# Side-by-side figure: originals on the left, reconstructions on the right
plt.figure(figsize=(18,12))
plt.suptitle("D = %s, alpha = %s, idx = %s: %s" % (V_name, a, i0, i0+n_patches), fontsize=14)

plt.subplot(1, 2, 1)
plt.title('Originals')
plt.imshow(quilt_o)

plt.subplot(1, 2, 2)
plt.title('Reconstructions (test data)')
plt.imshow(quilt_XV)

In [None]:
"""Deprecated by cell above?"""

# Choose the number of patches to visualize, and the quilt dimensions
n_patches = 25
p_len = 30
quilt_w = 5
quilt_h = 5
a=1

# Select the starting index of the data subset to viz
idx = 0

# XV_dict1 holds the reconstructions of d0, so the originals must come from d0
# too. The shared img_data_n is left holding the d1 data by the error cell,
# which would pair each reconstruction with an unrelated original.
img_data_tr = get_img_data(d0)

# Reshape the selected rows into stacks of p_len x p_len RGB patches
block2 = XV_dict1[a][idx:idx+n_patches,:].reshape(-1, p_len, p_len, 3)
print(block2.shape)

block_data = np.asarray(img_data_tr[idx:idx+n_patches]).reshape(-1, p_len, p_len, 3)

quilt_block = viz.form_quilt(quilt_w, block_data)
quilt = viz.form_quilt(quilt_w, block2)
print(quilt.shape)

# Plot
plt.figure(figsize=(16,8))
plt.subplot(1, 2, 1)
plt.title('Originals')
plt.imshow(quilt_block)

plt.subplot(1, 2, 2)
plt.title('Reconstructions')
plt.imshow(quilt)
plt.suptitle("D = %s, alpha = %s, idx = %s: %s" % (V_name, a, idx, idx+n_patches), fontsize=15)

<font size = 5><b>Viz a Single Patch, and its chosen atoms</b></font>

In [None]:
# Show one patch of d0 (left) next to its reconstruction for one alpha (right)
idx = 100
alfa = 1
p_len = 30
patch_shape = (p_len, p_len, 3)

img_data = get_img_data(d0, normalize=True)
chosen_orig = img_data[idx].reshape(patch_shape)
chosen_recon = XV_dict1[alfa][idx].reshape(patch_shape)

plt.figure(figsize=(12,8))

# Left panel: original patch, no ticks
plt.subplot(1, 2, 1)
plt.imshow(chosen_orig)
plt.xticks(())
plt.yticks(())

# Right panel: reconstruction, titled with its alpha
plt.subplot(1, 2, 2)
plt.imshow(chosen_recon)
plt.title('alpha=%s'% alfa)
plt.xticks(())
plt.yticks(())

In [None]:
# Show the reconstruction of patch `idx` for every alpha, on a 2x4 grid.
alfa_ls = [1,2,4,8,16,32,64]
# Only plot alphas that actually have reconstructions: the reconstruction cell
# builds XV_dict1 for 1..32 only, so indexing 64 would raise a KeyError.
available_alfas = [alfa_k for alfa_k in alfa_ls if alfa_k in XV_dict1]

plt.figure(figsize=(16,8))
for i, alfa_k in enumerate(available_alfas):
    plt.subplot(2, 4, i+1)
    # Use p_len (set with idx in the single-patch cell) rather than a
    # hard-coded 50: the same rows are reshaped to p_len x p_len x 3 there.
    chosen_recon = XV_dict1[alfa_k][idx].reshape(p_len, p_len, 3)
    plt.imshow(chosen_recon)
    plt.title('alpha=%s'% alfa_k)
    plt.xticks(())
    plt.yticks(())

In [None]:
# For patch `idx`, print how many coefficients of its encoding are nonzero
# under each alpha.
alfa_ls = [1,2,4,8,16,32,64]
for alfa in alfa_ls:
    X = X_dict1[alfa][idx]
    nonzero_positions = np.flatnonzero(X)
    print(len(nonzero_positions))

In [None]:
# Show the dictionary atoms selected for patch `idx` at alpha = 32, titled
# with their index and coefficient value.
# `.copy()` must be called: the bare attribute is a bound method, not the
# encoding vector.
X = X_dict1[32][idx].copy()

# Positions of the nonzero coefficients, then the matching atoms and values.
# The loop variable is deliberately not named `idx`, so the patch index other
# cells rely on is not overwritten.
indices = np.flatnonzero(X)
chosen_atoms = [V[atom_i] for atom_i in indices]
coef_vals = [X[atom_i] for atom_i in indices]

# Keep the 1x6 layout for up to six atoms and widen the grid beyond that
n_cols = max(len(chosen_atoms), 6)

plt.figure(figsize=(24,10))
for i, atom_vec in enumerate(chosen_atoms):
    plt.subplot(1, n_cols, i+1)
    # Min-max scale each atom on its own so it renders as an RGB image
    chosen_n = (atom_vec - atom_vec.min())/(atom_vec.max()-atom_vec.min())
    # Derive the patch side from the atom length (side * side * 3 values)
    # instead of hard-coding 50, which does not fit the 30x30x3 atoms.
    side = int(round(math.sqrt(chosen_n.size / 3.0)))
    plt.imshow(chosen_n.reshape(side, side, 3))
    plt.title("Atom index(coeff. val) = %s (%.2f)" % (indices[i], coef_vals[i]))
    plt.xticks(())
    plt.yticks(())

<font size = 5><b>Viz all 80 Patches of a chosen Image, and its Recon</b></font>

In [None]:
# Parameters for the image-level view.
# NOTE(review): p_len = 50 here differs from the 30 used by the earlier
# cells -- confirm which dataset this section is meant for.
n_patches = 80
p_len = 50
alfa = 4

# Empty row slice: displays only the column headers of sps_df1
sps_df1[:0]

In [None]:
# Show one patch, its reconstruction, and every atom used to build it.
idx = 1507
a = 16
p_len = 50
patch_shape = (p_len, p_len, 3)
# Choose index 2 to see an example of selecting multiple objects from diff atoms

chosen_patch = img_data[idx].reshape(patch_shape)
# Nonzero entries of the patch's encoding: which atoms, and with what weight
sparse_vec = X_dict[a][idx]
chosen_atoms = np.where(sparse_vec != 0)[0]
chosen_coeffs = sparse_vec[chosen_atoms]

# Get the reconstruction (computed for all patches, then one row is picked)
recon_full = np.dot(X_dict[a], V)
recon = recon_full[idx].reshape(patch_shape)

# Stack: [original, reconstruction, atom_0, atom_1, ...]
atom_arrs = np.array([chosen_patch, recon] +
                     [V[atom_i].reshape(patch_shape) for atom_i in chosen_atoms])

# Normalize every image separately for viz (written back into the stack)
for i, img in enumerate(atom_arrs):
    atom_arrs[i] = (img - img.min())/(img.max() - img.min())

# Euclidean distance between the normalized original and reconstruction
err = np.sqrt(np.sum((atom_arrs[0]-atom_arrs[1])**2))

title_strings = ['Original', 'Reconstruction (Err='+str(err)[:6]+')']
title_strings = title_strings + [
    'Atom index (val) = \n'+str(atom_i)+'('+str(coeff)[:8]+')'
    for atom_i, coeff in zip(chosen_atoms, chosen_coeffs)]

n_panels = len(atom_arrs)
plt.figure(figsize=(14,14))
for i in range(n_panels):
    plt.subplot(1, n_panels, i+1)
    panel = atom_arrs[i]
    atom = (panel-panel.min())/(panel.max()-panel.min())
    plt.imshow(atom)
    plt.title(title_strings[i], loc='center')
    plt.xticks(())
    plt.yticks(())
    plt.subplots_adjust(0.01, 0.3, 0.9, 0.7, 0.08, 0.23)

In [None]:
# Now let's try doing the histogram error for each class
# (here: the first 500 rows of the reconstructions and of the data)
# NOTE(review): XV1, the 2-D slicing of mydata and the unqualified
# get_reconstruction_error are not defined in the cells above -- confirm where
# they come from before running this cell.
XV_subset = XV1[0:500,:]
mydata_subset = mydata[0:500,:]

err_arr, mu, sigma = get_reconstruction_error(XV_subset, mydata_subset)
# The values are errors, not durations, so they carry no "s" suffix
print("Error (s.d.) = %.2f (%.2f)" % (mu, sigma))

# Plot histogram of reconstruction errors
plt.figure(figsize = (7,6))
# density=True replaces the removed `normed` argument
plt.hist(err_arr, 30, density=True)
plt.xlabel('Error (Euclidean Distance)')
plt.ylabel('Frequency')

<font size = 5><b>Biclustering, If It Helps</b></font>

In [None]:
# Take the encoding stored under key 2 and bicluster it.
# NOTE(review): X_dict is not defined in the cells above (only X_dict1 and
# X_dict2 are) -- confirm which one is meant.
X = X_dict[2]

# Average linkage with correlation distance; the two results are handed to
# the plotting call in the next cell.
yr, yc = viz.bicluster(X, linkage_method='average', distance_metric='correlation')

In [None]:
# Draw the biclustered encoding using the yr / yc results from the previous cell.
# NOTE(review): plot_bicluster is called unqualified while bicluster comes
# from viz -- confirm it is in scope.
plot_bicluster(X, yr, yc, x_label='Sparse Encoding', y_label= 'Patches')

<font size = 5><b>Scatter Plot of Nonzero Indices</b></font><br>
Also try removing blank atoms, see what's left. 

In [None]:
# List every entry of err_stats_ls with its alpha (fields 1 and 2 of each
# entry are printed), then pick one encoding to work with.
for i, stats in enumerate(err_stats_ls):
    print("%s: a=%s, %.3f (%.3f)" % (i, alpha[i], stats[1], stats[2]))

# Entry 3 is the selection used by the cells below
X = X1[3]
a = alpha[3]
print(X.shape)

In [None]:
# Since the number of nonzeros isn't easy to see, let's do some plots instead.
# Count the nonzero coefficients in every row of X in one vectorized step.
df_nz = (np.asarray(X) != 0).sum(axis=1)
print(df_nz.shape)

#plt.figure(figsize=(12,8))
#plt.bar(range(len(df_nz)), df_nz)

# Empirical CDF: normalized histogram, cumulated and scaled by the bin width.
# density=True replaces the removed `normed` argument.
H, dX = np.histogram(df_nz, bins=20, density=True)
dx = dX[1] - dX[0]
F1 = np.cumsum(H)*dx
plt.plot(dX[1:], F1)
plt.ylabel('Cumulative fraction of rows')
plt.xlabel('Num. of nonzero entries')

In [None]:
# Scatter the positions of all nonzero entries of X: column index on the
# x-axis, row index on the y-axis. This works for either LARS or OMP.
nz_rows, nz_cols = np.nonzero(X)

pl.figure(figsize=(15,15))
pl.scatter(nz_cols, nz_rows, marker='.', alpha=0.5)

In [None]:
# Get the binary representation of X, X_b
# X_b.shape = X.shape
# X_b[i, j] = 1 if X[i, j] != 0; 0 otherwise
# Vectorized: one comparison instead of a Python loop over every element.
X_b = (X != 0).astype(X.dtype)


# For every row of X collect the indices of its nonzero entries (X_nz) and
# the values found there (X_nz_vals).
X_nz = []
X_nz_vals = []
for row in X:
    nz_indices = np.flatnonzero(row)
    X_nz.append(nz_indices)
    X_nz_vals.append(row[nz_indices])

# These become 2-D arrays only when every row has the same number of nonzeros
X_nz = np.array(X_nz)
X_nz_vals = np.array(X_nz_vals)
print(X_nz.shape)

In [None]:
# n_nz = no. of nonzero coeffs per row of X (needs X_nz to be 2-D)
n_nz = X_nz.shape[1]
# How many of the nonzero-index columns (x0, x1, ...) to overlay on the plot.
# Raise this to show more; it is capped at the number of available columns.
n_plot_cols = min(2, n_nz)

# One column per nonzero position: x0 = first nonzero index of each row, ...
x_keys = ['x'+str(i) for i in range(n_nz)]
x_ls = np.array([X_nz[:,i] for i in range(n_nz)])

df_dict = {x_keys[i]:x_ls[i] for i in range(n_nz)}
df_dict['y'] = np.arange(len(X_nz))
df_dict['label']=moa_labels


colors = itertools.cycle(['r','g','b',
          'c','m','y',
          'k','#38FF24','grey',
          '#440073','#FF33FC','#FFB833'])

df = pd.DataFrame(df_dict)
groups = df.groupby('label')


fig, ax = plt.subplots(figsize=(12,8))
for name, group in groups:
    # One colour per label, shared by all of that label's columns
    moa_color = next(colors)
    for col_pos, key in enumerate(x_keys[:n_plot_cols]):
        ax.plot(group[key],
                group.y,
                marker='.',
                markersize=3,
                linestyle='',
                color=moa_color,
                # Only the first column carries the label, so the legend
                # gets a single entry per group
                label=name if col_pos == 0 else ''
                )
# Tick positions do not depend on the group: set them once
ax.set_xticks(np.arange(0,X.shape[1],25))

    
plt.suptitle("D = %s, alpha = %s" % (V_name, a), fontsize=15)
ax.legend(scatterpoints=1, 
          loc='upper center', 
          bbox_to_anchor=(0.5, -0.05),
          fancybox=True, 
          shadow=True, 
          ncol=4)

In [None]:
# How often is each atom used within one block of 500 consecutive rows?
a = 2
class_idx = 2
idx_start = class_idx*500
idx_end = (class_idx + 1)*500

# Transpose so that each row of X_0 corresponds to one atom
X_0 = X1[a-1][idx_start:idx_end].T

# Number of rows in the block that use each atom
atom_counts = [len(np.flatnonzero(atom_row)) for atom_row in X_0]

#print(np.sum(atom_counts))
# Convert the counts to percentages of all nonzero entries in the block
atom_counts = atom_counts/np.sum(atom_counts)*100
plt.figure(figsize=(7, 7))
plt.plot(atom_counts, marker='.',linewidth=0)
plt.xlabel('Atom index')
plt.ylabel('Frequency counts (%)')

In [None]:
# Map every distinct label to one of the 12 "Paired" colours.
# The labels are sorted first: plain set iteration order is not stable, so
# without it the same label could get a different colour on every run.
bmap = brewer2mpl.get_map("Paired", "Qualitative", 12)
color_dict = dict(zip(sorted(set(moa_labels)), bmap.mpl_colors))

In [None]:
# Try pruning universal atoms
# First convert X to a binary matrix X_b, where X_b[i, j] == 1 if X[i, j] != 0, 0 otherwise
# (vectorized comparison instead of a Python loop over rows and indices)
X_b = (np.asarray(X) != 0).astype(float)
print(X_b.shape)

# Fraction of rows that use each atom
atom_df = np.sum(X_b, axis=0)
atom_df = atom_df/len(X_b)

plt.plot(atom_df)

# Print the index of every atom used by more than 10% of the rows
for i in np.flatnonzero(atom_df > 0.1):
    print(i)

<font size=5><b>Clustering: PCA, K-means, tSNE</b></font>

In [None]:
# Fit a PCA on Z and report the explained variance ratio of each component.
# Author's observation: not good...the first 2 PCs only account for about 7%
# of variance each.
n_PCs=2
x_pca_obj = PCA(n_components = n_PCs)
x_pca_obj_fit = x_pca_obj.fit_transform(Z)
POV = x_pca_obj.explained_variance_ratio_
for ratio in POV[:n_PCs]:
    print(ratio)

print("Total explained variances =",sum(POV))

In [None]:
# Project Z onto its first n_PCs principal components
n_PCs = 2
pca_reducer = PCA(n_components = n_PCs)
X_redux = pca_reducer.fit_transform(Z)
X_redux.shape

In [None]:
# Scatter the two PCA components, one colour per label.
# Labels are sorted before being paired with colours so the mapping is the
# same on every run (set iteration order is not stable).
bmap = brewer2mpl.get_map("Paired", "Qualitative", 12)
color_scale2 = dict(zip(sorted(set(moa_labels)), bmap.mpl_colors))

x = X_redux[:,0]
y = X_redux[:,1]

df0 = pd.DataFrame(dict(x=x, y=y,label=moa_labels))

groups = df0.groupby('label')

fig, ax = plt.subplots(figsize=(12,12))
ax.margins(0.05)
for name, group in groups:
    # `color=` (not `c=`): a bare RGB tuple passed as `c` can be mistaken for
    # per-point values when the group happens to contain three points.
    ax.scatter(group.x, 
               group.y, 
               marker='.',
               lw=0,
               s=45, 
               label=name,
               color=color_scale2[name])
    
ax.legend(scatterpoints=1, 
          loc='upper center', 
          bbox_to_anchor=(0.5, -0.05),
          fancybox=True, 
          shadow=True, 
          ncol=4)

In [None]:
# Do k-means (12 clusters) on the PCA-transformed data.
# random_state is fixed so the cluster assignment is reproducible across
# runs, as the tSNE cell already does.
kmeans = KMeans(n_clusters=12, n_init=10, random_state=0) #estimator object
km_fit = kmeans.fit_transform(X_redux)

# Cluster index assigned to every row of X_redux
km_labels = kmeans.labels_

# Number of distinct clusters actually used
print(len(set(km_labels)))

In [None]:
# Colour lookups for the plots below: one keyed by k-means cluster, one keyed
# by label. Keys are sorted before pairing so each key gets the same colour
# on every run (set iteration order is not stable).
bmap = brewer2mpl.get_map("Paired", "Qualitative", 12)
color_scale = dict(zip(sorted(set(km_labels)), bmap.mpl_colors))
color_scale2 = dict(zip(sorted(set(moa_labels2)), bmap.mpl_colors))

colors = itertools.cycle(['r','g','b',
          'c','m','y',
          'k','#38FF24','grey',
          '#440073','#FF33FC','#FFB833'])


# PCA coordinates with the label of every point, grouped by label
df_km = pd.DataFrame(dict(x=X_redux[:,0], 
                          y=X_redux[:,1],
                          label=moa_labels2))

groups = df_km.groupby('label')

In [None]:
# k-means plot
# NOTE(review): the points are grouped and coloured by the `label` column of
# df_km (moa_labels2), not by km_labels -- confirm this is intended.

fig, ax = plt.subplots(figsize=(12,12))
ax.margins(0.05)
for name, group in groups:
    # `color=` (not `c=`): a bare RGB tuple passed as `c` can be mistaken for
    # per-point values when the group happens to contain three points.
    ax.scatter(group.x, 
               group.y, 
               s=45, 
               label=name,
               marker='o',
               linewidth=0,
               color=color_scale2[name])
    
ax.legend(scatterpoints=1, 
          loc='upper center', 
          bbox_to_anchor=(0.5, -0.05),
          fancybox=True, 
          shadow=True, 
          ncol=4)

In [None]:
# tSNE hyper-parameters (perp and ee are reused in the plot title below)
lr = 600
perp = 50
ee = 1

tsne_params = dict(n_components=2,
                   learning_rate=lr,
                   perplexity=perp,
                   early_exaggeration=ee,
                   random_state=0,
                   verbose=1)

# Embed Z in 2-D and time it. Takes awhile: about 300s
t0 = time.time()
tsne_m0 = TSNE(**tsne_params)
X_tsne = tsne_m0.fit_transform(Z)

print("Done in %.2fs" % (time.time()-t0))

In [None]:
# Scatter the 2-D tSNE embedding, one colour per label.
# Labels are sorted before pairing so the label -> colour mapping is the same
# on every run (set iteration order is not stable).
bmap = brewer2mpl.get_map("Paired", "Qualitative", 12)
color_scale2 = dict(zip(sorted(set(moa_labels2)), bmap.mpl_colors))

df_tsne = pd.DataFrame(dict(x=X_tsne[:,0], 
                            y=X_tsne[:,1],
                            label=moa_labels2))

groups = df_tsne.groupby('label')

# Plot
fig, ax = plt.subplots(figsize=(10,10))

ax.margins(0.05)
for name, group in groups:
    # `color=` (not `c=`): a bare RGB tuple passed as `c` can be mistaken for
    # per-point values when the group happens to contain three points.
    ax.scatter(group.x, 
               group.y, 
               s=45, 
               label=name,
               marker='o',
               linewidth=0,
               color=color_scale2[name])

# The title does not depend on the group: set it once.
# NOTE(review): the "PCA50" / "DL: 200, omp2, 30000 iters" text is hard-coded;
# confirm it matches the data actually embedded.
ax.set_title('PCA50 --> tSNE2 clusters\n'+\
             'Based on DL: 200, omp2, 30000 iters\n'+\
             'tSNE2 params:perplexity = %s, early exaggeration = %s' 
             % (perp, ee))
    
ax.legend(scatterpoints=1, 
          loc='upper center', 
          bbox_to_anchor=(0.5, -0.05),
          fancybox=True, 
          shadow=True, 
          ncol=4)

In [None]:
# Hand-entered mean reconstruction errors, seven values per series.
# The plotting cells below draw them against alpha = 1, 2, 4, 8, 16, 32, 64
# and label v1..v4 as '32000', '64000', '96000' and '128000'
# (iteration counts, per the x-axis label of the later plots).
v1 = [10.032, 10.074, 8.018, 6.872, 7.208, 11.278, 11.757]
v2 = [9.949, 9.61, 8.656, 9.948, 11.698, 13.675, 16.103]
v3 = [11.381, 11.132, 10.22, 8.697, 9.286, 10.819, 12.571]
v4 = [12.308, 11.262, 10.437, 9.704, 7.598, 7.623, 10.927]

In [None]:
# Mean reconstruction error against alpha, one line per series v1..v4.
labels = ['1','2','4','8','16','32','64']
# NOTE: despite its name, `y` holds the alpha values used on the x-axis
y = [1, 2, 4, 8, 16, 32, 64]

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
ax.set_xlabel("alpha for sparse encoding (LARS)")
ax.set_ylabel('Mean reconstruction error using Euc. Dist.')
line1, = plt.plot(y, v1, marker='o', label='32000')
line2, = plt.plot(y, v2, marker='o', label='64000')
line3, = plt.plot(y, v3, marker='o', label='96000')
line4, = plt.plot(y, v4, marker='o', label='128000')

# 'top' is not a valid legend location; 'best' lets matplotlib choose one
plt.legend(handler_map={line1: HandlerLine2D(numpoints=4)}, loc='best')

In [None]:
# Error against number of iterations for two alphas: the first one (drawn as
# a line) and the one selected by alfa_idx (drawn as points).
alfa_idx = 3
# Alpha of each position in v1..v4 (the x-values used by the plot above)
alfa_vals = [1, 2, 4, 8, 16, 32, 64]
n_iters = [32, 64, 96, 128]

x1 = [series[0] for series in (v1, v2, v3, v4)]
x = [series[alfa_idx] for series in (v1, v2, v3, v4)]
plt.plot(n_iters, x1, marker='o', linewidth=0.5,
         label='n_nonzero_coefs = %s' % alfa_vals[0])
plt.scatter(n_iters, x, marker='o',
            label='n_nonzero_coefs = %s' % alfa_vals[alfa_idx])
# The title is derived from alfa_idx so it always names the series actually
# plotted (index 3 is alpha 8, not the 16 that was hard-coded before).
plt.title('Sparse encoding error for LARS, n_nonzero_coefs = %s' % alfa_vals[alfa_idx])
plt.legend(loc='best')
plt.xlabel('No. of iterations (in thousands)')
plt.ylabel('Mean Error (Euc. Dist)')

In [None]:
# Error against number of iterations, one line per alpha.
# Rows of v_all are the series v1..v4; its transpose has one row per alpha.
v_all = np.array([v1, v2, v3, v4])
# NOTE: despite its name, `y` holds the iteration counts used on the x-axis
y = [32, 64, 96, 128]
alfa_vals = [1, 2, 4, 8, 16, 32, 64]

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
ax.set_xlabel('No. of iterations (in thousands)')
ax.set_ylabel('Mean  reconstruction error (Euc. Dist)')

# One plot call per alpha instead of seven copy-pasted lines
lines = []
for alfa_k, errs in zip(alfa_vals, v_all.T):
    line, = plt.plot(y, errs, marker='o', label='alpha=%s' % alfa_k)
    lines.append(line)
line1 = lines[0]

# 'top' is not a valid legend location; 'best' lets matplotlib choose one
plt.legend(handler_map={line1: HandlerLine2D(numpoints=4)}, loc='best')