In [1]:
%matplotlib widget
from sklearn import datasets, manifold, mixture, metrics
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy import stats, linalg
import itertools

In [2]:
n_samples = 500
# Fixed seed so that a fresh "Restart & Run All" regenerates the same point
# cloud (and therefore the same embeddings and scores further down).
x, y = datasets.make_s_curve( n_samples=n_samples, random_state=0 )

# Reorder the points by the second value returned by make_s_curve (per the
# scikit-learn docs, the sample's position along the curve). Only x needs to be
# reordered: y is replaced by colour labels immediately below.
idx = y.argsort()
x = x[idx]

# Two-colour labelling in the sorted order: first half purple, second half blue.
y = [ 'purple' if i < n_samples / 2 else 'blue' for i in range( n_samples ) ]

In [3]:
# Rebuild x and y from their two slices around row 300. The slices are joined
# back in their original order, so the element order is unchanged; the result
# of np.concatenate is a NumPy array in both cases.
split_at = 300
x = np.concatenate( [x[:split_at], x[split_at:]] )
y = np.concatenate( [y[:split_at], y[split_at:]] )

In [4]:
# 3-D scatter of the raw points, coloured by the labels in y.
fig1 = plt.figure()
ax = fig1.add_subplot( 111, projection='3d' )

# Note the axis assignment: column 1 of x is drawn on the plot's z axis and
# column 2 on its y axis.
x_vals, z_vals, y_vals = x[:, 0], x[:, 1], x[:, 2]

ax.scatter( xs=x_vals, ys=y_vals, zs=z_vals, c=y )
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [5]:
def fit_plot( x, y, n_neighbors, n_components ):
    """Embed ``x`` with four manifold learners and scatter-plot each embedding.

    The learners are, in order: Isomap, standard LocallyLinearEmbedding,
    SpectralEmbedding (titled 'laplace') and LocallyLinearEmbedding with
    ``method='ltsa'``. All four share ``n_neighbors`` and ``n_components``.

    Parameters
    ----------
    x : data passed unchanged to each learner's ``fit_transform``.
    y : per-point colours, forwarded as ``c=`` to every scatter call.
    n_neighbors : neighbourhood size given to every learner.
    n_components : embedding dimension given to every learner. Columns 0 and 1
        of each embedding are plotted, so at least 2 components are needed.

    Returns
    -------
    tuple ``(y_isomap, y_lle, y_laplace, y_ltsa)`` of the fitted embeddings.
    """
    shared = dict( n_neighbors=n_neighbors, n_components=n_components )

    # (panel title, estimator) pairs, listed in fitting and plotting order.
    learners = [
        ( 'isomap', manifold.Isomap( **shared ) ),
        ( 'lle', manifold.LocallyLinearEmbedding( **shared ) ),
        ( 'laplace', manifold.SpectralEmbedding( **shared ) ),
        ( 'ltsa', manifold.LocallyLinearEmbedding( method='ltsa', **shared ) ),
    ]
    embeddings = [ learner.fit_transform( x ) for _, learner in learners ]

    # 2x2 grid, filled row by row in the same order as `learners`.
    fig = plt.figure()
    gs = fig.add_gridspec( 2, 2 )
    panels = [ fig.add_subplot( cell )
               for cell in ( gs[0, 0], gs[0, 1], gs[1, 0], gs[1, 1] ) ]

    for panel, ( title, _ ), points in zip( panels, learners, embeddings ):
        panel.set_title( title )
        panel.scatter( points[:, 0], points[:, 1], c=y )
        # Hide the tick labels; only the shape of each embedding is of interest.
        panel.set_xticklabels( [] )
        panel.set_yticklabels( [] )

    plt.show()
    return tuple( embeddings )
    
def residual_variance( dx, dy ):
    """Return the Spearman rank correlation between ``dx`` and ``dy``.

    Both inputs are handed to ``scipy.stats.spearmanr`` with ``axis=1`` and the
    ``correlation`` attribute of the result is returned as-is.

    NOTE(review): despite the name, no "1 - r**2"-style residual is computed
    here; the raw correlation is returned. Confirm what callers expect.
    """
    result = stats.spearmanr( dx, dy, axis=1 )
    return result.correlation

In [6]:
# Fit and plot all four embeddings with 10 neighbours in 2 dimensions, then
# unpack them in the order fit_plot returns them.
embeddings = fit_plot( x, y, n_neighbors=10, n_components=2 )
y_isomap, y_lle, y_laplace, y_ltsa = embeddings

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
COMPONENT_COLORS = ['purple', 'blue']
# Kept for any other cell that still reads it; plot_results no longer consumes
# this shared iterator (see the note inside the function).
color_iter = itertools.cycle(COMPONENT_COLORS)

def plot_results(X, Y_, means, covariances, index, title):
    """Scatter-plot clustered 2-D points and overlay one ellipse per component.

    Parameters
    ----------
    X : array indexed as ``X[mask, 0]`` / ``X[mask, 1]``; the points to draw.
    Y_ : array of component indices, compared element-wise against ``i``.
    means : iterable of component centres, used as the ellipse centres.
    covariances : iterable of covariance matrices, one per component.
    index : unused; kept so existing calls keep working.
    title : title of the figure's single axes.

    A new figure is created on every call. Nothing is returned.
    """
    fig = plt.figure()
    splot = fig.add_subplot()

    # Start a fresh colour cycle per call. Consuming the shared module-level
    # iterator would make the colours depend on how many components earlier
    # calls had drawn (hidden kernel state).
    palette = itertools.cycle(COMPONENT_COLORS)

    for i, (mean, covar, color) in enumerate(zip(means, covariances, palette)):
        # A mixture may leave some components without any assigned points;
        # those are skipped rather than drawn.
        members = Y_ == i
        if not np.any(members):
            continue
        splot.scatter(X[members, 0], X[members, 1], .8, color=color)

        # Eigen-decomposition of the covariance: eigenvalues are scaled to the
        # ellipse's width and height.
        v, w = linalg.eigh(covar)
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        # NOTE(review): scipy's eigh returns eigenvectors as the *columns* of
        # w, while w[0] is its first row. Kept as in the original; confirm the
        # intended orientation.
        u = w[0] / linalg.norm(w[0])

        # arctan2 avoids the division by zero that arctan(u[1] / u[0]) hits for
        # a vertical axis. It can differ from arctan by 180 degrees, which
        # leaves an ellipse unchanged.
        angle = np.degrees(np.arctan2(u[1], u[0]))

        # `angle` must be passed by keyword: Matplotlib deprecated the
        # positional form in 3.6 and later made the parameter keyword-only.
        ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180. + angle, color=color)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.5)
        splot.add_artist(ell)

    splot.set_xticks(())
    splot.set_yticks(())
    splot.set_title(title)

In [8]:
# Seeded so that the clustering, and the score computed from `gmm` in the next
# cell, is the same on every fresh run.
gmm = mixture.GaussianMixture( n_components=2, covariance_type='full', random_state=0 )

gmm.fit( y_isomap )
plot_results( y_isomap, gmm.predict( y_isomap ), gmm.means_, gmm.covariances_, 0, 'Isomap' )

# Alternative embeddings. Each one re-fits the same `gmm` object, so enabling
# any of them changes the model that the scoring cell below evaluates.

# gmm.fit( y_lle )
# plot_results( y_lle, gmm.predict( y_lle ), gmm.means_, gmm.covariances_, 0, 'LLE' )

# gmm.fit( y_laplace )
# plot_results( y_laplace, gmm.predict( y_laplace ), gmm.means_, gmm.covariances_, 0, 'Laplace' )

# gmm.fit( y_ltsa )
# plot_results( y_ltsa, gmm.predict( y_ltsa ), gmm.means_, gmm.covariances_, 0, 'LTSA' )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
# Ground-truth class per sample, in the same order as the rows that were
# embedded: purple -> 0, anything else -> 1.
true_labels = [ 0 if c == 'purple' else 1 for c in y ]

# The adjusted Rand index is invariant to renaming the cluster ids, so the
# labels need no flipping to "match" the GMM's numbering. Reversing the list
# would reverse the *sample order* instead, which only lines up with the
# predictions when the classes are two equal-sized contiguous halves.
metrics.adjusted_rand_score( true_labels, gmm.predict( y_isomap ) )

0.9214428226736621