In [1]:
%matplotlib widget
from sklearn import datasets, manifold, mixture, metrics
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy import stats, linalg
import itertools
from math import ceil

In [2]:
n_samples = 500
x, y = datasets.make_s_curve( n_samples=n_samples )

idx = y.argsort()
y.sort()
x = x[idx]

y = []
for i in range( n_samples ):
    if i < n_samples / 2:
        y.append( 'purple' )
    else:
        y.append( 'blue' )

In [3]:
# para adicionar 'buracos' na curva S
x = np.concatenate( (x[:300], x[300:]) )
y = np.concatenate( (y[:300], y[300:]) )

In [4]:
fig1 = plt.figure()
ax = fig1.add_subplot( 111, projection='3d' )

x_vals = x[:, 0]
z_vals = x[:, 1]
y_vals = x[:, 2]

ax.scatter( x_vals, y_vals, z_vals, c=y )
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [5]:
def fit_plot( x, y, n_neighbors, n_components ):
    isomap = manifold.Isomap( n_neighbors=n_neighbors, n_components=n_components )
    y_isomap = isomap.fit_transform( x )
    
    lle = manifold.LocallyLinearEmbedding( n_neighbors=n_neighbors, n_components=n_components )
    y_lle = lle.fit_transform( x )
    
    laplace = manifold.SpectralEmbedding( n_neighbors=n_neighbors, n_components=n_components )
    y_laplace = laplace.fit_transform( x )
    
    ltsa = manifold.LocallyLinearEmbedding( n_neighbors=n_neighbors, n_components=n_components, method='ltsa' )
    y_ltsa = ltsa.fit_transform( x )
    
    fig = plt.figure()
    gs = fig.add_gridspec( 2, 2 )
    ax1 = fig.add_subplot( gs[0, 0] )
    ax2 = fig.add_subplot( gs[0, 1] )
    ax3 = fig.add_subplot( gs[1, 0] )
    ax4 = fig.add_subplot( gs[1, 1] )
    
    ax1.set_title( 'isomap' )
    ax2.set_title( 'lle' )
    ax3.set_title( 'laplace' )
    ax4.set_title( 'ltsa' )
    
    x1_vals = y_isomap[:, 0]
    y1_vals = y_isomap[:, 1]
    ax1.scatter( x1_vals, y1_vals, c=y )
    ax1.set_xticklabels( [] )
    ax1.set_yticklabels( [] )
    
    x2_vals = y_lle[:, 0]
    y2_vals = y_lle[:, 1] 
    ax2.scatter( x2_vals, y2_vals, c=y )
    ax2.set_xticklabels( [] )
    ax2.set_yticklabels( [] )
    
    x3_vals = y_laplace[:, 0]
    y3_vals = y_laplace[:, 1]
    ax3.scatter( x3_vals, y3_vals, c=y )
    ax3.set_xticklabels( [] )
    ax3.set_yticklabels( [] )
    
    x4_vals = y_ltsa[:, 0]
    y4_vals = y_ltsa[:, 1]
    ax4.scatter( x4_vals, y4_vals, c=y )
    ax4.set_xticklabels( [] )
    ax4.set_yticklabels( [] )
    
    plt.show()

    return (y_isomap, y_lle, y_laplace, y_ltsa)
    
def residual_variance( dx, dy ):
    corr = stats.spearmanr( dx, dy, axis=1 ).correlation
    return corr

In [6]:
y_isomap, y_lle, y_laplace, y_ltsa = fit_plot( x, y, n_neighbors=8, n_components=2 )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
color_iter = itertools.cycle(['purple', 'blue'])

def plot_results(X, Y_, means, covariances, index, title):
    fig = plt.figure()
    splot = fig.add_subplot()
    for i, (mean, covar, color) in enumerate(zip(
            means, covariances, color_iter)):
        v, w = linalg.eigh(covar)
        v = 2. * np.sqrt(2.) * np.sqrt(v)
        u = w[0] / linalg.norm(w[0])
        # as the DP will not use every component it has access to
        # unless it needs it, we shouldn't plot the redundant
        # components.
        if not np.any(Y_ == i):
            continue
        plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)

        # Plot an ellipse to show the Gaussian component
        angle = np.arctan(u[1] / u[0])
        angle = 180. * angle / np.pi  # convert to degrees
        ell = mpl.patches.Ellipse(mean, v[0], v[1], 180. + angle, color=color)
        ell.set_clip_box(splot.bbox)
        ell.set_alpha(0.5)
        splot.add_artist(ell)
    plt.xticks(())
    plt.yticks(())
    plt.title(title)
    plt.show()

In [13]:
gmm = []
for i in range(4):
    gmm.append( mixture.GaussianMixture( n_components=2, covariance_type='full' ) )
    
gmm_isomap = gmm[0].fit( y_isomap )
plot_results( y_isomap, gmm_isomap.predict( y_isomap ), gmm_isomap.means_, gmm_isomap.covariances_, 0, 'Isomap' )
# gmm[0].fit( y_isomap )
# plot_results( y_isomap, gmm[0].predict( y_isomap ), gmm[0].means_, gmm[0].covariances_, 0, 'Isomap' )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
gmm[1].fit( y_lle )
plot_results( y_lle, gmm[1].predict( y_lle ), gmm[1].means_, gmm[1].covariances_, 0, 'LLE' )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
gmm[2].fit( y_laplace )
plot_results( y_laplace, gmm[2].predict( y_laplace ), gmm[2].means_, gmm[2].covariances_, 0, 'Laplace' )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
gmm_ltsa = gmm[0].fit( y_ltsa )
plot_results( y_ltsa, gmm_ltsa.predict( y_ltsa ), gmm_ltsa.means_, gmm_ltsa.covariances_, 0, 'Isomap' )
# gmm[3].fit( y_ltsa )
# plot_results( y_ltsa, gmm[3].predict( y_ltsa ), gmm[3].means_, gmm[3].covariances_, 0, 'LTSA' )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
true_labels = []
for c in y:
    if c == 'purple':
        true_labels.append( 0 )
    else:
        true_labels.append( 1 )
true_labels.reverse()

In [16]:
# metrics.adjusted_rand_score( true_labels, gmm[0].predict( y_isomap ) )
metrics.adjusted_rand_score( true_labels, gmm_isomap.predict( y_isomap ) )

0.8909180844997732

In [14]:
metrics.adjusted_rand_score( true_labels, gmm[1].predict( y_lle ) )

0.008829814126288974

In [15]:
metrics.adjusted_rand_score( true_labels, gmm[2].predict( y_laplace ) )

0.7117724463970773

In [19]:
# metrics.adjusted_rand_score( true_labels, gmm[3].predict( y_ltsa ) )
metrics.adjusted_rand_score( true_labels, gmm_ltsa.predict( y_ltsa ) )

0.936897671921253

In [17]:
def plot_isomap( y, color, p ):
    fig = plt.figure()
    print(len(y))
    gs = fig.add_gridspec(1 + len(y) // p, 3)
    axs = []
    for i in range(len(y)):
        gs_row = i // p
        gs_col = i % 3
        axs.append(fig.add_subplot(gs[gs_row, gs_col]))
        x_vals = y[i][:, 0]
        y_vals = y[i][:, 1]
        axs[i].scatter(x_vals, y_vals, c=color)
        axs[i].set_xticklabels( [] )
        axs[i].set_yticklabels( [] )

In [18]:
p = 5
y_isomap = []
neighbors_range = [i for i in range(3, 100) if i % p == 0] 
for i in neighbors_range:
    isomap = manifold.Isomap( n_neighbors=i, n_components=2 )
    y_isomap.append( isomap.fit_transform( x ) )
plot_isomap( y_isomap, y, p )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

19


  axs.append(fig.add_subplot(gs[gs_row, gs_col]))


In [19]:
ari = []
for i in range(len(y_isomap)):
    gmm[0].fit( y_isomap[i] )
    ari.append(metrics.adjusted_rand_score(true_labels, gmm[0].predict(y_isomap[i])))
    print(i, ari[i])
    
fig = plt.figure()
ax = fig.add_subplot()
x_vals = neighbors_range
ax.scatter(x_vals, ari)
plt.show()

0 0.6983046432431614
1 0.9137635159715524
2 0.9214428226736621
3 0.9137635159715524
4 0.9214428226736621
5 0.9214428226736621
6 0.9214428226736621
7 0.23701579865557681
8 0.9524808260462735
9 0.6783513802964738
10 0.6652077886046366
11 0.6329109654161913
12 0.9137635159715524
13 0.6329109654161913
14 0.8985011454877563
15 0.8024243393139477
16 0.9137635159715524
17 0.9061162897752376
18 0.8096222967410293


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
p = 5
y_ltsa = []
neighbors_range = [i for i in range(3, 100) if i % p == 0] 
for i in neighbors_range:
    ltsa = manifold.LocallyLinearEmbedding( n_neighbors=i, n_components=2, method='ltsa' )
    y_ltsa.append( ltsa.fit_transform( x ) )
plot_isomap( y_ltsa, y, p )

  self.M_lu = lu_factor(M)


ValueError: Error in determining null-space with ARPACK. Error message: 'ARPACK error 3: No shifts could be applied during a cycle of the Implicitly restarted Arnoldi iteration. One possibility is to increase the size of NCV relative to NEV. '. Note that method='arpack' can fail when the weight matrix is singular or otherwise ill-behaved.  method='dense' is recommended. See online documentation for more information.

In [None]:
ari = []
for i in range(len(y_ltsa)):
    gmm[3].fit( y_ltsa[i] )
    ari.append(metrics.adjusted_rand_score(true_labels, gmm[3].predict(y_ltsa[i])))
    print(i, ari[i])
    
fig = plt.figure()
ax = fig.add_subplot()
x_vals = neighbors_range
ax.scatter(x_vals, ari)
plt.show()