This is a simple notebook that gives a demonstration of k-means, DBSCAN and GMM.

It accompanies Chapter 7 of the book (1 of 4) and shows how different figures were made.

A little less polished than others as a lecture notebook.

Author: Viviana Acquaviva, with contributions by Jake Postiglione and Olga Privman; see also other sources.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from sklearn import metrics

In [None]:
from mlxtend.plotting import plot_decision_regions

In [None]:
# Toy data: 300 points generated around 4 blob centres (seeded via random_state).
X, y_true = make_blobs(n_samples=300, centers=4,
                       cluster_std=0.6, random_state=2)

fig = plt.figure(figsize=(12,6))

start = np.array([[-1,1],[1,-1],[3,-3],[-5,-10]]) #initial points (fixed for reproducibility)

# Panel 1: the unlabelled data together with the hand-picked starting centroids.
plt.subplot(2,2,1)
plt.scatter(X[:, 0], X[:, 1], s =3, c ='gray') # plot original points
plt.scatter(start[:,0],start[:,1], s = 20, c = 'k', label = 'Iteration 0');
plt.xlim(-10,5);
plt.annotate('Iteration 0', xy=(77, 20), xycoords='axes points',
            size=14, ha='right', va='top')
#plt.legend(loc='lower left');

# Panels 2-4: re-run k-means from the same `start` centroids, capped at
# max_iter = 1, 2, 3, so each panel shows the state after that many iterations.
for i in range(1,4):
    plt.subplot(2,2,i+1)
    # n_init=1 because the initial centroids are given explicitly.
    kmeans = KMeans(n_clusters=4, max_iter = i, init = start, n_init=1)
    kmeans.fit(X)
    y_kmeans = kmeans.predict(X)
    centroids = kmeans.cluster_centers_
    # Points are coloured by their assigned cluster...
    plt.scatter(X[:, 0], X[:, 1], s = 3, c = y_kmeans, cmap = 'rainbow') # plot original points
    # ...and centroid j gets colour index j from the same colormap.
    plt.scatter(centroids[:, 0], centroids[:, 1], s=20, \
                edgecolor = 'k', label = 'Iteration'+str(i), c = [0,1,2,3],cmap = 'rainbow');
    plt.xlim(-10,5);
#    plt.legend(loc='lower left', numpoints=1);
    plt.annotate('Iteration '+str(i), xy=(77, 20), xycoords='axes points',
            size=14, ha='right', va='top')
#            bbox=dict(boxstyle='round', fc='w'))
#plt.savefig('Clustering_iterations.png', dpi = 300)

### Learning Check-in
    
Q: Based on the top left graph labeled "Iteration 0", how many groups of dots would you say there are?

<details>
<summary style="display: list-item;">Answer</summary>
<p>
There are 4 groups of dots, or "clusters". This may have been an easy process for a human, but how do we effectively have a computer do this for us? Check out the examples below detailing some more difficult situations we can run into with clustering.
</p>
</details>

<br/>

### First slightly tricky example: overlapping blobs of different size/density.

In [None]:
# Three single-centre blobs lined up along y = 1.
# Each row: (n_samples, x-centre, cluster_std, random_state).
blob_specs = [
    (200,  1.25, 0.2, 1),
    (400,  0,    0.5, 2),
    (200, -1.25, 0.2, 3),
]

(X1b, y1b), (X2b, y2b), (X3b, y3b) = [
    make_blobs(n_samples=count, centers=[(x_centre, 1)],
               cluster_std=spread, random_state=blob_seed)
    for count, x_centre, spread, blob_seed in blob_specs
]

In [None]:
fig = plt.figure(figsize=(12,6))

# One scatter layer per blob, drawn in generation order.
for pts, colour in ((X1b, 'gray'), (X2b, 'violet'), (X3b, 'teal')):
    plt.scatter(pts[:, 0], pts[:, 1], s=10, c=colour)

In [None]:
# Stack the three blobs row-wise into a single (n_points, 2) data set.
Xb = np.concatenate((X1b, X2b, X3b), axis=0)

In [None]:
# 3-cluster k-means on the stacked blobs; the seed keeps the labels repeatable.
kmeans = KMeans(n_clusters=3, random_state=33).fit(Xb) #predicts 0,1,2
centersb = kmeans.cluster_centers_
yb_kmeans = kmeans.predict(Xb)

In [None]:
# Generating-blob index (0., 1., 2.) for every row of Xb, in stacking order.
yb = np.repeat([0., 1., 2.], [len(y1b), len(y2b), len(y3b)])

In [None]:
# Decision regions of a 3-cluster k-means on the overlapping blobs.
plt.figure(figsize=(8,6))
# Same n_clusters / random_state as the `kmeans` fitted above, whose centres
# (`centersb`) are overlaid below.
model = KMeans(n_clusters=3, random_state=33) #predicts 0,1,2
model.fit(Xb)
# `yb` (the generating blob) is only used to style the small markers drawn by
# plot_decision_regions.
plot_decision_regions(Xb, yb.astype(int), clf=model, legend=0, markers = '...', colors = 'lightgray,violet,teal')
# Larger markers per generating blob.
# NOTE(review): teal/violet are swapped here relative to the colour list passed
# to plot_decision_regions -- presumably hand-tuned so each blob matches the
# region colour of the cluster found with random_state=33; confirm.
plt.scatter(X1b[:,0],X1b[:,1], s = 30, c = 'lightgray',edgecolors='k')
plt.scatter(X2b[:,0],X2b[:,1], s = 30, c = 'teal', edgecolors='k')
plt.scatter(X3b[:,0],X3b[:,1],s = 30, c = 'violet', edgecolors='k')
# Cluster centres as translucent black discs.
plt.scatter(centersb[:, 0], centersb[:, 1], c='black', s=100, alpha=0.5);

plt.xlim(-2.5,2.5)
plt.ylim(-0.5,2.5);

#plt.savefig('ClustersBad.png', dpi = 300)

### Now we move on to a different distribution (smiley face).

In [None]:
from math import pi, cos, sin
from random import random, seed

# Fixed seed for the ring sample: without it every figure and score derived
# from the smiley face changes on each run of the notebook.
RING_SEED = 42


def point(h, k, r):
    """Return one random (x, y) point on a vertically jittered circle.

    The point sits at a uniformly random angle on the circle of radius ``r``
    centred on ``(h, k)``; its y coordinate is then raised by a uniform
    amount in [0, 0.2).
    """
    theta = random() * 2 * pi
    return h + cos(theta) * r, k + sin(theta) * r + 0.2*random()


seed(RING_SEED)
# Outline of the face: 100 points around the unit circle centred on (1, 2).
xy = [point(1,2,1) for _ in range(100)]

In [None]:
# Facial features: three tight 10-point blobs (cluster_std=0.05).
# X1 and X2 are centred at (0.5, 2.5) and (1.5, 2.5).
X1, y1 = make_blobs(n_samples=10, centers=[(0.5,2.5)],
                       cluster_std=0.05, random_state=1)

X2, y2 = make_blobs(n_samples=10, centers=[(1.5,2.5)],
                       cluster_std=0.05, random_state=2)

# X3 is centred at (1, 1.7).
# NOTE(review): X3 reuses random_state=2 from X2, so the two blobs presumably
# share the same scatter pattern around their centres -- confirm this is intended.
X3, y3 = make_blobs(n_samples=10, centers=[(1,1.7)],
                       cluster_std=0.05, random_state=2)

In [None]:
# Make the mouth :) -- triple the x coordinates of X3; result has shape (2, n_points).
X3_stretch = np.vstack((X3[:, 0] * 3, X3[:, 1]))

In [None]:
# Quick look at the four pieces of the face, one default colour per piece.
ring_x, ring_y = zip(*xy)
face_layers = [
    (ring_x, ring_y),
    (X1[:, 0], X1[:, 1]),
    (X2[:, 0], X2[:, 1]),
    (X3_stretch[0] - 1.9, X3_stretch[1]),  # stretched mouth, shifted back in x
]

plt.axes().set_aspect('equal', 'datalim')
for layer_x, layer_y in face_layers:
    plt.scatter(layer_x, layer_y)

plt.show()

In [None]:
# Full smiley-face data set: ring, two blobs, then the shifted mouth.
mouth_points = np.column_stack((X3_stretch[0] - 1.9, X3_stretch[1]))
X = np.vstack([xy, X1, X2, mouth_points])

### Let's see how k-means clusters these points.

In [None]:
# 4-cluster k-means on the smiley face. The seed fixes the cluster numbering,
# which the colour list of the decision-region plot below depends on.
kmeans = KMeans(n_clusters=4, random_state=0) #We can also change the number of clusters
kmeans.fit(X)
y_kmeans = kmeans.predict(X)
centers = kmeans.cluster_centers_

print(centers)
# Points coloured by assigned cluster, centres as translucent black discs.
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=10, cmap='viridis')
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=100, alpha=0.5);

In [None]:
# Piece index for every row of X: 0. ring, 1. and 2. the two blobs, 3. mouth.
y = np.repeat([0., 1., 2., 3.], [len(xy), len(y1), len(y2), len(y3)])

In [None]:
# Decision regions of the 4-cluster `kmeans` fitted on the smiley face.
plt.figure(figsize=(8,6))
# `y` (which piece of the face a point belongs to) only styles the small
# markers; the shaded regions come from kmeans' predictions.
plot_decision_regions(X, y.astype(int), clf=kmeans, legend=0, markers = '.', colors = 'lightgray,teal,yellow,violet')
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=100, alpha=0.5);
# Overlay each piece with larger markers, using the same colour order as above.
# `xy` must still be the original ring sample here.
plt.scatter(*zip(*xy), s = 30, c = 'lightgray', edgecolors='k')
plt.scatter(X1[:,0],X1[:,1], s = 30, c = 'teal',edgecolors='k')
plt.scatter(X2[:,0],X2[:,1], s = 30, c = 'yellow', edgecolors='k')
plt.scatter(X3_stretch.T[:,0]-1.9,X3_stretch.T[:,1],s = 30, c = 'violet', edgecolors='k')
plt.xlim(-0.5,2.5);
plt.ylim(0.5,3.5);
#plt.savefig('ClustersBad2.png', dpi = 300)

### Learning Check-in
    
Q: Does this fit look good? Why might our clustering method be performing like this?

<details>
<summary style="display: list-item;">Answer</summary>
<p>
This is not a very good fit! The smiley face is a really tricky example, for several reasons! One of these reasons is the ring surrounding three smaller clusters. When our algorithm fits for the number of clusters, it has a hard time distinguishing exactly how the ring contributes to any given cluster.
</p>
</details>

#### The Elbow curve can be used to infer the number of clusters.

This is for the smiley face...

In [None]:
# k-means cost (`inertia_`) on the smiley face for k = 2..9.
# Each fit is a throw-away object so the 4-cluster `kmeans` plotted above is
# not overwritten, and the seed makes the curve repeatable.
inertias = [KMeans(n_clusters=k, random_state=0).fit(X).inertia_
            for k in range(2, 10)]

In [None]:
# Elbow curve: k-means cost against the number of clusters (k = 2..9).
fig = plt.figure(figsize=(8, 6))
elbow_ax = fig.add_subplot(111)
elbow_ax.plot(range(2, 10), inertias)
#elbow_ax.grid(True)
elbow_ax.set_title('Elbow curve for smiley face')
elbow_ax.set_xlabel('Number of clusters $k$', fontsize=14)
elbow_ax.set_ylabel('$k$-means cost function', fontsize=14);
#plt.savefig('ElbowSmiley.png', dpi = 300)

... and this is for the blobs.

In [None]:
# k-means cost (`inertia_`) on the overlapping blobs for k = 2..9.
# Throw-away fits keep the notebook-level `kmeans` intact; the seed makes the
# curve repeatable.
inertiasb = [KMeans(n_clusters=k, random_state=0).fit(Xb).inertia_
             for k in range(2, 10)]

In [None]:
# Elbow curve for the blobs: k-means cost against the number of clusters.
fig = plt.figure(figsize=(8, 6))
plt.plot(range(2, 10), inertiasb)
#plt.grid(True)
plt.title('Elbow curve for blobs');
plt.xlabel('Number of clusters $k$', fontsize = 14);
plt.ylabel('$k$-means cost function', fontsize = 14);
# Disabled like every other savefig in this notebook, so running the cell does
# not write a file into the working directory; uncomment to export the figure.
#plt.savefig('ElbowBlobs.png', dpi = 300)


### Silhouette score

In [None]:
from sklearn.metrics import silhouette_samples, silhouette_score

In [None]:
#Smiley face: silhouette diagram (left) and cluster assignment (right) per k

n_clusters = [2,3,4,6]

for n in n_clusters:

    # Seeded so the mean scores shown in the figures do not change between runs.
    model = KMeans(n_clusters = n, random_state = 0)
    model.fit(X)
    y_kmeans = model.predict(X)

    silhouette_scores = silhouette_samples(X, y_kmeans)
    mean_score = np.mean(silhouette_scores)

    fig, axs = plt.subplots(1, 2, figsize=(16, 8))

    # Right panel: the data coloured by assigned cluster.
    ax = axs[1]
    colors = plt.cm.Accent(y_kmeans.astype(float)/n)
    # `colors` already holds explicit RGBA values, so no colormap is passed
    # (matplotlib would ignore it and emit a warning).
    ax.scatter(X[:, 0], X[:, 1], c=colors, s=40, edgecolor='k');
    ax.tick_params(axis='both', which='both', labelsize=20);

    # Left panel: sorted per-sample silhouette scores, one band per cluster,
    # with a 10-unit gap between consecutive bands.
    ax = axs[0]
    xlower = 10

    for i in np.unique(y_kmeans):
        silh = np.sort(silhouette_scores[y_kmeans == i])
        xupper = xlower + silh.shape[0]
        color = plt.cm.Accent(float(i)/model.n_clusters)
        ax.fill_between(np.arange(xlower, xupper), 0, silh, facecolor=color, edgecolor=color, alpha=0.7)
        xlower = xupper + 10

    # Per-figure decorations are drawn once, not once per cluster.
    ax.axhline(y=0, c='k', lw=2)
    ax.axhline(y=mean_score, color="red", linestyle="--")
    ax.text(0.05, 0.95, '%0.0f clusters'%n, transform=ax.transAxes, fontsize=20)
    ax.text(0.45, 0.95, 'Mean S. score: %0.2f'%mean_score, transform=ax.transAxes, fontsize=20)
    ax.set_ylabel('Silhouette score', fontsize=16)
    ax.set_ylim(-0.2,0.8)
    ax.tick_params(axis='both', which='both', labelsize=20);
    ax.set_xticks([]);
#    figname = 'SilhouetteSmiley'+str(n)+'.png'
#    plt.savefig(figname, dpi = 300)

### Learning Check-in
    
Q: A mean silhouette score closer to 1 means there is little overlap between the clusters. With that in mind, what is the best Mean S. Score we see above?

<details>
<summary style="display: list-item;">Answer</summary>
<p>
The best score we see is a 0.50
</p>
</details>

In [None]:
#Blobs: silhouette diagram (left) and cluster assignment (right) per k

n_clusters = np.arange(2, 6)

for n in n_clusters:

    # Seeded so the mean scores shown in the figures do not change between runs.
    model = KMeans(n_clusters = n, random_state = 0)
    model.fit(Xb)
    y_kmeans = model.predict(Xb)

    silhouette_scores = silhouette_samples(Xb, y_kmeans)
    mean_score = np.mean(silhouette_scores)

    fig, axs = plt.subplots(1, 2, figsize=(16, 8))

    # Right panel: the data coloured by assigned cluster.
    ax = axs[1]
    colors = plt.cm.Accent(y_kmeans.astype(float)/n)
    # `colors` already holds explicit RGBA values, so no colormap is passed
    # (matplotlib would ignore it and emit a warning).
    ax.scatter(Xb[:, 0], Xb[:, 1], c=colors, s=40, edgecolor='k');
    ax.tick_params(axis='both', which='both', labelsize=20);

    # Left panel: sorted per-sample silhouette scores, one band per cluster,
    # with a 10-unit gap between consecutive bands.
    ax = axs[0]
    xlower = 10

    for i in np.unique(y_kmeans):
        silh = np.sort(silhouette_scores[y_kmeans == i])
        xupper = xlower + silh.shape[0]
        color = plt.cm.Accent(float(i)/model.n_clusters)
        ax.fill_between(np.arange(xlower, xupper), 0, silh, facecolor=color, edgecolor=color, alpha=0.7)
        xlower = xupper + 10

    # Per-figure decorations are drawn once, not once per cluster.
    ax.axhline(y=0, c='k', lw=2)
    ax.axhline(y=mean_score, color="red", linestyle="--")
    ax.text(0.05, 0.95, '%0.0f clusters'%n, transform=ax.transAxes, fontsize=20)
    ax.text(0.45, 0.95, 'Mean S. score: %0.2f'%mean_score, transform=ax.transAxes, fontsize=20)
    ax.set_ylabel('Silhouette score', fontsize=16)
    ax.set_ylim(-0.15,0.85)
    ax.tick_params(axis='both', which='both', labelsize=20);
    ax.set_xticks([]);
#    figname = 'SilhouetteBlobs'+str(n)+'.png'
#    plt.savefig(figname, dpi = 300)

### Density based clustering.

In [None]:
from sklearn.cluster import DBSCAN

#Probably missing a source here

In [None]:
# #############################################################################
# Compute DBSCAN
db = DBSCAN(eps=0.25, min_samples=2).fit(X)
# Boolean mask that is True for the samples DBSCAN marked as core points.
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)

# #############################################################################

# Black removed and is used for noise instead.
unique_labels = set(labels)
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    # `members` (not `xy`) so the ring sample stored in the notebook-level
    # `xy` is not overwritten by this plotting loop.
    # Core points of the cluster: large markers.
    members = X[class_member_mask & core_samples_mask]
    plt.plot(members[:, 0], members[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=14)

    # Non-core points (border points, or noise when k == -1): small markers.
    members = X[class_member_mask & ~core_samples_mask]
    plt.plot(members[:, 0], members[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=6)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

In [None]:
# #############################################################################
# Compute DBSCAN for several neighbourhood radii, one figure per eps value.

for i,eps in enumerate([0.2, 0.25, 0.3, 0.35]):

    plt.figure(figsize = (6,6))

    db = DBSCAN(eps=eps, min_samples=2).fit(X)

    # Boolean mask that is True for the samples DBSCAN marked as core points.
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    n_noise_ = list(labels).count(-1)

    print('Estimated number of clusters: %d' % n_clusters_)
    print('Estimated number of noise points: %d' % n_noise_)

    # Black removed and is used for noise instead.
    unique_labels = set(labels)
    colors = [plt.cm.Spectral(each)
              for each in np.linspace(0, 1, len(unique_labels))]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = [0, 0, 0, 1]

        class_member_mask = (labels == k)

        # `members` (not `xy`) so the ring sample stored in the notebook-level
        # `xy` is not overwritten by this plotting loop.
        # Core points of the cluster: large markers.
        members = X[class_member_mask & core_samples_mask]
        plt.plot(members[:, 0], members[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=10)

        # Non-core points (border points, or noise when k == -1): small markers.
        members = X[class_member_mask & ~core_samples_mask]
        plt.plot(members[:, 0], members[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=6)

    # Raw string: '\e' is not a valid Python escape sequence and triggers a
    # warning in a normal string literal; the resulting text is unchanged.
    plt.title(r'$\epsilon$ = %0.2f; estimated number of clusters: %d' % (eps, n_clusters_))

   # plt.savefig('DBSCAN_'+str(i)+'.png', dpi = 300)
    
    

### OPTICS

In [None]:
from sklearn.cluster import OPTICS

Partial source:
    
https://scikit-learn.org/stable/auto_examples/cluster/plot_optics.html

In [None]:
# #############################################################################
# OPTICS clustering of the smiley face (xi = 0.05).

op = OPTICS(xi=0.05, min_cluster_size=.05).fit(X)

labels = op.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)

# Black removed and is used for noise instead.
unique_labels = np.unique(labels)

colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]

plt.figure(figsize=(6,6))

for klass, color in zip(unique_labels, colors):
    # Skip the noise label explicitly. Slicing off the first entry would drop
    # a real cluster whenever OPTICS finds no noise points.
    if klass == -1:
        continue

    Xk = X[labels == klass]
    plt.scatter(Xk[:, 0], Xk[:, 1], s=60, c=np.array([color]),
                marker='o', edgecolors='k', linewidths=1)

# Noise points as black crosses.
noise = labels == -1
plt.plot(X[noise, 0], X[noise, 1], 'k+', ls = 'None')

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

#### Changing the parameter xi will change the estimate.

In [None]:
# #############################################################################
# OPTICS clustering of the smiley face (xi = 0.2).

op = OPTICS(xi=0.2, min_cluster_size=.05).fit(X)

labels = op.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)

# Black removed and is used for noise instead.
unique_labels = np.unique(labels)

colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]

plt.figure(figsize=(6,6))

for klass, color in zip(unique_labels, colors):
    # Skip the noise label explicitly. Slicing off the first entry would drop
    # a real cluster whenever OPTICS finds no noise points.
    if klass == -1:
        continue

    Xk = X[labels == klass]
    plt.scatter(Xk[:, 0], Xk[:, 1], s=60, c=np.array([color]),
                marker='o', edgecolors='k', linewidths=1)

# Noise points as black crosses.
noise = labels == -1
plt.plot(X[noise, 0], X[noise, 1], 'k+', ls = 'None')

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

### GMM

In [None]:
from sklearn import mixture

In [None]:
from matplotlib.patches import Ellipse

In [None]:
#Functions taken from Vanderplas' book (Data Science w Python)

def draw_ellipse(gmm, ax, **kwargs):
    """Draw three nested ellipses for every component of a fitted mixture model.

    Parameters
    ----------
    gmm : fitted mixture model exposing ``n_components``, ``covariance_type``,
        ``covariances_`` and ``means_``.
    ax : matplotlib Axes the ellipses are added to.
    **kwargs : forwarded to ``matplotlib.patches.Ellipse``.
    """
    for n in range(gmm.n_components):
        # Build the covariance matrix of component n for each covariance_type.
        if gmm.covariance_type == 'full':
            covariances = gmm.covariances_[n]
        elif gmm.covariance_type == 'tied':
            covariances = gmm.covariances_
        elif gmm.covariance_type == 'diag':
            covariances = np.diag(gmm.covariances_[n])
        elif gmm.covariance_type == 'spherical':
            covariances = np.eye(gmm.means_.shape[1]) * gmm.covariances_[n]

        # eigh returns the eigenvectors as the COLUMNS of w, so the direction
        # belonging to eigenvalue v[0] is w[:, 0] (not the row w[0]).
        v, w = np.linalg.eigh(covariances)
        u = w[:, 0] / np.linalg.norm(w[:, 0])
        angle = np.degrees(np.arctan2(u[1], u[0]))
        v = 2. * np.sqrt(2.) * np.sqrt(v)

        # Draw the Ellipse
        for nsig in range(1, 4): #1, 2, and 3 sigma
            # `angle` is passed by keyword: recent matplotlib releases no
            # longer accept it as the fourth positional argument.
            ell = Ellipse(gmm.means_[n], nsig * v[0], nsig * v[1], angle=angle, **kwargs)
            ax.add_patch(ell)


In [None]:
#Adapted from Vanderplas

fig, ax = plt.subplots(1, 3, figsize=(14, 4), sharey=True)
fig.subplots_adjust(wspace=0.05)

# Correlated 2-D Gaussian sample: standard normal points times a random 2x2 matrix.
rng = np.random.RandomState(20)
Xe = np.dot(rng.randn(500, 2), rng.randn(2, 2))

for i, cov_type in enumerate(['full','diag', 'spherical']):
    # Fit the single Gaussian to Xe, the data shown in the panel. Fitting X
    # (the smiley face) would draw ellipses unrelated to the plotted points.
    model = mixture.GaussianMixture(n_components=1, covariance_type=cov_type).fit(Xe)
    ax[i].axis('equal')
    ax[i].scatter(Xe[:, 0], Xe[:, 1], edgecolor='k', alpha=0.5)
    ax[i].set_title('covariance_type="{0}"'.format(cov_type), size=14, family='monospace')
    draw_ellipse(gmm=model, ax=ax[i], alpha=0.1, edgecolor='k', facecolor='#808080')
    # Hide the tick labels; only the shape of the ellipses matters here.
    ax[i].xaxis.set_major_formatter(plt.NullFormatter())
    ax[i].yaxis.set_major_formatter(plt.NullFormatter())

    ax[i].set_xlim(-5, 5)
    
#plt.savefig('GMM_Covariances.png', dpi = 300)
    
#plt.show()
    #plt.xlim(-5, 5)

### Modeling with a Gaussian Mixture Model predicts probabilities.

In [None]:
# Three-component, full-covariance mixture on the blobs (seeded).
model = mixture.GaussianMixture(n_components=3, covariance_type='full',
                                random_state=37) #Good! 0, 1, 2
model.fit(Xb)

# Hard labels and per-component membership probabilities for every point.
y_GMM = model.predict(Xb)
probs = model.predict_proba(Xb)

# Marker size grows steeply with the probability of the most likely component.
top_prob = probs.max(axis=1)
size = 50 * top_prob**4

In [None]:
y_GMM #(0,1,2)

### GMM decision function for a "full" covariance:

In [None]:
# Decision regions of the full-covariance mixture (same settings as the model
# used above to compute `size`).
model = mixture.GaussianMixture(n_components=3, covariance_type='full',random_state=37)

model.fit(Xb)

fig, ax = plt.subplots(1, 1, figsize=(8, 6))

plt.axis('equal')

# `yb` (the generating blob) only styles the small markers drawn by
# plot_decision_regions.
plot_decision_regions(Xb, yb.astype(int), 
        clf=model, legend=0, markers = '...', colors = 'lightgray,violet,teal')

# Overlay each blob with marker sizes taken from `size`, sliced in the order
# the blobs were stacked into Xb (X1b, then X2b, then X3b).
plt.scatter(X1b[:,0],X1b[:,1], s = size[:len(y1b)], c = 'lightgray',edgecolors='k')

plt.scatter(X2b[:,0],X2b[:,1], s = size[len(y1b):len(y1b)+len(y2b)], c = 'violet', edgecolors='k')

plt.scatter(X3b[:,0],X3b[:,1],s = size[len(y1b)+len(y2b):], c = 'teal', edgecolors='k')

ax.set_title('covariance_type="{0}"'.format('full'), size=14, family='monospace')

ax.set_xlim(-2.5, 2.5)
ax.set_ylim(-0.5,2.5)

#plt.savefig('GMM_blobs_full.png', dpi = 300)

### Now with a spherical covariance:

In [None]:
# Decision regions of the spherical-covariance mixture on the blobs.
model = mixture.GaussianMixture(n_components=3, covariance_type='spherical',random_state=37)

model.fit(Xb)

# Marker sizes must reflect THIS model's membership probabilities; the
# notebook-level `size` was computed from the full-covariance model.
size_spherical = 50 * model.predict_proba(Xb).max(axis=1)**4

# Row offsets of the three blobs inside Xb (stacked as X1b, X2b, X3b).
end1 = len(y1b)
end2 = end1 + len(y2b)

fig, ax = plt.subplots(1, 1, figsize=(8, 6))

plt.axis('equal')

# `yb` (the generating blob) only styles the small markers drawn by
# plot_decision_regions.
plot_decision_regions(Xb, yb.astype(int),
        clf=model, legend=0, markers = '...', colors = 'lightgray,violet,teal')

plt.scatter(X1b[:,0],X1b[:,1], s = size_spherical[:end1], c = 'lightgray',edgecolors='k')

plt.scatter(X2b[:,0],X2b[:,1], s = size_spherical[end1:end2], c = 'violet', edgecolors='k')

plt.scatter(X3b[:,0],X3b[:,1],s = size_spherical[end2:], c = 'teal', edgecolors='k')

ax.set_title('covariance_type="{0}"'.format('spherical'), size=14, family='monospace')

ax.set_xlim(-2.5, 2.5)
ax.set_ylim(-0.5,2.5)

#plt.savefig('GMM_blobs_spherical.png', dpi = 300)

### Finally, we generate predictions for the smiley face.

In [None]:
X.shape

In [None]:
# Decision regions of a 4-component, full-covariance mixture on the smiley face.
gmm4 = mixture.GaussianMixture(n_components=4, covariance_type='full', random_state=0)

gmm4.fit(X)

plt.figure(figsize=(8,6))

# Ring points exactly as stored in X (rows labelled 0 in `y`). Drawing a fresh
# random ring here would plot points the model was never fitted on.
ring = X[y == 0]

plot_decision_regions(X, y.astype(int),
        clf=gmm4, legend=0, markers = '.', colors = 'lightgray,yellow,teal,violet')

# Overlay each piece of the face with larger markers.
plt.scatter(ring[:, 0], ring[:, 1], s = 30, c = 'lightgray', edgecolors='k')
plt.scatter(X1[:,0],X1[:,1], s = 30, c = 'teal',edgecolors='k')
plt.scatter(X2[:,0],X2[:,1], s = 30, c = 'violet', edgecolors='k')
plt.scatter(X3_stretch.T[:,0]-1.9,X3_stretch.T[:,1],s = 30, c = 'yellow', edgecolors='k')


plt.xlim(-0.5,2.5);

plt.ylim(0.5,3.5);

#plt.savefig('GMMbad.png', dpi = 300)

### We can use the BIC criterion to figure out how many components best fit the smiley face in the GMM model.

In [None]:
# Fit one full-covariance mixture per candidate size (1..29 components).
n_components = np.arange(1, 30)
models = [
    mixture.GaussianMixture(n_comp, covariance_type='full', random_state=0).fit(X)
    for n_comp in n_components
]

# BIC of every fitted model, evaluated on the data it was fitted to.
bic_scores = [fitted.bic(X) for fitted in models]

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
ax.plot(n_components, bic_scores, label='BIC')
ax.legend(loc='best', fontsize=20)
ax.set_xlabel('n_components', fontsize=20)
ax.tick_params(axis='both', which='both', labelsize=20);
#plt.savefig('GMM_smiley_BIC.png', dpi = 300)

### And re-do the plot with the appropriate number of components.

In [None]:
#These two functions (note that "draw_ellipse" is not the same as before!) are also from Jake Vanderplas' book.

def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw three nested ellipses for a given position and covariance.

    Parameters
    ----------
    position : centre of the ellipses.
    covariance : either a (2, 2) covariance matrix, or any other shape that
        unpacks into two per-axis variances (drawn axis-aligned).
    ax : Axes to draw on; defaults to the current Axes.
    **kwargs : forwarded to ``matplotlib.patches.Ellipse``.
    """
    ax = ax or plt.gca()

    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0.0
        width, height = 2 * np.sqrt(covariance)

    # Draw the Ellipse at 1x, 2x and 3x the base width/height.
    for nsig in range(1, 4):
        # `angle` is passed by keyword: recent matplotlib releases no longer
        # accept it as the fourth positional argument.
        ax.add_patch(Ellipse(position, nsig * width, nsig * height, angle=angle, **kwargs))

def plot_gmm(gmm, X, label=True, ax=None):
    """Fit ``gmm`` to ``X`` and plot the data with one set of ellipses per component.

    Parameters
    ----------
    gmm : mixture model; it is (re)fitted on ``X`` by this call.
    X : data array whose first two columns are plotted.
    label : if True, colour the points by predicted component.
    ax : Axes to draw on; defaults to the current Axes.
    """
    ax = ax or plt.gca()
    labels = gmm.fit(X).predict(X)
    if label:
        ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='Accent', zorder=2, edgecolor='k')
    else:
        ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2, edgecolor='k')
    ax.axis('equal')

    # Ellipse opacity scales with the component weight; the heaviest gets 0.2.
    w_factor = 0.2 / gmm.weights_.max()

    for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_):
        # Forward `ax` so the ellipses land on the requested Axes rather than
        # on whatever Axes happens to be current.
        draw_ellipse(pos, covar, ax=ax, facecolor='#808080', edgecolor='k', alpha=w * w_factor)

    ax.tick_params(axis='both', which='both', labelsize=20);

In [None]:
# 10-component, full-covariance mixture; plot_gmm fits it on the smiley face.
gmm10 = mixture.GaussianMixture(n_components=10, covariance_type='full', random_state=0)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, aspect='equal')

plot_gmm(gmm10, X, label=False, ax=ax)

# Frame the face and tag the panel.
ax.set_xlim(-0.5, 2.5)
ax.set_ylim(0.5, 3.5)
ax.text(-0.3, 0.7, 'Original', fontsize=18)

ax.tick_params(axis='both', which='both', labelsize=20);

#plt.savefig('Smiley_GMM_10.png', dpi = 300)


### Finally, we can use our GMM with 10 components as a generative model to generate new samples along the smiley face distribution!

In [None]:
# Draw 500 new points from the fitted mixture; sample() returns a tuple whose
# first element holds the coordinates.
Xnew = gmm10.sample(n_samples=500)
generated = Xnew[0]

fig = plt.figure(figsize=(8, 8))

# Shares its y axis with the 'Original' panel created above.
ax2 = fig.add_subplot(111, aspect='equal', sharey = ax)

ax2.scatter(generated[:, 0], generated[:, 1], s=40, facecolor='r', edgecolor='k', alpha=0.5)
ax2.tick_params(axis='both', which='both', labelsize=20)

ax2.set_xlim(-0.5, 2.5)
ax2.set_ylim(0.5, 3.5)

ax2.text(-0.3, 0.7, 'Generated', fontsize=18)

#plt.savefig('Smiley_GMM_generated.png', dpi = 300)

### Learning Check-in
    
Q: How does the GMM method do at trying to reproduce the smiley face?

<details>
<summary style="display: list-item;">Answer</summary>
<p>
Comparing the two graphs above, we can see where GMM decided to place various distributions. When we use those distributions to generate new points, it can be slightly off and any inaccuracies in the fit become quite apparent.
    
How do you think any of these clustering methods would fare looking at the generated model? Try it yourself!
</p>
</details>