In [32]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [33]:

def find_closest_centroids(X, centroids):

    # Set K
    K = centroids.shape[0]

    # You need to return the following variables correctly
    idx = np.zeros(X.shape[0], dtype=int)
    for i in range(X.shape[0]):
      # Array to hold distance between X[i] and each centroids[j]
      distance = [] 
      for j in range(centroids.shape[0]):
          norm_ij = np.linalg.norm(X[i] - centroids[j])
          distance.append(norm_ij)

      idx[i] = np.argmin(distance)
    return idx

In [34]:
def compute_centroids(X, idx, K):
    
    # Useful variables
    m, n = X.shape
    
    # You need to return the following variables correctly
    centroids = np.zeros((K, n))
    
    ### START CODE HERE ###
    for k in range(K):   
          points = X[idx == k]  
          centroids[k] = np.mean(points, axis = 0)

    ### END CODE HERE ## 
    
    return centroids

In [29]:
def plot_kmeans(X, centroids, previous, idx, K, axes):
    for i, ax in enumerate(axes):
        sns.scatterplot(x=X[:,0], y=X[:,1], hue=idx.ravel(), legend=False, palette=['r', 'g', 'b'], ax=ax)
        if i==0:
            sns.scatterplot(x=centroids[:,0], y=centroids[:,1], marker='X', color='k', legend=False, s=100, ax=ax)
        else:
            sns.scatterplot(x=centroids[:,0], y=centroids[:,1], marker='X', color='k', legend=False, s=100, ax=ax)
        for i in range(centroids.shape[0]):
            ax.plot([centroids[i,0], previous[i,0]], [centroids[i,1], previous[i,1]], '--k');  

In [37]:
def run_kMeans(X, initial_centroids, max_iters, plot_progress):
    if plot_progress:
        ncols = 3
        nrows = int(max_iters/ncols)
        if max_iters % ncols > 0:
            nrows = nrows + 1
        fig, axes = plt.subplots(nrows=nrows,ncols=ncols,figsize=(20,nrows*8))
        ax_tuple = list(np.ndindex(nrows,ncols))
        for ax in ax_tuple[max_iters:]:
            axes[ax].set_axis_off()
        ax_tuple = ax_tuple[:max_iters]
        
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids

    for i in range(max_iters):
        idx = find_closest_centroids(X, centroids)
        
        if plot_progress:
            plot_axes = [axes[axi] for axi in ax_tuple[i:]]
            axes[ax_tuple[i]].set_title('K-Means iteration {0}/{1}'.format(i+1, max_iters))
            plot_kmeans(X, centroids, previous_centroids, idx, K, plot_axes)
            previous_centroids = centroids
            previous_ax = plt.gca()
        else:
            print('K-Means iteration {0}/{1}'.format(i+1, max_iters))
        centroids = compute_centroids(X, idx, K)
        print(idx)
    
    if plot_progress:
        plt.show()
    return centroids, idx

In [45]:
# X = np.array([[10,20],[30,40],[50,20],[60,30],[40,50]])
X = np.random.randint(1,10, size=(20, 2))
print(X)
# Set initial centroids
initial_centroids = np.array([[3,3],[6,2],[8,5]])
K = 3

# Number of iterations
max_iters = 10


centroids, idx = run_kMeans(X, initial_centroids, max_iters, False)
print(idx)

[[2 2]
 [6 9]
 [2 9]
 [5 9]
 [7 7]
 [2 5]
 [4 8]
 [2 6]
 [9 2]
 [7 6]
 [2 4]
 [9 4]
 [9 4]
 [8 7]
 [5 7]
 [7 9]
 [4 5]
 [4 5]
 [7 8]
 [6 3]]
K-Means iteration 1/10
[0 2 0 2 2 0 2 0 1 2 0 2 2 2 2 2 0 0 2 1]
K-Means iteration 2/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 3/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 4/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 5/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 6/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 7/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 8/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 9/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
K-Means iteration 10/10
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
[0 2 0 2 2 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1]
