In [None]:
import numpy as np
import matplotlib.pyplot as plt
from utils import *

%matplotlib inline

Implementing K-means
The K-means algorithm is a method to automatically cluster similar data points together.

Concretely, you are given a training set $\{x^{(1)}, \dots, x^{(m)}\}$, and you want to group the data into a few cohesive “clusters”.
K-means is an iterative procedure that
- Starts by guessing the initial centroids, and then
- Refines this guess by
  - Repeatedly assigning examples to their closest centroids, and then
  - Recomputing the centroids based on the assignments.


In [None]:
# Function for finding the closest centroid to each datapoint.
def find_closest_centroids(X, centroids):
    """
    Assign every example to its nearest centroid (Euclidean distance).

    Args:
        X (ndarray): Data points, one example per entry along the first
            axis (typically shape (m, n)).
        centroids (ndarray): Centroid positions, one centroid per entry
            along the first axis (typically shape (K, n)).

    Returns:
        idx (ndarray): Integer array of shape (m,). idx[i] is the index of
            the centroid closest to X[i]; ties go to the lowest centroid
            index. The array is all zeros when there are no centroids.
    """
    m = X.shape[0]
    K = centroids.shape[0]

    idx = np.zeros(m, dtype=int)

    # With no examples or no centroids there is nothing to compare, so the
    # all-zero assignment is returned unchanged.
    if m == 0 or K == 0:
        return idx

    # dist[i, j] = ||X[i] - centroids[j]||. Looping over the (few) centroids
    # and vectorising over the (many) examples avoids the per-point Python
    # loop while only ever holding one X-sized temporary in memory.
    dist = np.empty((m, K))
    for j in range(K):
        # Flatten each example's difference so the norm is taken over all of
        # its coordinates, whatever the trailing shape of X is.
        diff = (X - centroids[j]).reshape(m, -1)
        dist[:, j] = np.linalg.norm(diff, axis=1)

    # A single argmin per example, taken once all distances are known.
    idx[:] = np.argmin(dist, axis=1)
    return idx

In [None]:
# Function for finding the new centroids by taking the average of the coordinate of the data points. 
def compute_centroids(X, idx, K):
    """
    Recompute each centroid as the mean of the examples assigned to it.

    Args:
        X (ndarray): Data points of shape (m, n).
        idx (array_like): Length-m cluster assignment; idx[i] is the index
            of the centroid that example X[i] belongs to.
        K (int): Number of centroids.

    Returns:
        centroids (ndarray): Array of shape (K, n). Row k is the mean of the
            examples with idx == k. A cluster with no assigned examples is
            placed at the mean of all examples (or at the origin when X has
            no rows) instead of becoming NaN.
    """
    m, n = X.shape
    idx = np.asarray(idx)

    centroids = np.zeros((K, n))

    # Averaging an empty slice yields NaN (plus a RuntimeWarning), and a NaN
    # centroid then wins every subsequent argmin. Use a finite fallback
    # position for empty clusters instead.
    fallback = np.mean(X, axis=0) if m > 0 else np.zeros(n)

    for k in range(K):
        points = X[idx == k, :]
        if points.shape[0] > 0:
            centroids[k] = np.mean(points, axis=0)
        else:
            centroids[k] = fallback

    return centroids

In [None]:
# Function to run K-means algorithm
def run_kMeans(X, initial_centroids, max_iters=10, plot_progress=False):
    """
    Run K-means on X for a fixed number of iterations.

    Each iteration assigns every example to its closest centroid and then
    moves each centroid to the mean of its assigned examples.

    Args:
        X (ndarray): Data matrix of shape (m, n).
        initial_centroids (ndarray): Starting centroids; the number of rows
            determines K.
        max_iters (int): Number of assign/update iterations to run.
        plot_progress (bool): If True, call plot_progress_kMeans on every
            iteration (presumably provided by `from utils import *`).

    Returns:
        centroids (ndarray): Centroids after the last update step.
        idx (ndarray): Integer cluster index per example, as computed at the
            start of the last iteration, i.e. against the centroids that
            were current before the final update. All zeros if no iteration
            ran.
    """
    # Unpacking also enforces that X is two-dimensional.
    m, _ = X.shape
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    # Integer labels even when the loop body never executes, so the returned
    # dtype matches what find_closest_centroids produces.
    idx = np.zeros(m, dtype=int)

    for i in range(max_iters):

        print("K-Means iteration %d/%d" % (i, max_iters-1))

        # Assignment step.
        idx = find_closest_centroids(X, centroids)

        if plot_progress:
            plot_progress_kMeans(X, centroids, previous_centroids, idx, K, i)
            # Remember where the centroids were so the next plot call can
            # receive both the old and the new positions.
            previous_centroids = centroids

        # Update step.
        centroids = compute_centroids(X, idx, K)
    # Called whether or not anything was plotted.
    plt.show()
    return centroids, idx

In [None]:
# Function for randomly initializing the centroids. 
def kMeans_init_centroids(X, K, random_state=None):
    """
    Pick K distinct examples of X at random to serve as initial centroids.

    Args:
        X (ndarray): Data points, one example per entry along the first axis.
        K (int): Number of centroids to draw; must satisfy 0 <= K <= m,
            where m is the number of examples.
        random_state (None, int, or generator): Source of randomness.
            None uses the global `np.random` state (so `np.random.seed`
            still controls the draw); an int seeds a private
            `np.random.RandomState`; any object exposing `.permutation`
            is used as-is.

    Returns:
        centroids (ndarray): The K selected rows of X (a copy).

    Raises:
        ValueError: If K is negative or larger than the number of examples;
            slicing would otherwise silently return the wrong number of
            centroids.
    """
    m = X.shape[0]
    if not 0 <= K <= m:
        raise ValueError(
            "K must be between 0 and the number of examples (%d), got %r" % (m, K)
        )

    if random_state is None:
        rng = np.random
    elif hasattr(random_state, "permutation"):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # Shuffle the example indices and keep the first K, which guarantees the
    # chosen rows come from distinct positions in X.
    randidx = rng.permutation(m)

    centroids = X[randidx[:K]]

    return centroids