In [20]:
from __future__ import absolute_import

import random
import numpy as np

import matplotlib.pyplot as plt
import cv2

import scipy.io
import scipy.misc

In [None]:
def choose_random_centroids(samples, K):
    """
    Randomly choose K centroids from samples.
    :param samples: samples.
    :param K: K as in K-means. Number of clusters.
    :return: an array of centroids.
    """
    shuffle_samples = np.random.permutation(samples)

    centroids = shuffle_samples[0:K,:]
    
    return centroids

In [21]:


def find_closest_centroids(samples, centroids):
    """
    Find the closest centroid for all samples.

    :param samples: samples.
    :param centroids: an array of centroids.
    :return: a list of cluster_id assignment.
    """

    results = []

    for row in range(samples.shape[0]):
        diff = samples[row] - centroids
        distance = (diff[:,0] **2 + diff[:,1] **2 + diff[:,2] **2) ** .5
        results.append(np.argmin(distance)) # return index - cluster assignment
        
    return results

In [24]:
def run_k_means(samples, centroids, n_iter):
    """
    Run K-means algorithm. The number of clusters 'K' is defined by the size of initial_centroids
    :param samples: samples.
    :param initial_centroids: a list of initial centroids.
    :param n_iter: number of iterations.
    :return: a pair of cluster assignment and history of centroids.
    """

    centroid_history = []
    current_centroids = centroids
    clusters = []
    for iteration in range(n_iter):
        centroid_history.append(current_centroids)
        print("Iteration %d, Finding centroids for all samples..." % iteration)
        clusters = find_closest_centroids(samples, current_centroids)
        print("Recompute centroids...")
        current_centroids = get_centroids(samples, clusters)

    return clusters, centroid_history

In [None]:
print(y)

In [30]:
def get_centroids(samples, clusters):
    """
    Find the centroid given the samples and their cluster.
    :param samples: samples.
    :param clusters: list of clusters corresponding to each sample.
    :return: an array of centroids.
    """
    cluster_label = np.unique(clusters)
    centroids = []

    for label in cluster_label:
        rows_cluster = [] #store pixel rows for each cluster
        for i in range(len(clusters)):
            if clusters[i] == label:
                rows_cluster.append(i)
            
        cluster_mean = np.mean(samples[rows_cluster,:], axis=0)
        centroids.append(cluster_mean.reshape(1,3))
    new_centroids = np.concatenate(centroids, axis=0)

    return new_centroids

In [13]:
img = cv2.imread('boston-1993606_1280.jpg')
depth, rows, columns = img.shape
samples = img.reshape(depth*rows, columns)
centroids = choose_random_centroids(samples, 4)

In [14]:
print(centroids)

[[ 77  67  50]
 [150 147 139]
 [163 180 201]
 [104 115 137]]


In [17]:
x = find_closest_centroids(samples, centroids)
print(len(x))

1088000


In [31]:
y = run_k_means(samples, centroids, 3) #clusters and centroid history

Iteration 0, Finding centroids for all samples...
Recompute centroids...
Iteration 1, Finding centroids for all samples...
Recompute centroids...
Iteration 2, Finding centroids for all samples...
Recompute centroids...


In [35]:
print(len(y))

2


In [47]:
print(len(y[1]))
print(y[1][0].shape)
print(y[1][1].shape)
print(y[1][2].shape)

3
(4, 3)
(4, 3)
(4, 3)


In [54]:

print(y[1][1])

[[129.71205826 135.38550623 127.12477715]
 [131.5316525  136.13044069 127.46871233]
 [126.98501663 134.59481918 128.19414258]
 [128.83861976 134.71961494 127.27018782]]


In [57]:


img = cv2.imread('boston-1993606_1280.jpg')
depth, rows, col = img.shape

#reshape array
samples = img.reshape(depth * rows, 3)

#choose centroids, select k value
centroids = choose_random_centroids(samples, 4)

#k-means
clusters, centroid_history = run_k_means(samples, centroids, 20)




Iteration 0, Finding centroids for all samples...
Recompute centroids...
Iteration 1, Finding centroids for all samples...
Recompute centroids...
Iteration 2, Finding centroids for all samples...
Recompute centroids...
Iteration 3, Finding centroids for all samples...
Recompute centroids...
Iteration 4, Finding centroids for all samples...
Recompute centroids...
Iteration 5, Finding centroids for all samples...
Recompute centroids...
Iteration 6, Finding centroids for all samples...
Recompute centroids...
Iteration 7, Finding centroids for all samples...
Recompute centroids...
Iteration 8, Finding centroids for all samples...
Recompute centroids...
Iteration 9, Finding centroids for all samples...
Recompute centroids...
Iteration 10, Finding centroids for all samples...
Recompute centroids...
Iteration 11, Finding centroids for all samples...
Recompute centroids...
Iteration 12, Finding centroids for all samples...
Recompute centroids...
Iteration 13, Finding centroids for all samples.

In [66]:
replaced_pixel = [] #centroid and their cluster
for cluster in clusters:
    best_centroid = np.array(centroid_history[-1][cluster]) # replace centroids with latest iteration 
    replaced_pixel.append(best_centroid)
new_image = np.concatenate(replaced_pixel) # grouping into np array
new_image_int = new_image.astype('uint8') # converting data types
final_image = new_image_int.reshape(depth,rows,columns)  # reshaping the same as input image

In [69]:
print(len(replaced_pixel))
print(replaced_pixel[0:5])

1088000
[array([206.58368875, 191.05737575, 172.13449983]), array([206.58368875, 191.05737575, 172.13449983]), array([206.58368875, 191.05737575, 172.13449983]), array([206.58368875, 191.05737575, 172.13449983]), array([206.58368875, 191.05737575, 172.13449983])]


In [None]:
print(replaced_pixel)

In [63]:
print(len(centroid_history))
print(centroid_history[-1]) # 4 clusters, 4 centroids

20
[[ 88.29754048 107.61447301 102.82333081]
 [206.58368875 191.05737575 172.13449983]
 [ 41.10136071  55.89159297  50.3182471 ]
 [127.66680955 142.80023209 145.11997435]]


In [None]:
# reassign 