## KMEANS

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import PIL as pil

def _nearest_centroid(pixels, centroids):
    """Return, for every row of *pixels*, the index of the closest centroid.

    Closeness is squared Euclidean distance. Ties go to the lowest centroid
    index. A running minimum is kept so that only O(n_pixels) extra memory is
    needed instead of a full (n_pixels, k) distance matrix.
    """
    n_pixels = pixels.shape[0]
    labels = np.zeros(n_pixels, dtype=np.intp)
    best = np.full(n_pixels, np.inf)
    for k, centroid in enumerate(centroids):
        diff = pixels - centroid
        sq_dist = np.einsum("ij,ij->i", diff, diff)
        closer = sq_dist < best
        labels[closer] = k
        best[closer] = sq_dist[closer]
    return labels


def kmeans(img_1d, k_clusters, max_iter, init_centroids='random'):
    """Reduce an image to ``k_clusters`` colors with k-means clustering.

    Parameters
    ----------
    img_1d : numpy.ndarray
        Image array of shape (height, width, depth), e.g. depth = 3 for RGB.
        The input array is left unmodified.
    k_clusters : int
        Number of clusters (output colors); must be at least 1.
    max_iter : int
        Maximum number of centroid-update iterations.
    init_centroids : {'random', 'in_pixels'}
        'random' draws every centroid channel uniformly from 0..255;
        'in_pixels' uses the colors of randomly chosen pixels of the image.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The recolored image (same shape and dtype as ``img_1d``) in which
        each pixel is replaced by its cluster's centroid, and the centroids
        as a float64 array of shape (k_clusters, depth).

    Raises
    ------
    ValueError
        If ``k_clusters`` is smaller than 1 or ``init_centroids`` is not one
        of the supported strategies.
    """
    # height <-> rows, width <-> columns, depth <-> channels
    height, width, depth = img_1d.shape
    n_pixels = height * width

    if k_clusters < 1:
        raise ValueError("k_clusters must be at least 1")

    # Work on a float copy: this keeps the caller's image intact and avoids
    # unsigned-integer wraparound when subtracting centroids from pixels.
    pixels = img_1d.reshape(n_pixels, depth).astype(np.float64)

    # initial centroids, one row per cluster
    if init_centroids == "random":
        centroids = np.random.choice(
            256, size=(k_clusters, depth)
        ).astype(np.float64)
    elif init_centroids == "in_pixels":
        # Prefer distinct seed pixels; fall back to sampling with
        # replacement only when more clusters than pixels are requested.
        picks = np.random.choice(
            n_pixels, k_clusters, replace=k_clusters > n_pixels
        )
        centroids = pixels[picks]  # fancy indexing -> independent copy
    else:
        raise ValueError(
            "init_centroids must be 'random' or 'in_pixels', got %r"
            % (init_centroids,)
        )

    labels = _nearest_centroid(pixels, centroids)
    for _ in range(max_iter):
        # Snapshot (not alias) the centroids so the convergence test
        # compares against the values from before this update.
        previous = centroids.copy()

        for k in range(k_clusters):
            members = pixels[labels == k]
            # An empty cluster keeps its previous centroid instead of
            # turning into NaN.
            if members.shape[0]:
                centroids[k] = np.nanmean(members, axis=0)

        # converged: no centroid channel moved by more than ~2 levels
        if np.allclose(previous, centroids, atol=2):
            break

        labels = _nearest_centroid(pixels, centroids)

    # paint every pixel with the color of its centroid
    quantized = centroids[labels]
    if np.issubdtype(img_1d.dtype, np.integer):
        # round to nearest instead of truncating when casting back
        quantized = np.rint(quantized)
    new_img = quantized.astype(img_1d.dtype).reshape(height, width, depth)

    return new_img, centroids
    
def main(k_clus=1000, max_iter=1000):
    """Interactively quantize an image with k-means and save the result.

    Prompts for the input image name and the output extension, runs
    ``kmeans`` with 'in_pixels' initialization, shows the result, saves it
    as ``<input name without extension><k_clus>.<extension>`` and prints
    the centroids and the resulting image array.

    Parameters
    ----------
    k_clus : int
        Number of clusters (colors) to reduce the image to.
    max_iter : int
        Maximum number of k-means iterations.
    """
    # Local imports: `import PIL` alone does not load the Image submodule,
    # and os is only needed here for path handling.
    import os
    from PIL import Image

    imgName = input("Enter image name: ")
    imgExt = input("Enter image extension: ")

    # np.array() forces the pixel data to load before the file is closed
    with Image.open(imgName) as img:
        npArray = np.array(img)

    newIMG, centroids = kmeans(
        npArray,
        k_clusters=k_clus,
        max_iter=max_iter,
        init_centroids="in_pixels",
    )

    # splitext drops only the final extension, so names such as "./a.jpg"
    # or "photo.v2.jpg" keep their full stem
    stem = os.path.splitext(imgName)[0]
    newImgName = stem + str(k_clus) + "." + imgExt
    plt.imshow(newIMG)

    plt.imsave(newImgName, newIMG)

    print("Centroids: ")
    print(centroids)

    print("Image array: ")
    print(newIMG)


if __name__ == "__main__":
    main()


In [76]:
from sklearn.cluster import KMeans

# I = pil.Image.open("1.jpg")
# a = np.asarray(I,dtype=np.float32)/255
# w, h = I.size
# colors = I.getcolors(w * h)
# num_colors = len(colors) 
# num_pixels = w*h 
# x, y, z = a.shape
# a1 = a.reshape(x*y, z)
# n = 60
# k_means = KMeans(n_clusters=n)
# k_means.fit(a1)
# centroids = k_means.cluster_centers_
# labels = k_means.labels_
# a2 = centroids[labels]
# a3 = a2.reshape(x,y,z)
# plt.imshow(a3)