#### KMeans

##### Image Preparation
1) Open the image.
2) Convert to an array (height, width, rgb)
3) Reshape to an array (height*width, rgb), i.e. one row per pixel

##### KMeans Application
4) Fit k-means from converted array.
5) Use the labels from fitting (a separate predict step is unnecessary, since the fitted data is the data being labelled).
6) Build a pixel array by replacing each pixel with the centroid of its assigned label.

##### Image Recovery
7) Reshape fit-array into image size.
8) Convert to image from array.
9) Enjoy :)

In [11]:
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans

In [12]:
def reshapeImageAsArray(image):
    '''Flatten an RGB image into a 2-D array with one row per pixel.

    image is anything np.asarray accepts (for example a PIL Image)
    whose array form has shape (rows, columns, 3).

    Returns a float64 array of shape (rows*columns, 3).  Pixels are in
    row-major order, so pixel (i, j) ends up at index i*columns + j.

    Raises ValueError if the array form is not 3-dimensional with
    exactly three channels (e.g. grayscale or RGBA input).'''
    imarray = np.asarray(image)
    # Check the rank first: a 2-D (grayscale) array has no shape[2] at all.
    if imarray.ndim != 3 or imarray.shape[2] != 3:
        raise ValueError("Image does not have just RGB values.")

    # A C-order reshape already yields the row-major pixel ordering, so no
    # per-pixel loop is needed.  astype makes a fresh float64 copy.
    return imarray.reshape(-1, 3).astype(np.float64)

def returnArrayToImage(imarray,rows,columns):
    '''Rebuild an image from a flat per-pixel array.

    imarray holds one RGB triple per row, shape (rows*columns, 3), in
    row-major pixel order.  rows and columns are the height and width
    of the image to rebuild.

    Returns a PIL Image created from the (rows, columns, 3) uint8 array.

    Raises ValueError if imarray does not hold exactly rows*columns
    RGB triples.'''
    pixels = np.asarray(imarray)
    if pixels.shape != (rows*columns, 3):
        raise ValueError(
            "Expected an array of shape (%d, 3), got %s."
            % (rows*columns, pixels.shape,))

    # One reshape restores the 2-D pixel grid.  astype(np.uint8) truncates
    # any fractional part (it does not round) when the input is floating point.
    return Image.fromarray(pixels.reshape(rows, columns, 3).astype(np.uint8))

def kmeanspp(imageurl,k,max_it):
    '''Reduce an image to k colors using k-means with k-means++ init.

    imageurl is handed directly to PIL's Image.open, so it is a file
    path or file object rather than a web URL.  k is the number of
    centroids (and therefore output colors), max_it is the max
    iteration count for the fit.

    Returns a PIL Image of the same size in which every pixel has been
    replaced by the centroid of the cluster it was assigned to.'''
    #Load the image; the context manager closes the underlying file.
    #convert() returns a separate in-memory copy, so it stays usable
    #afterwards, and it also lets grayscale/RGBA/palette files through.
    with Image.open(imageurl) as picture:
        rgbpicture = picture.convert('RGB')
    #PIL reports size as (width, height).
    columns, rows = rgbpicture.size
    #Convert the image to an array with one RGB row per pixel.
    imarray = reshapeImageAsArray(rgbpicture)
    #Perform kmeans++ on the image with k clusters and max_it max iteration count.
    kmeans = KMeans(n_clusters = k, init = 'k-means++', max_iter = max_it).fit(imarray)
    #Indexing the centroids with the label array swaps each pixel for its centroid.
    kmeansarray = kmeans.cluster_centers_[kmeans.labels_]
    #Return the image after reshaping the array into the proper form of the og image.
    return returnArrayToImage(kmeansarray,rows,columns)

In [13]:
#Path of the input picture (despite the name, this is a file path; it is
#the value given as imageurl in the commented-out kmeanspp call below):
url = 'dummyname.jpg'
#Change to the number of clusters (given as k in the kmeanspp call below).
kay = 3
#Max iteration count (given as max_it in the kmeanspp call below).
#Set higher if you'd like closer approximations.
max_iter = 200
#Save output as [insert filename]:
filename = "filename.jpg"

In [14]:
#UNCOMMENT AND RUN THE KMEANS ALGORITHM ON THE PROVIDED IMAGE AT URL ABOVE.
#im = kmeanspp(imageurl=url,k=kay,max_it=max_iter)

In [15]:
#Saves the output of the kmeans program to an image
#im.save(filename,format='JPEG')