#### KMeans

##### Image Preparation
1) Open the image.
2) Convert to an array (height, width, rgb)
3) Reshape to an array (height*width, rgb), one row per pixel

##### KMeans Application
4) Fit k-means from converted array.
5) (Predict for converted array? Or, just use labels from fitting?)
6) Create array of fit/predicted labels and centroids.

##### Image Recovery
7) Reshape fit-array into image size.
8) Convert to image from array.
9) Enjoy :)

In [1]:
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans

In [2]:
def reshapeImageAsArray(image):
    '''Flatten an RGB image into one row of RGB values per pixel.

    image is anything np.asarray accepts (for example a PIL Image) whose
    array form has shape (rows, columns, 3).

    Returns a float64 array of shape (rows*columns, 3), one pixel per row,
    in row-major order: pixel (i, j) of the image is row i*columns + j of
    the result.

    If the array form is not 3-dimensional with exactly 3 channels
    (grayscale, RGBA, ...), the string
    "Image does not have just RGB values." is returned instead of an array.'''
    # NOTE(review): the original author flagged that some images come out
    # rotated.  The indexing here is plain row-major, so the cause is
    # presumably upstream (e.g. EXIF orientation not applied when the image
    # is loaded) -- TODO confirm.
    imarray = np.asarray(image)
    # Testing ndim first keeps 2-D (grayscale) input from raising IndexError
    # on shape[2]; it is reported like any other non-RGB input.
    if imarray.ndim != 3 or imarray.shape[2] != 3:
        return "Image does not have just RGB values."

    rows, columns = imarray.shape[0], imarray.shape[1]
    # A C-order reshape walks the image row by row, which is exactly the
    # i*columns + j ordering documented above, with no per-pixel Python loop.
    return imarray.reshape(rows * columns, 3).astype(np.float64)

def returnArrayToImage(imarray,rows,columns):
    '''Rebuild an image from a flat array of per-pixel RGB values.

    imarray holds one row of 3 values (R, G, B) per pixel in row-major
    order: row i*columns + j becomes pixel (i, j).  It must contain at
    least rows*columns pixels; any extra trailing rows are ignored.
    rows and columns are the height and width of the image to rebuild.

    Returns a PIL Image created from a (rows, columns, 3) uint8 array.
    Values are cast with astype(np.uint8), so fractional parts are dropped,
    not rounded.

    Raises ValueError if imarray holds fewer than rows*columns pixels.'''
    # NOTE(review): the original author flagged that some images come out
    # rotated.  The indexing here is plain row-major, so the cause is
    # presumably upstream (e.g. EXIF orientation) -- TODO confirm.
    pixel_count = rows * columns
    # Going through float64 first keeps the same cast path to uint8 that the
    # previous loop-based version used.
    flat = np.asarray(imarray, dtype=np.float64)[:pixel_count]
    # C-order reshape puts flat row i*columns + j at grid[i, j], replacing
    # the per-pixel Python loop.
    grid = flat.reshape(rows, columns, 3)
    return Image.fromarray(grid.astype(np.uint8))

def kmeanspp(imageurl,k,max_it):
    '''Reduce an image to k colors using k-means with k-means++ seeding.

    imageurl is handed to Image.open.  k is the number of centroids and
    max_it is the max iteration count of the KMeans fit.

    Every pixel is replaced by the centroid of the cluster it was assigned
    to during fitting.  Returns the result as a new PIL Image with the same
    number of rows and columns as the input.

    Images that are not already RGB (grayscale, RGBA, palette, ...) are
    converted to RGB before clustering, so any alpha channel is dropped.'''
    # NOTE(review): the file is used exactly as decoded; if outputs appear
    # rotated, EXIF orientation is a likely cause -- TODO confirm.
    # The context manager closes the file handle; the array is built from a
    # converted copy, so it stays valid after the file is closed.
    with Image.open(imageurl) as picture:
        pixels = np.asarray(picture.convert("RGB"))
    # Decode once and reuse the array for both the shape and the pixel data.
    rows, columns = pixels.shape[0], pixels.shape[1]
    # One row of RGB values per pixel.
    imarray = reshapeImageAsArray(pixels)
    #Perform kmeans++ on the image with k clusters and max_it max iteration count.
    kmeans = KMeans(n_clusters = k, init = 'k-means++', max_iter = max_it).fit(imarray)
    # Indexing the centroids by the label array looks up every pixel's
    # centroid in one step.
    kmeansarray = kmeans.cluster_centers_[kmeans.labels_]
    #Return the image after reshaping the array into the proper form of the og image.
    return returnArrayToImage(kmeansarray,rows,columns)

def getKMeansLabels(imageurl,k,max_it):
    '''Run k-means (k-means++ seeding) on an image and return the labels.

    imageurl is handed to Image.open.  k is the number of centroids and
    max_it is the max iteration count of the KMeans fit.

    Returns the fitted model's labels_ array: one cluster index per pixel
    (i.e. which centroid each pixel was assigned to), in the same pixel
    order that reshapeImageAsArray produces.

    Images that are not already RGB (grayscale, RGBA, palette, ...) are
    converted to RGB before clustering, so any alpha channel is dropped.'''
    # The context manager closes the file handle; the array is built from a
    # converted copy, so it stays valid after the file is closed.
    with Image.open(imageurl) as picture:
        pixels = np.asarray(picture.convert("RGB"))
    # One row of RGB values per pixel.  The image shape itself is not needed
    # here, so it is no longer computed.
    imarray = reshapeImageAsArray(pixels)
    #Perform kmeans++ on the image with k clusters and max_it max iteration count.
    kmeans = KMeans(n_clusters = k, init = 'k-means++', max_iter = max_it).fit(imarray)
    return kmeans.labels_

def recolorKMeans(imLabels, colorlist,rows,columns):
    '''Paint an image from k-means labels using a caller-supplied palette.

    imLabels holds one integer label per pixel, as produced by a k-means
    fit.  colorlist holds one RGB color per label, each with 3 values in
    [0,255]; it may be a (k, 3) array or a sequence of 3-element
    lists/arrays.  rows and columns are the size of the original image.

    Instead of the usual k-means result, where each label is replaced by
    its color centroid, each pixel with label n gets colorlist[n].  Returns
    the recolored picture as built by returnArrayToImage.

    If the colors do not each have exactly 3 components, the string
    "Color vectors are not (3,)." is returned instead of an image.'''
    # Normalising to an array lets plain lists of lists through, which have
    # no .shape attribute of their own.
    colors = np.asarray(colorlist)
    # No reshaping is attempted: anything that is not k rows of 3 values is
    # rejected with the error string.
    if colors.ndim != 2 or colors.shape[1] != 3:
        return "Color vectors are not (3,)."
    # Indexing the palette by the label array looks up every pixel's color
    # in one step.
    recoloredArray = colors[np.asarray(imLabels)]
    return returnArrayToImage(recoloredArray,rows,columns)

def kmeansppBatch(urllist,klist,max_iter):
    '''Run kmeanspp for every image in urllist with every k in klist.

    urllist is a list of image path strings, klist a list of cluster
    counts, and max_iter the max iteration count passed to each run.

    Each result is saved in the same directory as its source image, with
    "Keq<k>" prefixed to the file name (e.g. "IMG.jpg" with k=3 is saved as
    "Keq3IMG.jpg"), and a completion message is printed.  A file that
    already exists under that name is overwritten.  Returns None.'''
    import os

    for url in urllist:
        # Prefix only the file name: prefixing the whole string would turn
        # "photos/a.jpg" into the non-existent path "Keq3photos/a.jpg".
        folder, filename = os.path.split(url)
        for kay in klist:
            im = kmeanspp(imageurl=url,k = kay,max_it = max_iter)
            outpath = os.path.join(folder, "Keq"+str(kay)+filename)
            im.save(outpath)
            print(outpath+" is complete.")
            

#### Example of running kmeans on one image.

In [3]:
#Insert picture url here:
#url = 'YOURURLHEREASSTRING'
#Change to number of clusters
#kay = 3
#Set higher if you'd like closer approximations.
#max_iter = 50
#Save output as [insert filename]:
#filename = "MARIOSMUSTACHE"

#### Example of saving a resulting image

In [4]:
#UNCOMMENT AND RUN THE KMEANS ALGORITHM ON THE PROVIDED IMAGE AT URL ABOVE.
#im = kmeanspp(imageurl=url,k=kay,max_it=max_iter)

In [5]:
#Saves the output of the kmeans program to an image
#im.save(str(filename+str(kay)+".jpg"),format='JPEG')

#### Example of kmeans Batch on multiple images

In [6]:
# Or provide a list of urls and this will take care of all of them!
# This automatically saves the output of each image.  It is intended to be
# a batch process.
#
# NEED TO CORRECT: Overwrites previously saved files with the same name.
# This is hazardous behavior.  Needs to be corrected. Corrected Oct 14.
#urlList = ["IMG_3159.jpg","IMG_3193.jpg","IMG_3206.jpg","IMG_3264.jpg","IMG_3270(1).jpg","IMG_3292(1).jpg","IMG_3302.jpg"]
#kList = [2,3,4,5]
#max_iter = 50

#kmeansppBatch(urlList,kList,max_iter)

#### Example of recoloring an image from kmeans labels

In [7]:
# To recolor we need the labels from the kmeans process.
# But, this takes a lot of time.  So we make a memo of 
# the results instead of rerunning kmeans each time we
# want to recolor our image.

#labels = getKMeansLabels("IMG_7702.jpg",k=8,max_it=50)
#picture = Image.open("IMG_7702.jpg")
#rows, columns = np.asarray(picture).shape[0], np.asarray(picture).shape[1]

In [8]:
#With the labels already found from the kmeans process, recoloring can easily and
# (relatively) quickly be done.

#colorlist = np.array([[0,0,0],[32,32,32],[64,64,64],[96,96,96],[128,128,128],[160,160,160],[192,192,192],[224,224,224]])
#recolorKMeans(labels,colorlist,rows,columns)