In [None]:
from __future__ import division, print_function
import argparse
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import os
import random


In [None]:
def init_centroids(num_clusters, image):
    """
    Initialize a `num_clusters` x image_shape[-1] nparray to RGB
    values of randomly chosen pixels of `image`

    Parameters
    ----------
    num_clusters : int
        Number of centroids/clusters
    image : nparray
        (H, W, C) image represented as an nparray. An already flattened
        (H*W, C) array of pixels is accepted as well.

    Returns
    -------
    centroids_init : nparray
        (num_clusters, C) array; each row is the value of one pixel of
        `image`, drawn without replacement over pixel positions

    Raises
    ------
    ValueError
        Raised by `random.sample` when `num_clusters` exceeds the number
        of pixels (or is negative)
    """
    pixels = np.asarray(image)
    # Collapse every leading axis so that one row == one pixel. Sampling
    # `list(image)` directly would pick whole image rows for an (H, W, C) input.
    pixels = pixels.reshape(-1, pixels.shape[-1])

    # Sample row positions (not the rows themselves) so the result can be
    # gathered into a single ndarray instead of a Python list of arrays.
    chosen_rows = random.sample(range(pixels.shape[0]), num_clusters)
    centroids_init = pixels[chosen_rows]  # fancy indexing returns a copy
    return centroids_init
    

In [None]:

def calculate_distance_with_centroids(X,c):
    '''Assign every row of `X` to its nearest centroid.

    Parameters
    ----------
    X : nparray
        (N, C) array with one point (pixel) per row
    c : nparray or sequence of 1-D arrays
        K centroids, each with C components

    Returns
    -------
    idx : nparray
        (N,) integer array; idx[j] is the index of the centroid with the
        smallest squared Euclidean distance to X[j] (first one on ties)
    '''
    # Work in float so that unsigned integer pixel data cannot wrap around
    # when a larger centroid value is subtracted from it.
    points = np.asarray(X, dtype=float)
    num_points = np.size(points, 0)
    K = np.size(c, 0)  # number of centroids

    # One column of squared distances per centroid, filled in place.
    distance_array = np.empty((num_points, K))
    for i in range(K):
        centroid = np.asarray(c[i], dtype=float)
        # Broadcasting subtracts the centroid from every row of `points`.
        squared_diff = np.power(points - centroid, 2)
        distance_array[:, i] = np.sum(squared_diff, axis=1)

    # The square root is skipped: it does not change which centroid is closest.
    idx = np.argmin(distance_array, axis=1)
    return idx




In [None]:
def compute_centroids(X,idx,K,previous_centroids=None):
    '''Recompute each centroid as the mean of the points assigned to it.

    Parameters
    ----------
    X : nparray
        (N, C) array with one point (pixel) per row
    idx : nparray
        Cluster index of every row of `X`; flattened to length N before use
    K : int
        Number of clusters
    previous_centroids : nparray, optional
        (K, C) centroids from the previous iteration. When given, a cluster
        that received no points keeps its previous centroid.

    Returns
    -------
    centroids : nparray
        (K, C) float array of cluster means. A cluster with no assigned
        points is never divided by zero: its row is copied from
        `previous_centroids` if supplied, otherwise it is left at zero.
    '''
    points = np.asarray(X, dtype=float)
    labels = np.asarray(idx).ravel()
    n = np.size(points, 1)  # number of components per point
    centroids = np.zeros((K, n))

    for i in range(K):
        members = labels == i
        member_count = np.count_nonzero(members)
        if member_count == 0:
            # Dividing by zero here would produce a NaN centroid, and
            # np.argmin picks NaN columns first, so every point would be
            # assigned to the empty cluster on the next pass.
            if previous_centroids is not None:
                centroids[i] = previous_centroids[i]
            continue
        centroids[i] = points[members].sum(axis=0) / member_count
    return centroids

In [None]:
def run_k_means(X,centroids,max_iters):
    '''Alternate the k-means assignment and update steps `max_iters` times.

    Parameters
    ----------
    X : nparray
        (N, C) array with one point (pixel) per row
    centroids : nparray or sequence of 1-D arrays
        K starting centroids; the caller's object is not modified
    max_iters : int
        Number of assignment/update rounds to run

    Returns
    -------
    centroids : nparray
        (K, C) float array of centroids after the last round
    idx : nparray
        Cluster index of every row of `X` from the last assignment step.
        When `max_iters` is 0 no assignment is made and an (N, 1) array of
        zeros is returned instead.
    '''
    m = np.size(X,0)  # number of points
    # Copy into a float ndarray so a list of rows works and the input is untouched.
    centroids = np.array(centroids, dtype=float)
    K = np.size(centroids,0)  # number of clusters
    idx = np.zeros((m,1))  # placeholder, only returned when no round is run

    for _ in range(max_iters):
        idx = calculate_distance_with_centroids(X,centroids)  # assignment step
        updated = np.asarray(compute_centroids(X,idx,K), dtype=float)  # update step

        # A cluster that attracted no points has no meaningful mean; keep its
        # previous centroid so an invalid row cannot leak into the next round.
        empty = np.bincount(idx, minlength=K) == 0
        updated[empty] = centroids[empty]
        centroids = updated

    return centroids,idx

In [None]:
def update_centroids(centroids, image, max_iter=30, print_every=10):
    """
    Carry out k-means centroid update step `max_iter` times

    Parameters
    ----------
    centroids : nparray
        The starting centroids stored as an nparray, one centroid per row
    image : nparray
        (H, W, C) image represented as an nparray. An already flattened
        (H*W, C) array of pixels is accepted as well.
    max_iter : int
        Number of iterations to run
    print_every : int
        Frequency of status update. Accepted for interface compatibility;
        it is not used here because `run_k_means` runs the whole loop and
        reports no per-iteration progress.

    Returns
    -------
    new_centroids : nparray
        Updated centroids
    """
    # run_k_means works on a 2-D (points, channels) array, so collapse the
    # spatial axes; an input that is already 2-D is left unchanged by this.
    pixels = np.asarray(image)
    pixels = pixels.reshape(-1, pixels.shape[-1])

    # Start from the centroids the caller passed in rather than drawing a new
    # random set from a notebook-level `num_clusters` variable.
    new_centroids, _ = run_k_means(pixels, centroids, max_iter)
    return new_centroids
    

In [None]:
def update_image(image, centroids):
    """
    Update RGB values of pixels in `image` by finding
    the closest among the `centroids`

    Parameters
    ----------
    image : nparray
        (H, W, C) image represented as an nparray. Every value is divided
        by 255.0 before matching, so the input is presumably on a 0-255
        intensity scale (confirm for images loaded as floats).
    centroids : nparray
        The centroids stored as an nparray (or a sequence of rows), one
        centroid per row, on the same scale as the normalized image

    Returns
    -------
    image : nparray
        (H, W, C) array in which every pixel is replaced by the value of
        its closest centroid
    """
    image = np.asarray(image)
    # Integer-array indexing below needs an ndarray, not a list of rows.
    centroids = np.asarray(centroids)

    rows = image.shape[0]
    cols = image.shape[1]

    # Normalize, then flatten to one pixel per row; -1 keeps whatever channel
    # count the image has instead of assuming exactly three.
    pixels = (image / 255.0).reshape(rows * cols, -1)

    idx = calculate_distance_with_centroids(pixels, centroids)
    new_img_matrix = centroids[idx]  # look up each pixel's centroid value
    new_img_matrix = np.reshape(new_img_matrix, (rows, cols, -1))

    return new_img_matrix


In [1]:
# Run configuration
max_iter = 10
print_every = 2
image_path_small = 'peppers-small.tiff'
image_path_large = 'peppers-large.tiff'
num_clusters = 16
figure_idx = 0


def _show_and_save(pixels, title, filename, figure_number):
    """Draw `pixels` on figure `figure_number` without axes, save it as a PNG
    named `filename` in the working directory, and return the saved path."""
    plt.figure(figure_number)
    plt.imshow(pixels)
    plt.title(title)
    plt.axis('off')
    path = os.path.join('.', filename)
    plt.savefig(fname=path, transparent=True, format='png', bbox_inches='tight')
    return path


# Load small image (copied so the array returned by imread is never modified)
image = np.copy(mpimg.imread(image_path_small))
print('[INFO] Loaded small image with shape: {}'.format(np.shape(image)))
savepath = _show_and_save(image, 'Original small image', 'orig_small.png', figure_idx)
figure_idx += 1

# Initialize centroids from the normalized, flattened small image
rows = image.shape[0]
cols = image.shape[1]
image = image / 255.0  # Normalizing the pixel values
image = image.reshape(rows * cols, -1)  # one pixel per row

centroids_init = init_centroids(num_clusters, image)
# Report only after the centroids actually exist.
print('[INFO] Centroids initialized')

# Update centroids
print(25 * '=')
print('Updating centroids ...')
print(25 * '=')
centroids = update_centroids(centroids_init, image, max_iter, print_every)

# Load large image
image = mpimg.imread(image_path_large)

print('[INFO] Loaded large image with shape: {}'.format(np.shape(image)))
savepath = _show_and_save(image, 'Original large image', 'orig_large.png', figure_idx)
figure_idx += 1

print(25 * '=')
print('Updating large image ...')
print(25 * '=')
# Recolor the large image with the centroids learned from the small one.
image_clustered = update_image(image, centroids)

savepath = _show_and_save(image_clustered, 'Updated large image', 'updated_large.png', figure_idx)
figure_idx += 1

print('\nCOMPLETE')
plt.show()


NameError: name 'np' is not defined