In [1]:
import cv2
import numpy as np
import pandas as pd
import random
import sys
from collections import defaultdict

In [2]:
# Load the source image from disk.
img = cv2.imread('image.jpg')
# cv2.imread signals failure by returning None instead of raising, so fail
# here with a clear message rather than later on `img.reshape`.
if img is None:
    raise FileNotFoundError("cv2.imread could not read 'image.jpg'")

In [4]:
# Flatten the image into one row per pixel (3 values each) and copy it into
# a 64-bit integer array in a single chained expression.
pixel_values = img.reshape(-1, 3).astype('int64')

array([[212, 167, 129],
       [213, 168, 130],
       [213, 169, 128],
       ...,
       [204, 235, 238],
       [193, 226, 222],
       [156, 190, 184]])

In [None]:
# K-means++ Algorithm

def distance(a, b):
    """
    Compute the Euclidean distance between the points `a` and `b`.

    Coordinates are paired with `zip`, so if one point has more
    coordinates than the other the surplus ones are ignored.
    Returns the square root of the summed squared differences.
    """
    squared_total = 0
    for coord_a, coord_b in zip(a, b):
        squared_total += (coord_a - coord_b) ** 2
    return squared_total ** 0.5

def distance_squared(a, b):
    """
    Returns the squared Euclidean distance between a, b

    The sum of squared coordinate differences is computed directly rather
    than by squaring a square root, which avoids a floating-point round
    trip (e.g. the result for [0, 0] and [1, 1] is exactly 2.0).

    Coordinates are paired with `zip`, so surplus coordinates in the longer
    point are ignored.  The result is always returned as a float.
    """
    return float(sum((x - y) ** 2 for x, y in zip(a, b)))

def generate_k_pp(dataset, k):
    """
    Choose `k` initial cluster centers from `dataset` with k-means++ seeding.

    The first center is drawn uniformly at random.  Each further center is
    drawn with probability proportional to the squared distance between a
    point and its *nearest* already-chosen center, so points far away from
    every existing center are favoured.

    Parameters:
        dataset: an indexable sequence of points, each a sequence of
            numeric coordinates.
        k: number of centers to produce.  Values below 2 yield one center.

    Returns:
        A list of centers, each one an element taken from `dataset`.
        Duplicates can only occur when every point already coincides with
        a chosen center (for instance when all points are identical).
    """
    center = random.choice(dataset)
    k_points = [center]
    # Squared distance from each point to the nearest center chosen so far.
    nearest_sq = [float("inf")] * len(dataset)

    while len(k_points) < k:
        # Only the most recently added center can shrink a point's
        # nearest-center distance, so one pass per new center is enough.
        nearest_sq = [
            min(best, float(sum((c - x) ** 2 for c, x in zip(center, point))))
            for best, point in zip(nearest_sq, dataset)
        ]

        if sum(nearest_sq) > 0:
            # random.choices accepts unnormalised weights, so there is no
            # need to divide every weight by the total.
            index = random.choices(range(len(nearest_sq)), weights=nearest_sq)[0]
        else:
            # Every point sits on a chosen center: all weights are zero, so
            # fall back to a uniform draw instead of dividing by zero.
            index = random.randrange(len(nearest_sq))

        center = dataset[index]
        k_points.append(center)

    return k_points

def point_avg(points):
    """
    Accepts a list of points, each with the same number of dimensions.
    Returns a new point which is the center of all the points.

    Works for any number of dimensions: the result has one coordinate per
    input dimension, each the arithmetic mean of that coordinate over all
    points.

    Raises:
        ValueError: if `points` is empty, since no center is defined.
    """
    n = len(points)
    if n == 0:
        raise ValueError("point_avg() requires at least one point")
    # zip(*points) regroups the data by dimension: all x's, all y's, ...
    return [sum(axis_values) / n for axis_values in zip(*points)]

def update_centers(dataset, assignments):
    """
    Recompute the cluster centers from the current assignments.

    `assignments[i]` is the cluster index of `dataset[i]`.  The number of
    clusters is taken to be the largest assigned index plus one.  The
    points of each cluster are collected and averaged with `point_avg`.
    Returns the list of centers, ordered by cluster index.
    """
    cluster_count = 1 + max(assignments)
    members = [list() for _ in range(cluster_count)]

    # Bucket every point under the cluster index it is assigned to.
    for position in range(len(assignments)):
        members[assignments[position]].append(dataset[position])

    return [point_avg(group) for group in members]

def assign_points(data_points, centers):
    """
    Accepts a list of data_points and centers.
    Returns the names of the clusters that each data point belongs to

    For every data point the Euclidean distance to each center is computed
    with `distance`, and the index of the closest center is recorded.
    When several centers are equally close the lowest index wins.  If
    `centers` is empty every point is assigned index 0.

    Returns:
        A list of center indices, one per data point, in input order.
    """
    assignments = []
    for point in data_points:
        # A true positive infinity: any finite distance compares smaller,
        # which a large finite sentinel such as sys.maxsize cannot guarantee.
        shortest = float("inf")
        shortest_index = 0
        for i, center in enumerate(centers):
            val = distance(point, center)
            if val < shortest:  # strict: ties keep the earlier center
                shortest = val
                shortest_index = i
        assignments.append(shortest_index)
    return assignments

def lloyds_algo(dataset, k_points):
    """
    Run Lloyd's iteration starting from the centers in `k_points`.

    Points are assigned to their nearest center, the centers are
    recomputed from those assignments, and the two steps repeat until an
    iteration leaves the assignments unchanged.
    Returns a mapping from cluster index to the list of points in it.
    """
    current = assign_points(dataset, k_points)
    while True:
        refreshed = assign_points(dataset, update_centers(dataset, current))
        if refreshed == current:
            # Converged: re-assigning moved no point to another cluster.
            break
        current = refreshed

    clustering = defaultdict(list)
    for label, point in zip(refreshed, dataset):
        clustering[label].append(point)
    return clustering

def k_means_pp(dataset, k):
    """
    Runs k-means++ clustering algorithm

    Initial centers are chosen with `generate_k_pp` and then refined with
    `lloyds_algo`, whose result is returned unchanged.

    Parameters:
        dataset: the sequence of points to cluster.
        k: the number of clusters; must lie in [1, len(dataset)].

    Raises:
        ValueError: if `k` is outside [1, len(dataset)].
    """
    if k not in range(1, len(dataset) + 1):
        # The invalid argument is `k`, so name it in the message.
        raise ValueError("k must be in [1, len(dataset)]")

    k_points = generate_k_pp(dataset, k)
    return lloyds_algo(dataset, k_points)

In [None]:
# run the kmeans++ algorithm to find clusters
# The initial centers are drawn with the `random` module, so seed it first to
# make the resulting clusters reproducible across kernel restarts.
random.seed(42)
clustering = k_means_pp(pixel_values, 2)

In [None]:
# replace the pixels with the value of their cluster center
# Iterate the labels in sorted order so that centers[i] belongs to the i-th
# label regardless of the order in which labels were inserted into the dict.
centers = [point_avg(clustering[idx]) for idx in sorted(clustering)]
center_array = np.array(centers, dtype='float64')

# `clustering` groups pixel values by label but does not record where each
# pixel was located, so recover a per-pixel label by finding the nearest
# center for every row of `pixel_values` (one distance column per center).
squared_dists = np.stack(
    [((pixel_values - center) ** 2).sum(axis=1) for center in center_array],
    axis=1,
)
nearest_center = squared_dists.argmin(axis=1)

# Swap every pixel for its center, round the averaged values back to 8-bit
# integers, and restore the layout of the source image.
manipulated_img = np.clip(np.rint(center_array[nearest_center]), 0, 255).astype('uint8')
manipulated_img = manipulated_img.reshape(img.shape)

In [None]:
# show the manipulated image
# The window title must be delimited by plain ASCII quotes to be valid Python.
cv2.imshow('Display Window', manipulated_img)
cv2.waitKey(0)  # wait for a key press
cv2.destroyAllWindows()