# Setup Environment

In [1]:
# Colab-only setup: mount Google Drive at /content/drive. The image paths and
# output folders configured further down are all located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Kernel K-means


In [18]:
import numpy as np
import random
from scipy.spatial.distance import cdist

from PIL import Image, ImageColor
import os

def read_input(filename):
    """Load an image and return per-pixel color and coordinate features.

    Parameters
    ----------
    filename : str or path-like
        Path of an image file that PIL can open.

    Returns
    -------
    dataC : np.ndarray, shape (H*W, channels)
        Color values, one row per pixel, in row-major (row by row) order.
        Single-channel images yield one column.
    dataS : np.ndarray
        (row, column) coordinate of every pixel, in the same order as dataC.
    size : tuple
        ``image.size`` as reported by PIL, i.e. (width, height).
    """
    # The context manager releases the file handle even if decoding fails.
    with Image.open(filename) as image:
        data = np.array(image)
        size = image.size

    height, width = data.shape[:2]
    # A 2-D array (e.g. grayscale) has no channel axis; treat it as 1 channel
    # instead of failing on data.shape[2].
    channels = data.shape[2] if data.ndim == 3 else 1

    # color data: one row of channel values per pixel
    dataC = data.reshape((height * width, channels))
    # spatial data: (row, column) coordinate for each pixel
    dataS = np.array([(i, j) for i in range(height) for j in range(width)])
    return dataC, dataS, size

def compute_kernel(gammaS, gammaC, S, C):
    """Build the Gram matrix as the product of two RBF kernels.

    Entry (a, b) equals
    exp(-gammaS * ||S[a] - S[b]||^2) * exp(-gammaC * ||C[a] - C[b]||^2).

    Parameters
    ----------
    gammaS, gammaC : float
        Scale factors applied to the squared distances computed from S and C.
    S, C : array-like, one row per sample
        The two feature sets; both must have the same number of rows.

    Returns
    -------
    np.ndarray of shape (n_samples, n_samples)
    """
    # Pairwise squared Euclidean distances within each feature set.
    sq_dist_S = cdist(S, S, 'sqeuclidean')
    sq_dist_C = cdist(C, C, 'sqeuclidean')

    rbf_S = np.exp(-gammaS * sq_dist_S)
    rbf_C = np.exp(-gammaC * sq_dist_C)
    # Element-wise product combines the two similarities.
    return rbf_S * rbf_C

def k_means(Gram_K, k, mode):
    """Run Lloyd-style k-means on the rows of ``Gram_K``.

    Each row of ``Gram_K`` is treated as the feature vector of one sample;
    centers live in that same row space and distances are Euclidean norms
    between a row and a center.

    Parameters
    ----------
    Gram_K : np.ndarray, 2-D
        One row per sample.
    k : int
        Number of clusters.
    mode : int
        Initialization mode, forwarded unchanged to ``initial_data``.

    Returns
    -------
    list of np.ndarray
        The label vector produced by every iteration, in order.
    """
    history = []

    centers = initial_data(Gram_K, k, mode)
    previous_centers = np.zeros(centers.shape, dtype=Gram_K.dtype)

    # Iterate until the centers stop moving (Frobenius norm of the change).
    while np.linalg.norm(centers - previous_centers) > 1e-10:
        # E-step: assign every sample to its nearest center.
        labels = np.zeros(Gram_K.shape[0], dtype=int)
        for sample_idx, row in enumerate(Gram_K):
            distances = [np.linalg.norm(row - centers[c]) for c in range(k)]
            labels[sample_idx] = np.argmin(distances)
        history.append(labels)

        # M-step: recompute every center as the mean of its members.
        previous_centers = centers
        centers = np.zeros(previous_centers.shape, dtype=Gram_K.dtype)
        member_count = np.zeros(k)
        for row, label in zip(Gram_K, labels):
            centers[label] += row
            member_count[label] += 1
        for c in range(k):
            # An empty cluster keeps an all-zero center; the divisor is
            # forced to 1 so no division by zero happens.
            if member_count[c] == 0:
                member_count[c] = 1
            centers[c] /= member_count[c]

    print("Total No. of iteration(s):", len(history))
    return history

def initial_data(Gram_K, k, mode):
    """Choose the k initial cluster centers among the rows of ``Gram_K``.

    Parameters
    ----------
    Gram_K : np.ndarray, 2-D
        One row per sample.
    k : int
        Number of centers to pick.
    mode : int
        0 -> k distinct rows drawn uniformly at random.
        1 -> k-means++-style seeding: the first center is a uniformly random
             row, every further center is a row drawn with probability
             proportional to its distance to the nearest center chosen so far.
        Any other value leaves the centers at all zeros.

    Returns
    -------
    np.ndarray of shape (k, Gram_K.shape[1])
    """
    n_points = Gram_K.shape[0]
    mean_k = np.zeros((k, Gram_K.shape[1]), dtype=Gram_K.dtype)

    # normal k-means -> random center
    if mode == 0:
        # Sample from the actual number of rows rather than a fixed 10000.
        center = np.array(random.sample(range(n_points), k))
        mean_k = Gram_K[center, :]

    # k-means++
    elif mode == 1:
        mean_k[0] = Gram_K[np.random.randint(n_points)]
        nearest = None  # distance of every row to its closest chosen center
        for cluster_id in range(1, k):
            # Only the most recently added center can shrink the minimum, so
            # update incrementally instead of recomputing all distances.
            to_latest = cdist(Gram_K, mean_k[cluster_id - 1:cluster_id]).ravel()
            nearest = to_latest if nearest is None else np.minimum(nearest, to_latest)

            # Roulette-wheel selection: first row whose cumulative distance
            # reaches a uniformly drawn fraction of the total.
            # NOTE(review): canonical k-means++ weights by *squared* distance;
            # plain distance is kept here to preserve the notebook's sampling.
            cumulative = np.cumsum(nearest)
            threshold = cumulative[-1] * np.random.rand()
            pick = int(np.searchsorted(cumulative, threshold, side='left'))
            # Guard against round-off pushing the index past the last row,
            # which would otherwise leave this center at zeros.
            pick = min(pick, n_points - 1)
            mean_k[cluster_id] = Gram_K[pick]
    return mean_k

def visualplot(datapoint_his, image_size, storename, k, mode, imagename):
    """Render the clustering history as an animated GIF plus a final JPEG.

    Parameters
    ----------
    datapoint_his : sequence of 1-D integer label arrays
        One label vector per iteration; labels are laid out row by row
        (index = row * width + column). Must contain at least one entry.
    image_size : tuple
        (width, height), the convention used by PIL's ``Image.size``.
    storename : str
        Output path prefix; file names are appended by plain string
        concatenation, so a directory must end with a path separator.
    k, mode, imagename :
        Only used to build the output file names.

    Raises
    ------
    IndexError
        If a label is not smaller than the number of palette colors (15).
    """
    color = [ImageColor.getrgb('darkorange'), ImageColor.getrgb('navy'), ImageColor.getrgb('Brown'), ImageColor.getrgb('greenyellow'),
             ImageColor.getrgb('purple'), ImageColor.getrgb('silver'), ImageColor.getrgb('gold'), ImageColor.getrgb('MediumAquamarine'),
             ImageColor.getrgb('black'), ImageColor.getrgb('magenta'), ImageColor.getrgb('peru'), ImageColor.getrgb('green'),
             ImageColor.getrgb('yellow'), ImageColor.getrgb('pink'), ImageColor.getrgb('red')
             ]
    palette = np.array(color, dtype=np.uint8)

    # PIL sizes are (width, height); the label vector is row-major, so it
    # reshapes to (height, width). Unpacking explicitly keeps non-square
    # images correct.
    width, height = image_size

    # One palette lookup per frame replaces a putpixel call per pixel.
    gif = [
        Image.fromarray(palette[np.asarray(labels).reshape(height, width)])
        for labels in datapoint_his
    ]

    gif[0].save(storename + f"k_means_gif_{imagename}_mode{mode}_k{k}.gif",
                format='GIF',
                save_all=True,
                append_images=gif[1:],
                duration=400, loop=0)
    gif[-1].save(storename + f"k_means_pic_{imagename}_mode{mode}_K{k}.jpg", format='JPEG')

In [14]:
# Input images and output locations, all under the mounted Google Drive.
# The storename values are used as prefixes that visualplot concatenates file
# names onto, so each must keep its trailing '/'.
img1 = '/content/drive/MyDrive/01_GMBA/3rd_semester(2022)/01-2_PM & 04-1_Machine Learning/HW6_1218/ML_HW06/image1.png'
img2 = '/content/drive/MyDrive/01_GMBA/3rd_semester(2022)/01-2_PM & 04-1_Machine Learning/HW6_1218/ML_HW06/image2.png'
# Output prefix used by the image1 experiment cell.
storename1 = '/content/drive/MyDrive/01_GMBA/3rd_semester(2022)/01-2_PM & 04-1_Machine Learning/HW6_1218/visualization/02_jordan0210/01_Kernel_Kmeans/'
# Output prefix used by the image2 experiment cell.
storename2 = '/content/drive/MyDrive/01_GMBA/3rd_semester(2022)/01-2_PM & 04-1_Machine Learning/HW6_1218/visualization/02_jordan0210/01_Kernel_Kmeans/01_image2/'

In [11]:
# Experiment: cluster image1 for every (k, initialization mode) combination
# and save a GIF of the iterations plus a JPEG of the final assignment.
imagename = 'image1'
K_lis = [2,3,4,5,6,8,10,12,15]
mode_lis = [0,1]

gamma_C = 1e-5
gamma_S = 1e-5
filename = img1

# The pixel features and the Gram matrix depend only on the image and the
# gammas, so build them once instead of once per (k, mode) pair.
dataC, dataS, image_size = read_input(filename)
Gram = compute_kernel(gamma_S, gamma_C, dataS, dataC)

for k in K_lis:
    for mode in mode_lis:
        datapoint_his = k_means(Gram, k, mode)
        visualplot(datapoint_his, image_size, storename1, k, mode, imagename)

Total No. of iteration(s): 9
Total No. of iteration(s): 15
Total No. of iteration(s): 7
Total No. of iteration(s): 18
Total No. of iteration(s): 12
Total No. of iteration(s): 19
Total No. of iteration(s): 29
Total No. of iteration(s): 28
Total No. of iteration(s): 16
Total No. of iteration(s): 32
Total No. of iteration(s): 30
Total No. of iteration(s): 67
Total No. of iteration(s): 54
Total No. of iteration(s): 33
Total No. of iteration(s): 50
Total No. of iteration(s): 45
Total No. of iteration(s): 69
Total No. of iteration(s): 86


In [19]:
# Experiment: cluster image2 for every (k, initialization mode) combination
# and save a GIF of the iterations plus a JPEG of the final assignment.
imagename = 'image2'
K_lis = [2,3,4,5,6,8,10,12,15]
mode_lis = [0,1]

gamma_C = 1e-5
gamma_S = 1e-5
filename = img2

# The pixel features and the Gram matrix depend only on the image and the
# gammas, so build them once instead of once per (k, mode) pair.
dataC, dataS, image_size = read_input(filename)
Gram = compute_kernel(gamma_S, gamma_C, dataS, dataC)

for k in K_lis:
    for mode in mode_lis:
        datapoint_his = k_means(Gram, k, mode)
        visualplot(datapoint_his, image_size, storename2, k, mode, imagename)

Total No. of iteration(s): 21
Total No. of iteration(s): 6
Total No. of iteration(s): 7
Total No. of iteration(s): 12
Total No. of iteration(s): 30
Total No. of iteration(s): 15
Total No. of iteration(s): 12
Total No. of iteration(s): 23
Total No. of iteration(s): 20
Total No. of iteration(s): 23
Total No. of iteration(s): 38
Total No. of iteration(s): 61
Total No. of iteration(s): 67
Total No. of iteration(s): 63
Total No. of iteration(s): 50
Total No. of iteration(s): 61
Total No. of iteration(s): 34
Total No. of iteration(s): 102
