## HỌ TÊN: TRƯƠNG GIA TIẾN
## MSSV: 20127643

## KMEANS

# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import PIL as pil

def _nearest_centroid_labels(pixels, centroids):
    """Return, for every pixel row, the index of its closest centroid.

    Distances are Euclidean and are computed one centroid at a time, so the
    temporaries stay at (n_pixels, channels) rather than
    (n_pixels, k, channels). Ties go to the lowest centroid index.
    """
    dist = np.empty((pixels.shape[0], centroids.shape[0]))
    for k, centroid in enumerate(centroids):
        dist[:, k] = np.linalg.norm(pixels - centroid, axis=1)
    return np.argmin(dist, axis=1)


def kmeans(img_1d, k_clusters, max_iter, init_centroids='random'):
    """Cluster the pixels of an image into ``k_clusters`` colours.

    Parameters
    ----------
    img_1d : array of shape (height, width, depth)
        Image whose pixels are clustered. It is not modified.
    k_clusters : int
        Number of centroids (must be >= 1).
    max_iter : int
        Upper bound on the number of assign/update iterations.
    init_centroids : {'random', 'in_pixels'}
        'random' draws every centroid component uniformly from 0..255;
        'in_pixels' uses the colours of randomly chosen pixels.

    Returns
    -------
    label : 1-D integer array of length height * width
        Index of the centroid assigned to each pixel (row-major order).
    centroids : integer array of shape (k_clusters, depth)
        Final centroids; cluster means are truncated to integers on storage.

    Raises
    ------
    ValueError
        If ``k_clusters`` is smaller than 1 or ``init_centroids`` is not one
        of the supported strategies.
    """
    if k_clusters < 1:
        raise ValueError("k_clusters must be at least 1")

    img = np.asarray(img_1d)

    # height <-> rows, width <-> columns, depth <-> colour channels
    height, width, depth = img.shape

    # Flatten to one row per pixel. Converting to float makes a copy, so the
    # input is untouched and integer pixel types cannot wrap around.
    pixels = img.reshape(height * width, depth).astype(np.float64)
    n_pixels = pixels.shape[0]

    # Initial centroids
    if init_centroids == "random":
        centroids = np.random.choice(256, size=(k_clusters, depth))
    elif init_centroids == "in_pixels":
        # Pick distinct pixels whenever there are enough of them: sampling the
        # same pixel twice would guarantee an empty cluster on the first pass.
        picked = np.random.choice(n_pixels, k_clusters,
                                  replace=k_clusters > n_pixels)
        centroids = pixels[picked].astype(int)
    else:
        raise ValueError(
            "init_centroids must be 'random' or 'in_pixels', got %r"
            % (init_centroids,)
        )

    label = None
    # Scoped replacement for a global np.seterr(invalid='ignore'): invalid
    # floating-point warnings are silenced only while iterating, and the
    # caller's NumPy error settings are restored afterwards.
    with np.errstate(invalid='ignore'):
        for _ in range(max_iter):
            # Snapshot (not alias) the centroids: they are updated in place
            # below, and the convergence test needs the previous values.
            prev_centroids = centroids.copy()

            # Assign every pixel to its nearest centroid
            label = _nearest_centroid_labels(pixels, centroids)

            # Move each centroid to the mean of the pixels assigned to it
            for k in range(k_clusters):
                members = pixels[label == k]
                if members.shape[0] == 0:
                    # No pixel chose this centroid: re-seed it randomly
                    centroids[k] = np.random.choice(256, size=depth)
                else:
                    centroids[k] = np.nanmean(members, axis=0)

            # Converged once no centroid component moved by more than ~2
            if np.allclose(prev_centroids, centroids, atol=2):
                break

    if label is None:
        # max_iter <= 0: no iteration ran, label against the initial centroids
        label = _nearest_centroid_labels(pixels, centroids)

    return label, centroids

# Assign each pixel the value of its nearest centroid
def assignNewImg(img, label, centroid):
    """Build a recoloured copy of ``img`` from cluster labels and centroids.

    Parameters
    ----------
    img : array of shape (height, width, depth)
        Source image. It is left unmodified.
    label : 1-D array of length height * width
        Centroid index of each pixel, in row-major order.
    centroid : sequence of colours, indexable by centroid index
        Colour written to every pixel carrying that index.

    Returns
    -------
    Array with the same shape and dtype as ``img`` in which each pixel is
    replaced by the colour of its centroid. Pixels whose label is outside
    ``range(len(centroid))`` keep their original colour.
    """
    height, width, depth = img.shape

    # reshape() returns a view for contiguous arrays, so copy explicitly:
    # writing through the view would overwrite the caller's image.
    recolored = img.reshape(height * width, depth).copy()

    # Give each pixel the colour of the centroid it belongs to
    for k in range(len(centroid)):
        recolored[label == k] = centroid[k]

    # Back to the original 3-D layout
    return recolored.reshape(height, width, depth)

def main():
    """Interactively quantize an image's colours with k-means and save it.

    Prompts for the input image path, the number of clusters, the iteration
    limit, the centroid initialisation strategy and the output file name
    (asking for an extension when the name has none). The recoloured image is
    shown with matplotlib and written to disk, then the label array and the
    centroids are printed.

    Raises ValueError if the cluster count or iteration limit is not an
    integer.
    """
    # Local imports: os is not imported at file level, and "import PIL" alone
    # does not load the PIL.Image submodule.
    import os
    from PIL import Image

    imgName = input("Enter image name: ")
    k_clus = int(input("Enter k_cluster value: "))
    maxIte = int(input("Enter max iterator (int): "))
    initType = input("Enter init_centroid (random, in_pixels): ")
    newImgName = input("Enter image save name: ")

    # splitext copes with names of any length and with extensions that are
    # not exactly three characters long (e.g. ".jpeg").
    if not os.path.splitext(newImgName)[1]:
        imgExt = input("Enter new image extension (jpg, png, pdf): ")
        newImgName = newImgName + '.' + imgExt.lstrip('.')

    # Close the file handle once the pixels are in memory. Converting to RGB
    # gives kmeans the (height, width, 3) layout it expects, also for
    # grayscale or palette images.
    with Image.open(imgName) as img:
        npArray = np.array(img.convert("RGB"))

    label, centroids = kmeans(npArray, k_clusters=k_clus, max_iter=maxIte, init_centroids=initType)

    newIMG = assignNewImg(npArray, label, centroids)
    plt.imshow(newIMG)

    plt.imsave(newImgName, newIMG)

    print("label array: ")
    print(label)

    print("Centroids: ")
    print(centroids)

# Start the interactive pipeline only when this file is executed directly,
# so importing it (e.g. to reuse kmeans) does not trigger the prompts.
if __name__ == "__main__":
    main()
