In [7]:
### Imports
import numpy as np
from PIL import Image
import os

In [57]:
def init_centroids(k: int, range: list, dimensions: int) -> np.ndarray:
    init_centroids: np.ndarray = (np.random.rand(k, dimensions) * abs(range[1] - range[0])) + range[0]
    return init_centroids

def run_iteration(points: np.ndarray, centroids: np.ndarray, k: int) -> tuple[np.ndarray,list]:# list of new centroids, list of classification of point
    classification: list = []
    clusters: list[np.ndarray] = [[] for _ in range(k)]
    for point in points:
        distances: list[np.ndarray] = [np.linalg.norm(point - centroid) for centroid in centroids]
        classification.append(distances.index(min(distances)))
        clusters[classification[-1]].append(point)
    for i in range(len(centroids)):
        centroids[i] = np.mean(clusters[i], axis = 0)
    return centroids, classification

def find_range(points: np.ndarray) -> list:
    lower: float = np.inf
    upper: float = -np.inf
    for point in points:
        new_min: float = min(point)
        new_max: float = max(point)
        if new_min < lower:
            lower = new_min
        if new_max > upper:
            upper = new_max
    return [lower,upper]

def is_not_converged(last_centroids: np.ndarray, current_centroids: np.ndarray, e: float = 1e-10) -> bool:
    dist: float = 0
    for  i in range(len(last_centroids)):
        dist += np.linalg.norm(last_centroids[i] - current_centroids[i])
    return dist > e

def load_images(dir: str, resolution: int = 256, grayscale: bool = True) -> np.ndarray:
    image_names: list = sorted(os.listdir(dir))
    n: int = len(image_names)
    images: np.ndarray = np.zeros([n, resolution ** 2])
    for i, name in enumerate(image_names):
        img: Image = Image.open(dir + "//"+ name)
        images[i] = img.getdata(0)
    return images, np.array(image_names)

In [50]:
image_dir_train: str = "c:/Users/finia/OneDrive - SBL/PrA/PCA of hand signs/Processed Images/Train/"
train, names_train = load_images(image_dir_train)
train = train / 255
k = 5
dimensions = train.shape[1]
points: np.ndarray = train

In [60]:
last_centroids: np.ndarray = np.zeros((k, dimensions))
classification: list = []
centroids: np.ndarray = init_centroids(k, find_range(points), dimensions)

while is_not_converged(last_centroids, centroids):
    last_centroids = centroids
    centroids, classification = run_iteration(points, centroids, k)
    break

print(classification)
print(centroids)

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 3, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 2, 4, 3, 3, 2, 3, 3, 2, 3, 2, 3, 2, 2, 4, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 4, 2, 4, 2, 3, 4, 3, 4, 3, 2, 3, 2, 3, 2, 3, 3, 2, 3, 3, 2, 3, 2, 2, 4, 3, 4, 4, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 3, 2, 2, 2, 2, 2, 4, 4, 2, 3, 3, 3, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 4, 3, 2, 3, 2, 2, 3, 3]
[[       nan        nan        nan ...        nan        nan        nan]
 [       nan        nan        nan ...        nan        nan        nan]
 [0.60039683 0.60172736 0.60

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
