# PCA Algorithm to distinguish different hand signs extended with K-Means
By Mailin Brandt and Finian Landes

In [17]:
### Imports
import numpy as np
from PIL import Image
import os
import matplotlib.pyplot as plt
import statistics

In [11]:
def load_images(dir: str, resolution: int = 256, grayscale: bool = True) -> tuple[np.ndarray, np.ndarray]:
    """Load every file in a directory as one flattened image row.

    Files are processed in sorted filename order, so row ``i`` of the returned
    matrix belongs to entry ``i`` of the returned name array.

    Args:
        dir: Directory whose entries are all opened as images.
        resolution: Expected edge length; each image must contain exactly
            ``resolution ** 2`` pixels.
        grayscale: Accepted for interface compatibility but currently unused;
            band 0 of each image is always the one that is read.

    Returns:
        A tuple ``(images, names)`` where ``images`` has shape
        ``(n_files, resolution ** 2)`` and ``names`` is an array of filenames.

    Raises:
        ValueError: If an image does not contain ``resolution ** 2`` pixels.
    """
    image_names: list[str] = sorted(os.listdir(dir))
    n_pixels_expected: int = resolution ** 2
    images: np.ndarray = np.zeros([len(image_names), n_pixels_expected])
    for i, name in enumerate(image_names):
        # The context manager releases the file handle as soon as the pixel
        # data has been copied, instead of leaving it to the garbage collector.
        with Image.open(os.path.join(dir, name)) as img:
            n_pixels: int = img.width * img.height
            if n_pixels != n_pixels_expected:
                # Fail with the offending filename rather than an opaque
                # numpy broadcasting error.
                raise ValueError(
                    f"{name}: expected {n_pixels_expected} pixels "
                    f"({resolution}x{resolution}), got {n_pixels}"
                )
            images[i] = img.getdata(0)
    return images, np.array(image_names)

def get_train_test_dataset(images: np.ndarray, image_names: list[str], n_test) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Randomly split images and their names into a train and a test set.

    Args:
        images: One image per row.
        image_names: Names aligned with the rows of ``images``; a plain list
            or a numpy array are both accepted.
        n_test: Number of rows drawn (without replacement) for the test set.

    Returns:
        A tuple ``(train, test, names_train, names_test)``. The name entries
        are numpy arrays aligned with the rows of ``train`` and ``test``.

    Raises:
        ValueError: If ``images`` and ``image_names`` differ in length, or if
            ``n_test`` exceeds the number of images (raised by numpy).
    """
    # Convert up front: index-array selection and np.delete need arrays, and a
    # plain Python list of names would otherwise fail on names[indices].
    images = np.asarray(images)
    names: np.ndarray = np.asarray(image_names)
    if len(names) != len(images):
        raise ValueError(
            f"got {len(images)} images but {len(names)} image names"
        )

    indices: np.ndarray = np.random.choice(len(images), n_test, replace=False)
    test: np.ndarray = images[indices]
    names_test: np.ndarray = names[indices]
    # Everything that was not drawn for the test set stays in the train set.
    train: np.ndarray = np.delete(images, indices, axis=0)
    names_train: np.ndarray = np.delete(names, indices)
    return train, test, names_train, names_test

def closest_neighbour(p: np.ndarray, m: np.ndarray , U: np.ndarray, B: np.ndarray, C: np.ndarray) -> int:
    """Return the column index of ``C`` closest to the projection of ``p``.

    ``p`` is centred with ``m`` and projected with ``U``; the result is
    compared (squared Euclidean distance) against every column of ``C``.
    ``B`` is accepted but not used.
    """
    projected: np.ndarray = U @ (p - m)
    # C holds one coefficient vector per column, so transpose to get one per row.
    gaps: np.ndarray = projected - C.T
    squared_distances: np.ndarray = np.square(gaps).sum(axis=1)
    return squared_distances.argmin()

def coeff_matrix(m: np.ndarray, U: np.ndarray, B: np.ndarray) -> np.ndarray:
    """Project every row of ``B`` (after subtracting ``m``) with ``U``.

    Returns a matrix with one column of coefficients per row of ``B``.
    """
    centred: np.ndarray = B - m
    return U @ centred.T

def save_image(picture: np.ndarray, filename: str, location: str = "c:/Users/finia/OneDrive - SBL/PrA/PCA of hand signs/Processed Images/Eigenfaces", resolution: int = 256) -> None:
    """Rescale a flattened image to 0..255, display it and save it as JPEG.

    Args:
        picture: Flat array with ``resolution ** 2`` values of arbitrary range.
        filename: Output file name without extension; ``.jpg`` is appended.
        location: Directory the file is written to.
        resolution: Edge length of the square image.
    """
    # Shift so the smallest value becomes 0, then stretch to the full range.
    shifted: np.ndarray = picture - np.min(picture)
    peak = np.max(shifted)
    if peak > 0:
        scaled: np.ndarray = shifted / peak
    else:
        # A constant image has no dynamic range; dividing by zero would yield
        # NaNs whose cast to uint8 is undefined, so emit a black image instead.
        scaled = np.zeros_like(shifted, dtype=float)
    pixels: np.ndarray = (255 * scaled).astype(np.uint8)
    n_picture = np.reshape(pixels, (resolution, resolution))

    plt.imshow(n_picture, cmap = "gray")
    plt.show()

    im = Image.fromarray(n_picture, mode="L")
    im.save(os.path.join(location, filename + ".jpg"))

def init_centroids(k: int, points: np.ndarray) -> np.ndarray:
    """Pick ``k`` distinct rows of ``points`` at random as starting centroids."""
    chosen_rows = np.random.choice(len(points), size=k, replace=False)
    return points[chosen_rows]


def run_iteration(points: np.ndarray, centroids: np.ndarray, k: int) -> tuple[list, np.ndarray]:
    """Perform one K-Means step: assign points, then recompute centroids.

    Args:
        points: Data matrix with one point per row.
        centroids: Current centroids, one per row.
        k: Number of clusters.

    Returns:
        A tuple ``(classification, new_centroids)``: the index of the nearest
        centroid for every point (in row order) and the updated centroids with
        shape ``(k, points.shape[1])``.
    """
    # Assignment step: one vectorised norm per point gives the distance to
    # every centroid at once.
    classification: list = [
        np.argmin(np.linalg.norm(centroids - point, axis=1)) for point in points
    ]
    labels: np.ndarray = np.asarray(classification)

    # Update step.
    new_centroids: np.ndarray = np.zeros((k, points.shape[1]))
    for i in range(len(centroids)):
        members: np.ndarray = points[labels == i]
        if len(members) == 0:
            # An empty cluster has no mean; re-seed it with a random data point
            # so that the number of clusters stays at k.
            new_centroids[i] = points[np.random.randint(0, points.shape[0])]
        else:
            new_centroids[i] = members.mean(axis=0)
    return classification, new_centroids

def is_not_converged(last_centroids: np.ndarray, current_centroids: np.ndarray, e: float = 1e-20) -> bool:
    """Tell whether the centroids still moved by more than ``e`` in total.

    The movement is the sum, over all centroids, of the Euclidean distance
    between the previous and the current position.
    """
    total_shift: float = 0
    for idx, previous in enumerate(last_centroids):
        offset = previous - current_centroids[idx]
        total_shift += np.sqrt(np.sum(offset ** 2))
    return total_shift > e

def cluster_names(classification: list, names: list, k: int) -> list:
    """Group ``names`` into ``k`` lists according to their cluster index.

    ``classification[i]`` is the cluster of ``names[i]``; the result holds one
    list of names per cluster, preserving the input order inside each list.
    """
    buckets: list = [[] for _ in range(k)]
    for position, name in enumerate(names):
        cluster_index = classification[position]
        buckets[cluster_index].append(name)
    return buckets


def compute_coefficients(p, U, m):
    """Return the coefficients of ``p`` after centring with ``m`` and projecting with ``U``."""
    centred = p - m
    return U @ centred

def distance(p, q, m, U):
    """Return the Euclidean distance between ``p`` and ``q`` in coefficient space.

    Both inputs are turned into coefficients with ``compute_coefficients``
    (using the same ``U`` and ``m``) before they are compared.
    """
    coeffs_p = compute_coefficients(p, U, m)
    coeffs_q = compute_coefficients(q, U, m)
    gap = coeffs_p - coeffs_q
    return np.sqrt(np.sum(gap ** 2))

def outlying_distances(points: np.ndarray) -> list:
    """Return one ``distance`` value per row of ``points``.

    Each row is compared against an all-zero vector of the same shape, using
    the column-wise mean of ``points`` as ``m`` and the whole ``points`` array
    as the projection matrix ``U``.

    NOTE(review): with ``U = points`` and ``q = 0`` the mean cancels out, so
    the value is (up to rounding) the norm of ``points @ point`` rather than a
    distance from the mean - confirm this is the intended outlier measure.
    """
    centre: np.ndarray = np.mean(points, axis = 0)
    results: list = []
    for row in points:
        origin = np.zeros_like(row)
        results.append(distance(row, origin, centre, points))
    return results


In [8]:
# Locations of the pre-processed training and test images (machine-specific
# absolute paths).
image_dir_train: str = "c:/Users/finia/OneDrive - SBL/PrA/PCA of hand signs/Processed Images/Train/"
image_dir_test: str = "c:/Users/finia/OneDrive - SBL/PrA/PCA of hand signs/Processed Images/Test"
# Only referenced by the commented-out random split below.
n_test_images: int = 50
# Train and test sets come from two separate directories; each call returns
# the flattened image matrix together with the matching file names.
train, names_train = load_images(image_dir_train)
test, names_test = load_images(image_dir_test)
# Alternative kept for reference: draw the test set randomly from one image pool.
#train, test, names_train, names_test = get_train_test_dataset(images, image_names, n_test_images)
# Number of clusters used by the K-Means cell.
k: int = 5
# Number of values per flattened image (columns of the training matrix).
dimensions: int = train.shape[1]

In [None]:
# Mean over all training images (one value per pixel position).
meanface: np.ndarray = np.mean(train, axis=0)
# Centre the training data around the mean image.
A: np.ndarray = train - meanface
# Reduced SVD of the transposed, centred data: the columns of the first
# returned matrix are the left singular vectors, s holds the singular values.
eigenfaces, s, VT = np.linalg.svd(A.transpose(), full_matrices=False)
# Transpose so that every row is one singular vector ("eigenface").
eigenfaces: np.ndarray = eigenfaces.transpose()
# Coefficients of every training image in the eigenface basis (one column per image).
coeff_mat: np.ndarray = coeff_matrix(meanface, eigenfaces, train)

In [None]:
# One outlier score per training image, in the same order as names_train.
distances = np.array(outlying_distances(train))

# Sort scores ascending and reorder the file names the same way so that each
# score stays paired with its image.
new_names = np.array(names_train)
sorted_indicies = np.argsort(distances)
distances = distances[sorted_indicies]
new_names = new_names[sorted_indicies]

# Mean score; not used again in the visible part of the notebook.
avg = sum(distances) / len(distances)
# Population standard deviation relative to the mean, in percent.
print(round(statistics.pstdev(distances) / statistics.mean(distances) * 100, 5))
# Full (score, filename) listing, smallest score first.
print(list(zip(distances, new_names)))

#Lower 1000
#Upper 5000

2.84375
[(19602337232.73344, 'thumbs_up_49.jpg'), (19967675820.473194, 'pistol_46.jpg'), (20015205890.832, 'easy_52.jpg'), (20049949967.08636, 'pistol_51.jpg'), (20089328123.978493, 'pistol_49.jpg'), (20413941931.698296, 'easy_33.jpg'), (20417921076.710064, 'pistol_48.jpg'), (20443741178.150227, 'thumbs_up_53.jpg'), (20470932782.513466, 'pistol_53.jpg'), (20490895764.11545, 'easy_25.jpg'), (20499052184.415405, 'easy_32.jpg'), (20535007156.28178, 'pistol_50.jpg'), (20549845550.48775, 'thumbs_up_51.jpg'), (20603998144.69826, 'pistol_26.jpg'), (20646856044.683372, 'easy_31.jpg'), (20680857772.433777, 'pistol_21.jpg'), (20684516876.836975, 'pistol_52.jpg'), (20705775729.630905, 'pistol_15.jpg'), (20713050902.87245, 'pistol_45.jpg'), (20726188079.8666, 'pistol_29.jpg'), (20754859820.808094, 'metal_29.jpg'), (20760526997.143196, 'pistol_30.jpg'), (20781875312.310204, 'metal_27.jpg'), (20793546532.973766, 'pistol_54.jpg'), (20794401984.986923, 'thumbs_up_47.jpg'), (20839231718.355442, 'easy_5

In [24]:
# K-Means is run directly on the flattened training images.
points: np.ndarray = train

classification: list = []
centroids = init_centroids(k, points)
# All-zero "previous" centroids make the first convergence check compare
# against the freshly drawn centroids, so the loop is entered.
last_centroids = np.zeros_like(centroids)
# Iteration counter.
i = 0

# Repeat assignment/update steps until the centroids stop moving.
while is_not_converged(last_centroids, centroids):
    i += 1
    last_centroids = centroids.copy()
    classification, centroids = run_iteration(points, centroids, k)

#save_image(centroids[0], "test")
# Report how many training images ended up in each cluster.
cluster_n = cluster_names(classification, names_train,k)
for clster in cluster_n:
    print(len(clster))


54
89
55
20
57
