# PCA Algorithm to distinguish different hand signs
By Mailin Brandt and Finian landes

In [1]:
### Imports
import numpy as np
from PIL import Image
import os
import matplotlib.pyplot as plt

In [96]:
def load_images(dir: str, resolution: int = 256, grayscale: bool = True) -> np.ndarray:
    image_names: list = sorted(os.listdir(dir))
    n: int = len(image_names)
    images: np.ndarray = np.zeros([n, resolution ** 2])
    for i, name in enumerate(image_names):
        img: Image = Image.open(dir + "//"+ name)
        images[i] = img.getdata(0)
    return images, np.array(image_names)

def get_train_test_dataset(images: np.ndarray, image_names: list[str], n_test) -> list[np.ndarray]:
    indices: list = np.random.choice(len(images), n_test, replace = False)
    test: np.ndarray = images[indices]
    names_test: list[str] = image_names[indices]
    train: np.ndarray = np.delete(images, indices, axis = 0)
    names_train: list[str] = np.delete(image_names, indices)
    return train, test, names_train, names_test

def closest_neighbour(p: np.ndarray, m: np.ndarray , U: np.ndarray, B: np.ndarray, C: np.ndarray) -> int:
    c1: np.ndarray = np.matmul(U, p-m)
    d: np.ndarray = np.sum((c1 - C.T)**2, axis=1)
    return np.argmin(d)

def coeff_matrix(m: np.ndarray, U: np.ndarray, B: np.ndarray) -> np.ndarray:
    return np.matmul(U, (B - m).T)

def save_image(picture: np.ndarray, filename: str, location: str = "c:/Users/finia/OneDrive - SBL/PrA/PCA of hand signs/Processed Images/Eigenfaces", resolution: int = 256) -> None:
    min_val: float = np.min(picture)
    picture = picture - min_val
    picture = picture / np.max(picture)
    picture = (255 * picture).astype(np.uint8)
    n_picture = np.reshape(picture, (resolution, resolution)).astype(dtype = np.uint8)
    plt.imshow(n_picture, cmap = "gray")
    plt.show()
    im = Image.fromarray(n_picture, mode="L")
    im.save(location + "/" + filename + ".jpg")

def init_centroids(k: int, points: np.ndarray) -> np.ndarray:
    centroids: np.ndarray = points[np.random.choice(len(points), k, replace = False)]
    return centroids


def run_iteration(points: np.ndarray, centroids: np.ndarray, k: int) -> tuple[np.ndarray, list]:
    classification: list = []
    clusters: list[np.ndarray] = [[] for _ in range(k)]
    new_centroids: np.ndarray = np.zeros((k, points.shape[1]))

    for point in points:
        distances: list[np.ndarray] = [np.linalg.norm(point - centroid) for centroid in centroids]
        classification.append(np.argmin(distances))
        clusters[classification[-1]].append(point)
    for i in range(len(centroids)):
        if len(clusters[i]) == 0:
            new_centroids[i] = points[np.random.randint(0, points.shape[0])]
        else:
            new_centroids[i] = np.mean(clusters[i], axis = 0)
    return classification, new_centroids

def is_not_converged(last_centroids: np.ndarray, current_centroids: np.ndarray, e: float = 1e-20) -> bool:
    dist: float = 0
    for  i in range(len(last_centroids)):
        dist += np.sqrt(np.sum((last_centroids[i] - current_centroids[i]) ** 2))
    return dist > e

def cluster_names(classification: list, names: list, k: int) -> list:
    classified_names: list = [[] for _ in range(k)]
    for i, name in enumerate(names):
        classified_names[classification[i]].append(name)
    return classified_names


def compute_coefficients(p, U, m):
    return np.matmul(U, p-m)

def distance(p, q, m, U):
    c1 = compute_coefficients(p, U, m)
    c2 = compute_coefficients(q, U, m)
    return np.sqrt(np.sum((c1-c2)**2))

def outlying_distances(points: np.ndarray, meanface: np.ndarray) -> list:
    dist: list = [distance(point, np.zeros_like(point), meanface, points) for point in points]
    return dist


In [51]:
image_dir_train: str = "c:/Users/finia/OneDrive - SBL/PrA/PCA of hand signs/Processed Images/Train/"
image_dir_test: str = "c:/Users/finia/OneDrive - SBL/PrA/PCA of hand signs/Processed Images/Test"
n_test_images: int = 50
train, names_train = load_images(image_dir_train)
test, names_test = load_images(image_dir_test)
#train, test, names_train, names_test = get_train_test_dataset(images, image_names, n_test_images)
k = 20
dimensions = train.shape[1]

In [98]:
meanface: np.ndarray = np.mean(train, axis=0)
A: np.ndarray = train - meanface
U, s, VT = np.linalg.svd(A.transpose(), full_matrices=False)
U: np.ndarray = U.transpose()
C: np.ndarray = coeff_matrix(meanface, U, train)


distances = np.array(outlying_distances(np.matmul(C, U), meanface))

new_names = np.array(names_train)
sorted_indicies = np.argsort(distances)
distances = distances[sorted_indicies]
new_names = new_names[sorted_indicies]


avg = sum(distances) / len(distances)

print(list(zip(distances, new_names)))

#Lower 1000
#Upper 5000

[(5.181584951578855e-08, 'thumbs_up_54.jpg'), (490071.97898937366, 'thumbs_up_53.jpg'), (757363.9790544088, 'thumbs_up_52.jpg'), (793231.1961857426, 'thumbs_up_51.jpg'), (845410.5712859697, 'thumbs_up_50.jpg'), (859212.1503885384, 'thumbs_up_49.jpg'), (909307.44016822, 'thumbs_up_48.jpg'), (922169.0913079008, 'thumbs_up_47.jpg'), (1010808.9662358696, 'thumbs_up_46.jpg'), (1039801.6934484039, 'thumbs_up_45.jpg'), (1045069.816016244, 'thumbs_up_44.jpg'), (1082428.7461077026, 'thumbs_up_43.jpg'), (1092289.050750638, 'thumbs_up_42.jpg'), (1103797.303230254, 'thumbs_up_41.jpg'), (1130520.0146661345, 'thumbs_up_40.jpg'), (1165245.337531629, 'thumbs_up_39.jpg'), (1180224.8259662248, 'thumbs_up_38.jpg'), (1209304.2572463339, 'thumbs_up_37.jpg'), (1223569.4263543908, 'thumbs_up_36.jpg'), (1237321.8006610335, 'thumbs_up_35.jpg'), (1291589.5631710377, 'thumbs_up_34.jpg'), (1302207.0154156545, 'thumbs_up_33.jpg'), (1318137.8630357198, 'thumbs_up_32.jpg'), (1370940.2855975714, 'thumbs_up_31.jpg'), 

In [94]:
points: np.ndarray = np.matmul(C, U)
last_centroids: np.ndarray = np.zeros((k, dimensions))
classification: list = []
centroids = init_centroids(k, points)
last_centroids = np.zeros_like(centroids)
i = 0

while is_not_converged(last_centroids, centroids):
    i += 1
    last_centroids = centroids.copy()
    classification, centroids = run_iteration(points, centroids, k)

#save_image(centroids[0], "test")
cluster_n = cluster_names(classification, names_train,k)
for clster in cluster_n:
    print(len(clster))


1
1
1
1
1
1
256
1
1
1
1
1
1
1
1
1
1
1
1
1


In [7]:
successes: int = 0
for j in range(len(test)):
    p: np.ndarray = test[j]
    i: int = closest_neighbour(p, meanface, U, train, C)
    pred_name: str = names_train[i][:-7] #Removes image name suffix _XX.jpg
    real_name: str = names_test[j][:-7]
    #print(pred_name, real_name)
    successes += (real_name == pred_name)

s: int = successes/len(test)
print ("Success rate: "+str(np.round(s * 100, 2)) +"%")


Success rate: 100.0%
