In [None]:
import numpy as np
from PIL import Image
import pandas as pd
from os import listdir
from os.path import isfile, join
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from matplotlib.colors import ListedColormap

In [None]:
# Number of pictures drawn by the picture-grid helpers (passed to visualize_pics
# and used as the loop bound in transform_and_reconstruct).
PICS_NUMBER = 75
# Size of the first axis when a flattened picture is reshaped back to 2-D for
# display (used as `reshape(PICS_WIDTH, -1)`).
PICS_WIDTH = 177

In [None]:
def process_image(image, dest_path, label):
    """Write a picture to ``<dest_path>.csv`` as semicolon-separated pixel rows.

    Each image row becomes one text line. Every pixel value is followed by
    ``;`` and the last pixel of the line is additionally followed by the label,
    so the label ends up as the final column of every row.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getpixel((x, y))``.
        dest_path: Destination path without the ``.csv`` extension.
        label: Value appended as the last column of every row.
    """
    n_cols, n_rows = image.size
    last_col = n_cols - 1
    with open(dest_path + '.csv', 'w') as csv_file:
        for row in range(n_rows):
            cells = []
            for col in range(n_cols):
                # Only the final pixel of the line carries the label.
                suffix = f';{label}' if col == last_col else ';'
                cells.append(f'{image.getpixel((col, row))}{suffix}')
            csv_file.write(''.join(cells) + '\n')
        

In [None]:
def readImages(dir_path, dest_path, label):
    """Convert every picture in a directory to a grayscale CSV file.

    Files are processed in sorted name order so that the number appended to
    ``dest_path`` is the same on every run (``listdir`` alone gives an
    arbitrary order).

    Args:
        dir_path: Directory containing the source pictures; sub-directories
            are ignored.
        dest_path: Prefix of the output files; picture ``k`` is written to
            ``<dest_path><k>.csv`` by ``process_image``.
        label: Class label stored in the last column of every CSV row.
    """
    files = sorted(file for file in listdir(dir_path) if isfile(join(dir_path, file)))
    for number, file in enumerate(files):
        # The context manager releases the underlying file handle as soon as
        # the grayscale ('L' mode) copy has been made.
        with Image.open(join(dir_path, file)) as source:
            image = source.convert('L')
        process_image(image, dest_path + str(number), label)

In [None]:
# One entry per person: (source directory, output file prefix, class label).
for _source_dir, _dest_prefix, _person_label in (
    ("selfies/Ada", "dataset/Ada", 1),
    ("selfies/ja", "dataset/ja", 2),
    ("selfies/Mati", "dataset/Mati", 3),
    ("selfies/Michal", "dataset/Michal", 4),
    ("selfies/AdaK", "dataset/AdaK", 5),
):
    readImages(_source_dir, _dest_prefix, _person_label)

In [None]:
def read_dataset_from_csv(dataset, header='infer'):
    """Read one semicolon-separated picture CSV and split pixels from labels.

    Args:
        dataset: Path of the CSV file to read.
        header: Forwarded to ``pandas.read_csv``. With the default
            (``'infer'``) the first line of the file is consumed as column
            names, so a file without a header line loses its first data row.
            Pass ``header=None`` to keep every line as data.

    Returns:
        Tuple ``(X, y)``: ``X`` is a DataFrame with all columns but the last,
        ``y`` is a Series holding the last column.
    """
    dataset_df = pd.read_csv(dataset, sep=';', header=header)
    features = dataset_df.iloc[:, :-1]
    labels = dataset_df.iloc[:, -1]
    return (features, labels)

In [None]:
def readFiles(dir_path):
    """Load every CSV file of a directory into one feature array and one label array.

    Files are read in sorted name order. Each file is parsed with
    ``read_dataset_from_csv``; its feature table becomes one 2-D slice of the
    result and its label column is appended to the label vector.

    Per-file results are collected in lists and combined once at the end.
    Testing the accumulator with ``.any()`` instead would silently discard
    data that happens to be all zeros (e.g. a completely black picture), and
    repeated ``np.append`` copies the whole array on every iteration.

    Args:
        dir_path: Directory containing the CSV files; sub-directories are
            ignored.

    Returns:
        Tuple ``(x, y)``: ``x`` stacks the per-file feature tables along a new
        first axis (all files must therefore have the same shape), ``y`` is the
        concatenation of all per-file label columns, i.e. one label per CSV
        row, not per file. Both are empty 1-D arrays when the directory
        contains no files.

    Raises:
        ValueError: If the feature tables do not all have the same shape.
    """
    files = sorted(file for file in listdir(dir_path) if isfile(join(dir_path, file)))
    features = []
    labels = []
    for file in files:
        X, Y = read_dataset_from_csv(join(dir_path, file))
        features.append(np.asarray(X))
        labels.append(np.asarray(Y))
    if not features:
        return (np.empty((0,)), np.empty((0,)))
    return (np.stack(features, axis=0), np.concatenate(labels, axis=0))

In [None]:
# Load every CSV file found in the "dataset" directory: X receives the stacked
# per-picture pixel tables, y the concatenated label columns.
X, y = readFiles("dataset")

In [None]:
# readFiles yields one label per CSV row, so y holds a label for every pixel
# row of every picture. Keep a single label per picture. The stride is taken
# from X (rows per picture) instead of a hard-coded 177, and the length guard
# makes re-running this cell a no-op instead of slicing y a second time.
if len(y) != X.shape[0]:
    y = y[::X.shape[1]]

In [None]:
# Pixel-wise average over all pictures (axis 0 indexes the pictures).
mean_face = X.mean(axis=0)

In [None]:
def show_matrix(mean_face: np.ndarray, title: str = "Mean face"):
    """Display a 2-D matrix as a grayscale image.

    Args:
        mean_face: Matrix to draw with ``plt.matshow``.
        title: Figure title; defaults to ``"Mean face"`` so existing calls
            keep their output.
    """
    plt.matshow(mean_face, cmap='gray')
    plt.title(title)
    plt.show()

In [None]:
# Draw the pixel-wise mean of all pictures as a grayscale image.
show_matrix(mean_face)

In [None]:
# Flatten every picture into a single row: X_all is 2-D with one row per
# picture, which is the layout passed to PCA below.
X_all = X.reshape(X.shape[0],-1)

In [None]:
# Fit a PCA with default arguments (no n_components given) on the flattened
# pictures; the fitted estimator is reused by the cells below.
pca = PCA()
pca.fit(X_all)

In [None]:
# Bar chart of the variance share attributed to each principal component of
# the fitted PCA.
ratio = pca.explained_variance_ratio_
x = np.arange(len(ratio))
plt.figure(figsize=(18, 9))
plt.title("explained variance ratio of principal components")
plt.xlabel("principal component")
plt.ylabel("explained variance ratio")
plt.bar(x, ratio, color='red', alpha=0.5, align='center')

In [None]:
def vizualize_principal_components(array: np.ndarray, pics_number: int, pic_width:int):
    """Draw the first ``pics_number`` rows of ``array`` as a grid of grayscale pictures.

    Args:
        array: 2-D array whose rows are flattened pictures (e.g. PCA
            components).
        pics_number: How many rows of ``array`` to draw.
        pic_width: Size of the first axis used when a row is reshaped back to
            2-D (``reshape(pic_width, -1)``).
    """
    n_rows = 5
    # add_subplot needs integer grid dimensions; round up so that every
    # picture gets a cell even when pics_number is not a multiple of n_rows.
    n_cols = max(1, (pics_number + n_rows - 1) // n_rows)
    fig = plt.figure(figsize=(12,12))
    plt.title('Principal components')
    plt.axis('off')
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    for i in range(pics_number):
        ax = fig.add_subplot(n_rows, n_cols, i+1, xticks=[], yticks=[])
        ax.set_title(f'{i+1}')
        ax.matshow(array[i].reshape(pic_width,-1), cmap='gray', interpolation='nearest')
        ax.axis('off')
    # Must be part of the function body so the figure is shown when the
    # function is called, not when it is defined.
    plt.show()

In [None]:
# Draw every component of the fitted PCA as a picture; PICS_WIDTH is the first
# dimension used to reshape each flattened component.
vizualize_principal_components(pca.components_, pca.components_.shape[0], PICS_WIDTH)

In [None]:
def visualize_pics(matrix: np.ndarray, pics_number:int, title:str):
    """Draw the first ``pics_number`` pictures of ``matrix`` as a grid of grayscale images.

    Args:
        matrix: Sequence of 2-D pictures; ``matrix[i]`` is drawn in cell ``i``.
        pics_number: How many pictures to draw.
        title: Title of the figure.
    """
    n_rows = 5
    # add_subplot needs integer grid dimensions; round up so that every
    # picture gets a cell even when pics_number is not a multiple of n_rows.
    n_cols = max(1, (pics_number + n_rows - 1) // n_rows)
    fig = plt.figure(figsize=(10,10))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    plt.title(f'{title}')
    plt.axis('off')
    for i in range(pics_number):
        ax = fig.add_subplot(n_rows, n_cols, i+1, xticks=[], yticks=[])
        # Draw the argument, not the notebook-level X, so the helper works for
        # any picture set passed in.
        ax.matshow(matrix[i], cmap='gray', interpolation='nearest')
        ax.axis('off')
    plt.show()

In [None]:
# Draw the first PICS_NUMBER loaded pictures for comparison with the
# reconstructions produced further below.
visualize_pics(X, PICS_NUMBER, "Original pictures")

In [None]:
def transform_and_reconstruct(X :np.ndarray, n_components:int, random_state:int = 0):
    """Project pictures onto ``n_components`` principal components and draw the reconstructions.

    A PCA is fitted on ``X``, ``X`` is projected into the reduced space and
    mapped back with ``inverse_transform``; the reconstructed rows are drawn as
    a grid of grayscale pictures.

    Args:
        X: 2-D array with one flattened picture per row.
        n_components: Number of principal components kept by the PCA.
        random_state: Seed forwarded to ``PCA`` so that repeated runs give the
            same figure even if a randomized solver is selected.

    Reads the notebook-level constants ``PICS_NUMBER`` (maximum number of
    pictures drawn) and ``PICS_WIDTH`` (first dimension used to reshape a row
    back to 2-D).
    """
    pca = PCA(n_components=n_components, random_state=random_state)
    X_transformed = pca.fit(X).transform(X)
    X_inversed = pca.inverse_transform(X_transformed)
    # Never index past the available rows when X holds fewer pictures than
    # PICS_NUMBER.
    n_pics = min(PICS_NUMBER, len(X_inversed))
    n_rows = 5
    # add_subplot needs integer grid dimensions; round up so that every
    # picture gets a cell.
    n_cols = max(1, (n_pics + n_rows - 1) // n_rows)
    fig = plt.figure(figsize=(10,10))
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)
    plt.title(f"Reconstructed pictures from {n_components} components")
    plt.axis('off')
    for i in range(n_pics):
        ax = fig.add_subplot(n_rows, n_cols, i+1, xticks=[], yticks=[])
        ax.axis('off')
        ax.matshow(X_inversed[i].reshape(PICS_WIDTH, -1), cmap='gray', interpolation='nearest')
    plt.show()


In [None]:
# Reconstruct the pictures from 5, 15 and 50 principal components to compare
# how much detail each setting retains.
dims = [5,15,50]
for dim in dims:
    transform_and_reconstruct(X_all, dim)

In [None]:
def vizualization_2d(X:np.ndarray, y: np.ndarray, random_state: int = 0):
    """Scatter-plot the pictures in the plane of their first two principal components.

    Args:
        X: 2-D array with one flattened picture per row.
        y: One label per row of ``X``; used to colour the points through a
            five-colour map.
        random_state: Seed forwarded to ``PCA`` so that repeated runs give the
            same figure even if a randomized solver is selected.
    """
    pca = PCA(n_components = 2, random_state=random_state)
    X_transformed = pca.fit(X).transform(X)
    plt.figure(figsize=(18,9))
    cmap = ListedColormap(['#000000','#FF0000', '#00FF00', "#0000FF", "#FFFF00"])
    plt.scatter(X_transformed[:,0], X_transformed[:,1], c=y,cmap =cmap)
    plt.xlabel("principal component 1")
    plt.ylabel("principal component 2")
    plt.title("2D representation of pictures")
    # Show the figure explicitly, like the other plotting helpers do.
    plt.show()
    

In [None]:
# Plot all pictures in two PCA dimensions, coloured by the label stored in y.
vizualization_2d(X_all,y)

No report is included due to personal data protection.