# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pylab
import scipy.io
import scipy.misc

from pca import feature_normalize, get_usv, project_data, recover_data


def get_datum_img(row):
    """
    Creates an image from a single np array with shape 1x1024
    :param row: a single np array with shape 1x1024 (a flat array of
        1024 values is accepted as well)
    :return: the constructed image, np array of shape 32 x 32
    :raises ValueError: if row does not hold exactly 1024 values
    """
    width, height = 32, 32
    # Fill a width x height grid in reading order, then transpose, so that
    # consecutive values of the row run down a column of the image.
    # NOTE(review): this assumes the pixels are stored column by column
    # (the data comes from a MATLAB .mat file) -- confirm the faces are
    # displayed upright.
    square = np.asarray(row).reshape(width, height)
    return square.T


def display_data(samples, num_rows=10, num_columns=10):
    """
    Function that picks the first num_rows * num_columns rows from samples,
    creates an image from each, then stitches them together into a
    num_rows x num_columns grid of images, and draws it on a new figure.

    Tiles are placed left to right, top to bottom. If samples holds fewer
    rows than the grid has cells, the remaining cells are left blank (zero).
    The figure is only created here; the caller decides when to call
    plt.show().

    :param samples: sequence of rows, each one accepted by get_datum_img
    :param num_rows: number of tile rows in the grid
    :param num_columns: number of tile columns in the grid
    """
    width, height = 32, 32

    big_picture = np.zeros((height * num_rows, width * num_columns))

    # Never index past the end of samples when the grid is larger than the data.
    num_tiles = min(num_rows * num_columns, len(samples))
    for index in range(num_tiles):
        row, column = divmod(index, num_columns)
        img = get_datum_img(samples[index])
        top, left = row * height, column * width
        big_picture[top:top + img.shape[0], left:left + img.shape[1]] = img

    plt.figure(figsize=(10, 10))
    # imshow rescales the array to the full colormap range on its own, so the
    # removed scipy.misc.toimage conversion is not needed. The colormap is
    # passed by name: pylab.gray() returns None and only worked by changing
    # the global default colormap as a side effect.
    plt.imshow(big_picture, cmap='gray')


def main():
    """
    Run the PCA demo on the faces dataset.

    Loads 'data/faces.mat', shows the original faces, normalizes the
    features, computes the principal components with an SVD of the
    covariance matrix, shows the first 36 components, projects the data onto
    the first K components, recovers an approximation of the original data
    from that projection and shows it.
    """
    datafile = 'data/faces.mat'
    mat = scipy.io.loadmat(datafile)
    samples = mat['X']
    display_data(samples)

    # Feature normalize: zero mean and unit standard deviation per feature.
    mu = np.mean(samples, axis=0)
    X_norm = samples - mu
    # Set Delta Degrees of Freedom (ddof) to 1, to compute
    # the std based on a sample and not the population.
    sigma = np.std(X_norm, axis=0, ddof=1)
    X_norm = X_norm / sigma

    # Run SVD
    m = X_norm.shape[0]

    # When computing the covariance matrix, we have
    # to divide by m (the number of examples).
    covariance = (1. / m) * np.dot(X_norm.T, X_norm)

    # The columns of U are the principal directions; np.linalg.svd returns
    # them ordered by decreasing singular value.
    U, _, _ = np.linalg.svd(covariance)

    # Visualize the top 36 eigenvectors found
    display_data(U.T, 6, 6)

    # Project each image down to K dimensions
    K = 100
    # NOTE(review): project_data / recover_data come from the local pca
    # module; the argument order is taken from the original calls here --
    # confirm against pca.py.
    Z = project_data(X_norm, U, K)
    print('\nThe projected data Z has a size of: ')
    print('{} {}'.format(Z.shape[0], Z.shape[1]))

    # Attempt to recover the original data
    print('\nVisualizing the projected (reduced dimension) faces.')
    recovered_samples = recover_data(Z, U, K)
    # Plot the dimension-reduced data
    display_data(recovered_samples)
    plt.show()


# Run the demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
