In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import linalg as la
from PIL import Image

### The PCA algorithm is implemented as follows:

In [2]:
class PCA(object):
    """Principal component analysis of a (dimensions x observations) matrix.

    The input is mean-centred per dimension (row) on construction. The
    covariance matrix of the centred data is eigendecomposed, the eigenpairs
    are ordered by decreasing eigenvalue, and ``PCA_transform`` projects the
    centred data onto the ordered eigenvectors.

    Attributes set by ``__init__``:
        m, n            -- number of dimensions (rows) and observations (columns)
        data_mean       -- (m, 1) per-dimension mean of the input
        X_data          -- (m, n) mean-centred data
        eig_vals_sorted -- (m,) eigenvalues, largest first (zeros until sorted)
        eig_vecs_sorted -- (m, m) matching eigenvectors as columns (zeros until sorted)
        sorted_indeces  -- (m,) positions of the sorted eigenvalues in ``eig_vals``
    """

    def __init__(self, X_data):
        """Centre ``X_data`` and allocate the result holders.

        Parameters:
            X_data -- 2-D array-like with dimensions in the rows and
                      observations in the columns.

        Raises:
            ValueError -- if ``X_data`` is not two-dimensional.
        """
        X_data = np.asarray(X_data, dtype=float)
        if X_data.ndim != 2:
            raise ValueError("X_data must be 2-D: (dimensions, observations)")

        self.m, self.n = X_data.shape
        self.data_mean = np.mean(X_data, axis=1).reshape(self.m, 1)
        self.X_data = X_data - self.data_mean  # subtract the per-dimension mean
        self.eig_vals_sorted = np.zeros(self.m)
        self.eig_vecs_sorted = np.zeros([self.m, self.m])
        self.sorted_indeces = np.zeros(self.m, dtype=int)

    def cov_mat(self):
        """Return the (m, m) covariance matrix of the centred data.

        The normalisation is 1/n (population covariance), not 1/(n-1).
        """
        return (1 / self.n) * np.matmul(self.X_data, self.X_data.T)

    def eig_vals_vec(self):
        """Eigendecompose the covariance matrix.

        Stores and returns ``(eig_vals, eig_vec)`` where column ``k`` of
        ``eig_vec`` is the eigenvector belonging to ``eig_vals[k]``.
        """
        # The covariance matrix is symmetric, so use the symmetric solver:
        # it guarantees real eigenvalues and orthonormal eigenvectors, which
        # the general-purpose `eig` does not.
        self.eig_vals, self.eig_vec = la.eigh(self.cov_mat())
        return self.eig_vals, self.eig_vec

    def sort_eig_vals_vec(self):
        """Order the eigenpairs by decreasing eigenvalue.

        Returns ``(eig_vals_sorted, eig_vecs_sorted)``; the eigenvectors stay
        in the columns. ``sorted_indeces`` records the permutation applied.
        """
        eig_vals, eig_vecs = self.eig_vals_vec()

        # A stable argsort yields a true permutation even when eigenvalues
        # repeat, so no eigenvector column is selected twice.
        order = np.argsort(-eig_vals, kind="stable")

        self.sorted_indeces = order
        self.eig_vals_sorted = eig_vals[order]
        self.eig_vecs_sorted = eig_vecs[:, order]
        return self.eig_vals_sorted, self.eig_vecs_sorted

    def PCA_transform(self):
        """Project the centred data onto the principal axes.

        Returns an (m, n) array whose row ``k`` is the k-th principal
        component (largest variance first). Its covariance is diagonal with
        ``eig_vals_sorted`` on the diagonal.
        """
        _, eig_vecs_sorted = self.sort_eig_vals_vec()
        # Eigenvectors are columns, so the change of basis is V^T X (not V X).
        return np.matmul(eig_vecs_sorted.T, self.X_data)

### Load the image data:

In [3]:
def get_data(path="shakira.jpg", show=True):
    """Load an image and flatten it into a (channels x pixels) float matrix.

    Parameters:
        path -- image file to open; defaults to the notebook's sample image.
        show -- when True, open the image in the system viewer first.

    Returns:
        X_data -- float array of shape (K, N*M); row ``i`` is colour channel
                  ``i`` flattened in row-major order.
        size   -- ``[N, M, K]``: image height, width and channel count.
    """
    with Image.open(path) as im:
        if show:
            im.show()
        # Force three channels so grayscale / palette / RGBA files produce the
        # same (N, M, 3) layout the rest of the notebook relies on.
        pix = np.array(im.convert("RGB"))

    N, M, K = pix.shape
    # One reshape + transpose replaces the per-channel copy loop.
    X_data = np.ascontiguousarray(pix.reshape(N * M, K).T, dtype=float)
    return X_data, [N, M, K]
    

### Perform PCA on the generated data:

In [4]:
# Load the image as a (channels, pixels) matrix and keep its original size.
X_data, org_img_size = get_data()
print(X_data.shape)
M, N = X_data.shape

# Run PCA and flip the result to (pixels, channels) for reshaping later on.
obj = PCA(X_data)
Y_data = obj.PCA_transform().T
print(Y_data.shape)

(3, 102400)
(102400, 3)


### View the result:

In [5]:
# Fold the (pixels, channels) matrix back into the original image layout.
im_dat = Y_data.reshape(tuple(org_img_size))

# The PCA output is mean-centred, so it holds negative values and values
# outside 0..255; casting those straight to uint8 is out of range. Rescale
# each component linearly onto [0, 255] before the cast.
chan_min = im_dat.min(axis=(0, 1), keepdims=True)
chan_span = im_dat.max(axis=(0, 1), keepdims=True) - chan_min
chan_span[chan_span == 0] = 1.0  # constant component: avoid division by zero
im_scaled = np.rint(255.0 * (im_dat - chan_min) / chan_span)

image = Image.fromarray(im_scaled.astype('uint8'), 'RGB')
image.show()
image.save('shakira_uncorrelated.jpg')

### Result: the data is now uncorrelated and the redundancies are removed.