# Principal Component Analysis

*Principal Component Analysis* (PCA) implemented from scratch as the `PCA` Python class, using Singular Value Decomposition.

In [None]:
# Principal Component Analysis (PCA) algorithm

class PCA:
    """Principal Component Analysis built on Singular Value Decomposition.

    Typical usage::

        pca = PCA(scaled_train)
        U, sigma, V_T = pca.singular_values()
        pca.variance_explained(sigma)
        train_proj = pca.projected_features(2)
        test_proj = pca.project_test(scaled_test)

    ``scaled_features`` must be a pandas ``DataFrame`` (rows = observations,
    columns = features): the projections are labelled through ``.columns``
    and plotted through ``.iloc``.  This class performs no centring or
    scaling itself, so the data should already be standardised.

    The class relies on the module-level names ``np``, ``LA`` (used as
    ``LA.svd``) and ``plt`` being imported by the surrounding file.
    """

    def __init__(self, scaled_features):
        """Store the feature matrix the decomposition is computed on."""
        self.scaled_features = scaled_features

    def singular_values(self):
        """Run the reduced SVD of the stored features.

        Returns:
            The tuple ``(U, SIGMA, V_T)`` as returned by
            ``LA.svd(..., full_matrices=False)``.  ``V_T`` is also kept on
            the instance because its rows are the principal directions
            needed to project features later.
        """
        # Decompose a plain ndarray so U and V_T are ndarrays regardless of
        # how the input container wraps array results.
        U, SIGMA, V_T = LA.svd(np.asarray(self.scaled_features),
                               full_matrices=False)

        # Rows of V_T are the principal directions used for projection.
        self.V_T = V_T

        return U, SIGMA, V_T

    def variance_explained(self, SIGMA_mat):
        """Compute and plot the share of variance explained per component.

        The variance captured by a principal component is proportional to
        the *square* of its singular value, so each share is
        ``sigma_i**2 / sum(sigma**2)``.

        Args:
            SIGMA_mat: 1-D sequence of singular values.

        Returns:
            Array of shape ``(len(SIGMA_mat), 1)`` holding the
            (non-cumulative) fraction of variance explained by each
            component.  It is also stored as ``self.percentages``.
        """
        variances = np.asarray(SIGMA_mat, dtype=float) ** 2
        # Total is computed once instead of once per component.
        percentages = (variances / np.sum(variances)).reshape(-1, 1)

        # Display the corresponding scree plot.
        plt.figure(figsize=(12, 7))
        plt.plot(range(1, len(SIGMA_mat) + 1), percentages, marker='o')
        plt.xlabel('Singular Values')
        plt.ylabel('Explained variance')
        plt.title('Total variance explained for each principal component')
        plt.show()

        self.percentages = percentages

        return percentages

    def _project(self, features, n_components):
        """Project ``features`` onto the first ``n_components`` directions."""
        if not hasattr(self, "V_T"):
            # Decompose lazily so projection works without an explicit
            # prior call to singular_values().
            self.singular_values()

        available = self.V_T.shape[0]
        if not 0 <= n_components <= available:
            raise ValueError(
                "n_components must be between 0 and {}, got {}".format(
                    available, n_components))

        # Principal directions are the ROWS of V_T; X @ V_k gives the scores.
        directions = self.V_T[:n_components].T
        projected = features @ directions

        # Plain Python strings: a fixed-width NumPy string array would
        # truncate labels such as "DIM 100".
        projected.columns = ["DIM {}".format(i + 1)
                             for i in range(n_components)]
        return projected

    def projected_features(self, n_components, show_plot=False):
        """Project the stored features onto the leading principal components.

        Args:
            n_components: Number of principal components to keep.
            show_plot: When true and ``n_components == 2``, display a
                scatter plot of the two projected dimensions.

        Returns:
            DataFrame with one row per observation and columns
            ``"DIM 1" ... "DIM n_components"``.

        Raises:
            ValueError: If ``n_components`` is negative or exceeds the
                number of available components.
        """
        proj_feat = self._project(self.scaled_features, n_components)

        # Remembered so project_test() keeps the same dimensionality.
        self.n_components = n_components

        # Display the projected features in a two-dimensional subspace.
        if show_plot and n_components == 2:
            plt.figure(figsize=(12, 7))
            plt.scatter(proj_feat.iloc[:, 0], proj_feat.iloc[:, 1])
            plt.xlabel('First principal component')
            plt.ylabel('Second principal component')
            plt.title('PCA - California housing features')
            plt.show()

        return proj_feat

    def project_test(self, scaled_test_features):
        """Project test features onto the components learned from this data.

        The test set is mapped with the principal directions of the features
        stored on this instance (not with a separate decomposition of the
        test set), so both projections live in the same subspace.

        Args:
            scaled_test_features: DataFrame with the same feature columns,
                scaled the same way as the stored features.

        Returns:
            DataFrame with columns ``"DIM 1" ... "DIM n_components"``.

        Raises:
            AttributeError: If ``projected_features`` has not been called
                yet, so the number of components is unknown.
        """
        if not hasattr(self, "n_components"):
            raise AttributeError(
                "call projected_features() before project_test() "
                "to select the number of components")

        return self._project(scaled_test_features, self.n_components)