# ⤵️ PCA From Scratch

### Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Read Data

In [2]:
X = pd.read_csv("./Day 1/prepared_data.csv")
y = X['BALANCE']
X.drop('BALANCE', inplace=True, axis=1)

In [3]:
X.head()

Unnamed: 0,BALANCE_FREQUENCY,PURCHASES,ONEOFF_PURCHASES,INSTALLMENTS_PURCHASES,CASH_ADVANCE,PURCHASES_FREQUENCY,ONEOFF_PURCHASES_FREQUENCY,PURCHASES_INSTALLMENTS_FREQUENCY,CASH_ADVANCE_FREQUENCY,CASH_ADVANCE_TRX,PURCHASES_TRX,CREDIT_LIMIT,PAYMENTS,MINIMUM_PAYMENTS,PRC_FULL_PAYMENT,TENURE
0,0.818182,95.4,0.0,95.4,0.0,0.166667,0.0,0.083333,0.0,0,2,1000.0,201.802084,139.509787,0.0,12
1,0.909091,0.0,0.0,0.0,6442.945483,0.0,0.0,0.0,0.25,4,0,7000.0,4103.032597,1072.340217,0.222222,12
2,1.0,773.17,773.17,0.0,0.0,1.0,1.0,0.0,0.0,0,12,7500.0,622.066742,627.284787,0.0,12
3,0.636364,1499.0,1499.0,0.0,205.788017,0.083333,0.083333,0.0,0.083333,1,1,7500.0,0.0,312.343947,0.0,12
4,1.0,16.0,16.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0,1,1200.0,678.334763,244.791237,0.0,12


In [4]:
y.head()

0      40.900749
1    3202.467416
2    2495.148862
3    1666.670542
4     817.714335
Name: BALANCE, dtype: float64

### PCA

In [5]:
class PCA ():

    def __init__(self,n_components):
        self.n_components = n_components
        self.components   = None
        self.mean         = None
        self.variance     = None
        
        if self.n_components > 1:
            self.type = 'var'
        else:
            self.type = 'ratio'

    def fit(self,X): 
        # Step 1: Normalization using mean and Variance
        self.mean = np.mean(X)
        self.variance = np.var(X)
        
        X -= self.mean
        X /= self.variance
        
        # Step 2: Covariance
        cov_matrix = np.cov(np.transpose(X))
        
        # Step 3: Eigen vector and Eigen values
        values, vectors = np.linalg.eig(cov_matrix)
        vectors = np.transpose(vectors)

        # Step 4: Sort values 
        indexes = np.argsort(values)[::-1]
        values  = values[indexes]
        vectors = vectors[indexes]
        
        # Store eigne vectors first n 
        self.components = vectors[0: self.n_components]
        
    def transform(self,Z):
        # Projections
        Z_new = (Z - self.mean )/ self.variance
        Z_new = np.dot(Z_new, self.components.T)

        return Z_new

    #optional 
    def plot_explained_variance(self, transformed):
        pass        

### Call PCA

In [6]:
pca_section = PCA(4)
pca_section.fit(X)
X_transformed = pca_section.transform(X)
# pca_section.explained_variance #return list len = 4

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


In [7]:
#return matrix (n,4)
pca_section.components 

array([[ 2.20974751e-01, -1.90280924e-05, -1.76816022e-05,
        -4.66773984e-05,  5.20959269e-05, -2.24651206e-01,
        -1.47721477e-01, -1.93509693e-01,  8.67949354e-01,
         1.98776811e-02, -1.91225787e-03,  3.39218819e-06,
         6.09718142e-06,  1.00120176e-05, -2.95787075e-01,
        -1.44740951e-02],
       [ 7.91049113e-01,  3.81397259e-05,  4.04677546e-05,
         7.65599460e-05,  3.68492871e-07,  3.44968095e-01,
         4.16514928e-01,  2.63239740e-01,  5.08519368e-02,
         1.63626974e-03,  4.30036210e-03,  1.21104494e-05,
         1.29006877e-05,  8.29011975e-06,  9.70094779e-02,
         2.09888051e-02],
       [-4.05835509e-01,  2.98577707e-05,  3.55704705e-05,
         4.68288293e-05,  2.83798394e-05,  1.86464222e-01,
         3.33290977e-01,  1.10059373e-01,  4.63597220e-01,
         1.26867107e-02,  2.66438429e-03,  1.59900095e-05,
         2.73919584e-05, -1.20828286e-05,  6.79307772e-01,
        -2.79857853e-02],
       [ 2.91118559e-01, -3.62382663e

In [8]:
X_transformed

array([[-66.62058159, -54.53242563, -55.65724896,  -3.33539692],
       [ 68.18468456, -23.91781853,  23.96718754,  21.88438216],
       [-79.08281356,  53.92947848, -31.38532094, -86.0022351 ],
       ...,
       [-85.93176459, -32.43316455, -26.9867919 ,  19.77442727],
       [ 16.66707015, -45.26158299,  12.26863688,  16.44249184],
       [ 87.08221493, -41.23227693,  91.57148154, -82.0267187 ]])

In [9]:
print(f"Dimentionality Reduction from {X.shape[1]} dimentions \
to {X_transformed.shape[1]} dimentions")

Dimentionality Reduction from 16 dimentions to 4 dimentions
