# Whitening

Based on: https://iq.opengenus.org/whitening-with-pca/


In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA

In [None]:
# Seed NumPy's global random generator so every run draws the same samples.
np.random.seed(seed=1)

Create some Gaussian-distributed data

In [None]:
# Mean vector and covariance matrix of the 2-D Gaussian to sample from.
mu = [3, 3]
sigma = [
    [7, 5],
    [5, 7],
]  # must be positive semi-definite
n = 1000

# Draw n samples, then transpose so that each column of x is one sample.
x = np.random.multivariate_normal(mean=mu, cov=sigma, size=n).T

In [None]:
# Split the sample indices in two groups: set1 holds the n_far samples lying
# farthest from the distribution mean, set2 holds all the others.
n_far = 20
# Use mu itself (as a column vector, so it broadcasts over the samples)
# rather than a hard-coded centre that would go stale if mu is edited.
centre = np.asarray(mu).reshape(-1, 1)
dist = np.linalg.norm(x - centre, axis=0)  # Euclidean distance per column
set1 = np.argsort(dist)[-n_far:]  # argsort is ascending: the tail is farthest
# sorted() gives the remaining indices in a deterministic, ascending order.
set2 = sorted(set(range(n)) - set(set1.tolist()))

In [None]:
def plotting(x, xlim=16, ylim=16, title="Original"):
    """Scatter-plot 2-D points on axes that cross at the origin.

    The columns of ``x`` selected by the module-level index collection
    ``set1`` are drawn in red; those selected by ``set2`` use the default
    colour. Both collections must be defined before this is called.

    Parameters
    ----------
    x : array
        Indexed as ``x[0, idx]`` (horizontal coordinates) and
        ``x[1, idx]`` (vertical coordinates).
    xlim, ylim : number
        Half-extent of the view: the axes span ``[-xlim, xlim]`` and
        ``[-ylim, ylim]``.
    title : str
        Title of the figure. Defaults to ``"Original"`` so existing calls
        keep their label; pass another string when plotting transformed
        data.
    """
    _, ax = plt.subplots(figsize=(12, 10))
    ax.scatter(x[0, set1], x[1, set1], s=20, c="red", alpha=0.2)
    ax.scatter(x[0, set2], x[1, set2], s=20, alpha=0.2)
    ax.set_aspect("equal")
    ax.set_xlim(-xlim, xlim)
    ax.set_ylim(-ylim, ylim)

    # Move the left y-axis and bottom x-axis to the centre, through (0, 0).
    ax.spines["left"].set_position(("data", 0.0))
    ax.spines["bottom"].set_position(("data", 0.0))

    # Hide the upper and right axes.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    # Show ticks on the left and lower axes only.
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("left")

    ax.set_title(title)

In [None]:
# Show the raw samples in a 30 x 30 window centred on the origin.
plotting(x, xlim=15, ylim=15)

In [None]:
# Project the samples onto their principal components, without whitening.
k = 2  # number of principal components to keep
pca = PCA(n_components=k, whiten=False)
# PCA takes one sample per row while x stores one sample per column, so
# transpose on the way in and again on the way out.
z = pca.fit_transform(x.T).T

plotting(z, xlim=15, ylim=15)
# plotting() labels this figure "Original"; relabel it to match what is shown.
plt.gca().set_title("PCA (whiten=False)")

In [None]:
# Same projection as before, now with whitening enabled (k is set in the
# previous cell).
pca = PCA(n_components=k, whiten=True)
# PCA takes one sample per row while x stores one sample per column, so
# transpose on the way in and again on the way out.
z = pca.fit_transform(x.T).T

# Smaller window than the earlier plots (5 instead of 15).
plotting(z, xlim=5, ylim=5)
# plotting() labels this figure "Original"; relabel it to match what is shown.
plt.gca().set_title("PCA (whiten=True)")