
# PCA

## Prepare input data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

# Number of blob centres used to synthesise the toy data set.
K = 5

# 20 three-dimensional samples drawn around K centres; the fixed random_state
# makes the data reproducible. `t` holds the blob label of each sample.
X, t = make_blobs(
    n_samples=20,
    n_features=3,
    centers=K,
    cluster_std=1,
    random_state=3,
)

# Number of samples (rows of X).
N = X.shape[0]

## Visualize the data in 3D space

In [None]:
# Wrap the samples in a DataFrame so every coordinate axis has a name.
df = pd.DataFrame(X, columns=["x1", "x2", "x3"])

# One small black marker per sample.
points_trace = go.Scatter3d(
    x=df["x1"],
    y=df["x2"],
    z=df["x3"],
    mode="markers",
    marker=dict(size=2, color="black"),
)

fig = go.Figure(data=[points_trace])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend=dict(itemsizing="constant"))
fig.show()

## Get Xc and visualize it

In [59]:
# Per-feature mean of the samples, and the mean-centred data used by PCA.
Xbar = np.mean(X, axis=0)
Xc = X - Xbar

# Raw samples (black) together with their mean (orange).
samples_trace = go.Scatter3d(
    x=df["x1"],
    y=df["x2"],
    z=df["x3"],
    mode="markers",
    marker=dict(size=2, color="black"),
    showlegend=False,
)
mean_trace = go.Scatter3d(
    x=[Xbar[0]],
    y=[Xbar[1]],
    z=[Xbar[2]],
    mode="markers",
    marker=dict(size=3, color="orange"),
    showlegend=False,
)

fig = go.Figure(data=[samples_trace, mean_trace])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend=dict(itemsizing="constant"))
fig.show()

## Get Covariance Matrix

In [None]:
# Rows of Xc are samples and columns are features, so tell np.cov that the
# variables live in the columns instead of transposing by hand.
cov_Xc = np.cov(Xc, rowvar=False)
print(cov_Xc)

[[17.62930762  9.32590179 19.04159501]
 [ 9.32590179 43.00503367 15.42294601]
 [19.04159501 15.42294601 38.94721957]]


## Get Eigenvectors and Eigenvalues

In [None]:
# Column eigenvec[:, k] is the eigenvector belonging to eigenval[k].
# NOTE: np.linalg.eig does not sort the eigenvalues, so the largest-variance
# direction is not necessarily column 0 (see the printed values below).
eigenval, eigenvec = np.linalg.eig(cov_Xc)
print(eigenval, eigenvec, sep="\n")

[64.92647681  6.46428742 28.19079664]
[[-0.39296055 -0.86440975  0.31365235]
 [-0.63506507  0.00842943 -0.77241265]
 [-0.66503711  0.50271735  0.55226888]]


## Get Projections Z and Visualize Them

In [89]:
def plotsurface(center, direction, num_steps, step_size):
  """Sample evenly spaced 3-D points on a straight line through `center`.

  Despite its name, this helper produces the points of a *line*, not a
  surface mesh; callers combine lines to build a surface.

  Args:
    center: length-3 array-like, the point the line passes through.
    direction: either a 1-D direction vector, or a 2-D matrix whose FIRST
      column is used as the direction (e.g. a matrix of eigenvectors).
    num_steps: number of points to generate.
    step_size: spacing between consecutive points, measured in multiples of
      the direction vector.

  Returns:
    A tuple (line_x, line_y, line_z) of lists with `num_steps` floats each.
    Point i sits at center + step_size * (i - num_steps / 2) * direction, so
    the offsets cover [-num_steps / 2, num_steps / 2 - 1] steps.
  """
  direction = np.asarray(direction)
  # Keep the historical "matrix in, column 0 used" behaviour, but also accept
  # a plain direction vector.
  axis = direction[:, 0] if direction.ndim == 2 else direction

  offsets = step_size * (np.arange(num_steps) - num_steps / 2)
  # One row per sampled point: center + offset_i * axis.
  points = np.asarray(center) + offsets[:, np.newaxis] * axis

  return (points[:, 0].tolist(), points[:, 1].tolist(), points[:, 2].tolist())

In [85]:
# Sanity check on the number of sampled points. `line_x` is only assigned by
# a later cell, so referencing it here fails on a fresh Restart & Run All;
# compute the line directly instead (80 steps of size 0.5).
print(len(plotsurface(Xbar, eigenvec, 80, 0.5)[0]))

80


In [97]:
# Project the centred data onto every eigenvector: column k of z holds the
# coordinate of each sample along eigenvec[:, k].
z = np.dot(Xc, eigenvec)

# np.linalg.eig returns the eigenvalues in no particular order, so pick the
# two largest-variance directions explicitly instead of hard-coding columns.
pc1, pc2 = np.argsort(eigenval)[::-1][:2]

num_steps, step_size = 80, 0.5

# plotsurface walks along column 0 of the matrix it is given, so pass a
# single-column slice for each principal direction.
line_x, line_y, line_z = plotsurface(Xbar, eigenvec[:, [pc1]], num_steps, step_size)
line2_x, line2_y, line2_z = plotsurface(Xbar, eigenvec[:, [pc2]], num_steps, step_size)

# Mesh of the principal plane. Grid point (i, j) is
#   Xbar + s_i * v1 + s_j * v2 = line1[i] + line2[j] - Xbar,
# so all three coordinates must be 2-D. (Tiling a single line over a 1-D x/y
# grid would draw a plane through v1 and the y-axis, not through v1 and v2.)
plane_x = np.add.outer(line_x, line2_x) - Xbar[0]
plane_y = np.add.outer(line_y, line2_y) - Xbar[1]
line_Z = np.add.outer(line_z, line2_z) - Xbar[2]

# Rank-2 reconstruction of every sample: mean + contribution of the two kept
# components (same arithmetic order as a per-row loop).
Xhat = Xbar + np.outer(z[:, pc1], eigenvec[:, pc1]) + np.outer(z[:, pc2], eigenvec[:, pc2])

fig = go.Figure(data=[go.Scatter3d(x=df.x1, y=df.x2, z=df.x3, mode='markers', marker_size=2, marker_color="black", showlegend=False)])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.add_trace(go.Scatter3d(x=[Xbar[0]], y=[Xbar[1]], z=[Xbar[2]], marker_size=3, marker_color="orange", mode='markers', showlegend=False))
fig.add_trace(go.Surface(x=plane_x, y=plane_y, z=line_Z, showscale=False, opacity=0.5, colorscale=["rgb(196,196,196)", "rgb(196,196,196)"]))

# One segment per sample, from the original point (black) to its projection
# onto the principal plane (red).
for x_i, xhat_i in zip(X, Xhat):
  fig.add_trace(go.Scatter3d(x=[x_i[0], xhat_i[0]], y=[x_i[1], xhat_i[1]], z=[x_i[2], xhat_i[2]],
                             marker=dict(size=2, color=["black", "red"]), line=dict(color="black", width=2), showlegend=False))

fig.show()

In [93]:
# Reconstruction of sample 1 from the column-0 component alone, as a (1, 3) row.
np.reshape(Xbar, (1, 3)) + eigenvec[:, 0] * z[1, 0]

array([[-6.12949632, -9.78378743, -8.94996903]])

In [94]:
# Two-component reconstruction of the same sample (row 1 of Xhat).
print(Xhat[1, :])

[ -7.32836607  -6.83140345 -11.0609    ]


## Calculate Reconstruction Error

In [95]:
# Reconstruction error: total squared distance between every sample and its
# reconstruction, summed over all samples and coordinates.
error = np.square(X - Xhat).sum()
print(error)

122.82146097377198


In [96]:
# Same error computed from the projections: it equals the squared coordinates
# along the one discarded direction. np.linalg.eig does not sort its output,
# so locate the smallest-eigenvalue column instead of hard-coding index 1.
discarded = np.argmin(eigenval)
error = np.sum(z[:, discarded] ** 2)
print(error)

122.82146097377199
