### 2021-2 Machine Learning (01)
## Homework #3-2: PCA
---
Copyright (c) Code Designed by Prof. Jaehyeong Sim 

Department of Computer Science and Engineering

Ewha Womans University

## Prepare input data

In [195]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

D = 3
K = 5

X, t = make_blobs(n_samples = 20, n_features=D, centers=K, random_state=3, cluster_std=1)
N = len(X)

## Visualize the data in 3d space

In [196]:
df = pd.DataFrame(X, columns=["x1", "x2", "x3"])
fig = go.Figure(data=[go.Scatter3d(x=df.x1, y=df.x2, z=df.x3, mode='markers', marker_size=2, marker_color="black")])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.show()

## Get Xc and visualize It

In [197]:
# TODO: fill here (get Xbar)
Xbar = np.average(X, axis = 0)
Xc = X - Xbar

fig = go.Figure(data=[go.Scatter3d(x=df.x1, y=df.x2, z=df.x3, mode='markers', marker_size=2, marker_color="black", showlegend=False)])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.add_trace(go.Scatter3d(x=[Xbar[0]], y=[Xbar[1]], z=[Xbar[2]], marker_size=3, marker_color="orange", mode='markers', showlegend=False))
fig.show()

## Get Covariance Matrix

In [198]:
cov_Xc = np.cov(Xc.T)
print(cov_Xc)

[[17.62930762  9.32590179 19.04159501]
 [ 9.32590179 43.00503367 15.42294601]
 [19.04159501 15.42294601 38.94721957]]


## Get Eigenvectors and Eigenvalues

In [199]:
eigenval, eigenvec = np.linalg.eig(cov_Xc)
print(eigenval)
print(eigenvec)

[64.92647681  6.46428742 28.19079664]
[[-0.39296055 -0.86440975  0.31365235]
 [-0.63506507  0.00842943 -0.77241265]
 [-0.66503711  0.50271735  0.55226888]]


## Get Projections Z and Visualize Them

In [200]:
def plotsurface(center, direction, num_steps, step_size):
  # TODO: fill here
  line_x = np.zeros([num_steps, num_steps])
  line_y = np.zeros([num_steps, num_steps])
  line_z = np.zeros([num_steps, num_steps])
  Xhat = center

  for i in range(num_steps):
    for j in range(num_steps):
      dist_from_center = [[step_size * (i - num_steps/2), step_size * (j - num_steps/2)]]

      surface = center + np.dot(dist_from_center, direction)
      surface = surface.reshape(3)

      line_x[i][j] = surface[0]
      line_y[i][j] = surface[1]
      line_z[i][j] = surface[2]

  return (line_x, line_y, line_z, Xhat)

In [201]:
# TODO: fill here (get z, line_x, line_y, line_z, Xhat)
line_x, line_y, line_z, Xhat = plotsurface(Xbar, eigenvec[:,0:2].T, 50, 0.5)

z = np.dot(Xc, eigenvec)
Xhat = np.zeros([N, 3])

for i in range(N):
  Xhat[i] = Xbar.reshape(1,3) + np.dot(z[i, 0:2], eigenvec[:,0:2].T)

fig = go.Figure(data=[go.Scatter3d(x=df.x1, y=df.x2, z=df.x3, mode='markers', marker_size=2, marker_color="black", showlegend=False)])
fig.update_coloraxes(showscale=False)
fig.update_layout(legend_itemsizing="constant")
fig.add_trace(go.Scatter3d(x=[Xbar[0]], y=[Xbar[1]], z=[Xbar[2]], marker_size=3, marker_color="orange", mode='markers', showlegend=False))
fig.add_trace(go.Surface(z=line_z, x=line_x, y=line_y, showscale=False, opacity=0.5, colorscale=["rgb(196,196,196)", "rgb(196,196,196)"]))

for i in range(len(z[:,0])):
  fig.add_trace(go.Scatter3d(x=[X[i,0],Xhat[i,0]], y=[X[i,1],Xhat[i,1]], z=[X[i,2],Xhat[i,2]], 
                             marker=dict(size=2,color=["black", "red"]), line=dict(color="black",width=2), showlegend=False))

fig.show()

## Calculate Reconstruction Error

In [202]:
error = np.sum(np.sum((X-Xhat)**2))
print(error)

535.6251360894188


In [203]:
error = np.sum(z[:,1]**2)
print(error)

122.82146097377199
