In [29]:
import numpy as np
import plotly.graph_objects as go

In [30]:
mu = [7,8,9]
sigma = [[4, 8,-0.2],
         [3, -7, 1.5],
        [-2,5,0.3]]

np.random.seed(2024)
original_data = np.random.multivariate_normal(mu,sigma, 1000)

print(original_data)

[[ 5.27914494  2.27192549  9.0391532 ]
 [ 4.74768067  8.60648173  9.91992568]
 [10.07250743 17.00933244  9.13120049]
 ...
 [ 7.6790821   8.86996439  7.7067955 ]
 [ 6.14810187  9.89482057  8.65256105]
 [ 4.11473045  3.35648327  8.69428423]]



covariance is not symmetric positive-semidefinite.



In [31]:
#Original data visualizing
x, y, z = original_data.T

# Create a 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=x,                    # X-axis data
    y=y,                    # Y-axis data
    z=z,                    # Z-axis data
    mode='markers',         # Use markers to represent data points
    marker=dict(
        size=1.5,           # Size of the markers
        opacity=0.8,        # Marker opacity
        color='royalblue'   # Color of the markers
    )
)])

# Set the layout of the plot
fig.update_layout(title='Original data (3D)',
                  scene=dict(
                      xaxis_title='X Axis',  # Title for the X-axis
                      yaxis_title='Y Axis',  # Title for the Y-axis
                      zaxis_title='Z Axis'   # Title for the Z-axis
                  ))

fig.show()

In [32]:
mean = np.mean(original_data, axis = 0)
print(mean)
centered_data = original_data - mean
#Centered data visualizing
x = centered_data[:,0]
y = centered_data[:,1]
z = centered_data[:,2]

# Create a 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=x,                    # X-axis data
    y=y,                    # Y-axis data
    z=z,                    # Z-axis data
    mode='markers',         # Use markers to represent data points
    marker=dict(
        size=1.5,           # Size of the markers
        opacity=0.8,        # Marker opacity
        color='royalblue'   # Color of the markers
    )
)])

# Set the layout of the plot
fig.update_layout(title='Centered data (3D)',
                  scene=dict(
                      xaxis_title='X Axis',  # Title for the X-axis
                      yaxis_title='Y Axis',  # Title for the Y-axis
                      zaxis_title='Z Axis'   # Title for the Z-axis
                  ))

fig.show()

[6.97217221 8.02277927 9.02222836]


In [48]:
#Find Principal component by using covaiance, linear algebera.eig, to detect eigenvalues, eigenvectors
def compute_principal_component(feature):
    # Calculate the covariance matrix of the feature set
    Sigma = np.cov(feature.T)

    # Compute eigenvalues and eigenvectors of the covariance matrix
    eig_val, eig_vec = np.linalg.eig(Sigma)

    # Sort eigenvalues and corresponding eigenvectors in decreasing order
    idx = eig_val.argsort()[::-1]
   # print(eig_val)
    # Reorder eigenvalues and eigenvectors according to sorted indices
    eig_val = eig_val[idx]
    eig_vec = eig_vec[:, idx]
   # print(eig_val)
    
    # Calculate the first three principal components by multiplying each eigenvalue with its corresponding eigenvector
    principal_component_1 = eig_val[0] * eig_vec[:, 0]
    principal_component_2 = eig_val[1] * eig_vec[:, 1]
    principal_component_3 = eig_val[2] * eig_vec[:, 2]
    
    pca_vector = []
    pca_vector.append(principal_component_1)
    pca_vector.append(principal_component_2)
    pca_vector.append(principal_component_3)
    
    pca_data = np.dot(feature,pca_vector)
    return (pca_data,principal_component_1,principal_component_2,principal_component_3)

# Compute the principal components from the centered data
pca_data,pc1, pc2, pc3 = compute_principal_component(centered_data)

# Print the first three principal components
print(f"Principal component 1: {pc1}")
print(f"Principal component 2: {pc2}")
print(f"Principal component 3: {pc3}")

Principal component 1: [ -0.70651561 -12.32974953   0.84212788]
Principal component 2: [ 5.39131872 -0.26598476  0.62879447]
Principal component 3: [ 0.12602502 -0.08343377 -1.11583861]


In [49]:
#PCA data visualizing
x = pca_data[:,0]
y = pca_data[:,1]
z = pca_data[:,2]

fig = go.Figure()

for idx, pc in enumerate([pc1, pc2, pc3]):
    # Add a 3D scatter trace representing each principal component as a line with a marker
    fig.add_trace(go.Scatter3d(x=[0, pc[0]], y=[0, pc[1]], z=[0, pc[2]],
                               mode='lines+markers',  # Display as both line and marker
                               line=dict(width=5),  # Line width
                               marker=dict(size=3),  # Marker size
                               name=f'Principal component {idx + 1}'))  # Name it according to its order

# Add trace for the data points
fig.add_trace(go.Scatter3d(x=x, y=y, z=z, mode='markers',
    marker=dict(
        size=1.5,       # Marker size for data points
        opacity=0.5,    # Marker opacity for data points
    ),
    name='Data'         # Name of the trace
))

# Update the layout of the figure
fig.update_layout(title='Visualization of principal components and Pca_data',
                  scene=dict(aspectmode='cube',
                             xaxis=dict(range=[-10, 10], autorange=False),
                             yaxis=dict(range=[-10, 10], autorange=False),
                             zaxis=dict(range=[-10, 10], autorange=False)))
fig.show()

In [35]:
#ax + by + cz + d = 0 : plane
def projection_onto_z_plane(x, y, pc):
    _z = -(pc[0] * x + pc[1] * y) / pc[2]
    return _z

In [41]:
# Generate a grid for plotting a plane
x_plane, y_plane = np.meshgrid(np.linspace(-10, 10), np.linspace(-10, 10))
# Calculate z-values for the plane using the projection_onto_z_plane function
z_plane = projection_onto_z_plane(x_plane, y_plane, pc3)

# Initialize the figure and add a surface trace for the plane
fig = go.Figure(data=[go.Surface(
    x=x_plane, y=y_plane, z=z_plane,
    opacity=0.5,
    colorscale=[[0, 'rgb(150,150,150)'], [1, 'rgb(150,150,150)']],  # Set to gray color
    showscale=False  # Hide the color scale
)])

# Extract x, y, z components of the centered data
proj_x, proj_y, proj_z = centered_data.T
# Calculate z-values for the data points' projection onto the plane
proj_z = projection_onto_z_plane(proj_x, proj_y, pc3)

# Add a scatter trace for the projected data points
fig.add_trace(go.Scatter3d(x=proj_x, y=proj_y, z=proj_z, mode='markers',
    marker=dict(
        size=1.5,
        opacity=0.8,
        color='tomato'
    ),
    name='Data',
    showlegend=False
))

# Update the layout to customize the plot appearance
fig.update_layout(title='PCA result (3D to 2D)',
                  scene=dict(aspectmode='cube',
                             xaxis=dict(range=[-10, 10], autorange=False),
                             yaxis=dict(range=[-10, 10], autorange=False),
                             zaxis=dict(range=[-10, 10], autorange=False)))

fig.show()
