# Recitation 9

UTDallas CS 4375, taught by Dr. Ruozzi

Recitation with Jim Amato

#### Jim's notes to self for eventual updates:

* Better connection between math in slides and code here
* Better coordination between this notebook and next week's
* Todo: review this notebook more

#$\color{blue}{\text{Today:}}$

* Any requests?
* PCA

#$\color{blue}{\text{Problem 1:}} \text{ PCA in 2D}$

##$\color{blue}{\text{Step 1:}} \text{ Make Data}$

First, we'll generate some data.

I make some random multivariate data with 0 mean and unit covariance.

Then the data is scaled and rotated. The scaling produces an obvious preferred axis. The rotation means the preferred axis is not one of the features.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def generate_and_rotate_data(n_samples=50):
    """
    Build a reproducible 2D point cloud whose preferred axis is off-feature.

    Standard-normal samples are drawn with the global NumPy seed fixed at 0,
    stretched by 2 along the first feature and by 10 along the second, and
    then right-multiplied by a 72-degree rotation matrix.

    Returns a tuple ``(X, X_base)``: the transformed samples and the
    untouched standard-normal samples, both of shape (n_samples, 2).
    """
    np.random.seed(0)
    X_base = np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]], n_samples)

    stretch = np.array([[2, 0],
                        [0, 10]])

    angle = np.radians(72)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rotation = np.array([[cos_a, -sin_a],
                         [sin_a, cos_a]])

    # Samples are rows, so each transform is applied by right-multiplication:
    # first the stretch, then the rotation.
    X = X_base.dot(stretch).dot(rotation)

    return X, X_base

Now let's look at it.

In [None]:
def visualize_2d(data, colors, eigenvectors=None, ax=None):
    """
    Scatter-plot 2D data on symmetric, equal-aspect axes.

    Parameters
    ----------
    data : array indexed as ``data[:, 0]`` (x) and ``data[:, 1]`` (y).
    colors : forwarded to ``ax.scatter`` as ``c``.
    eigenvectors : optional 2x2 array. Columns 0 and 1 are drawn as black and
        gray arrows from the origin, each multiplied by 3 for visibility.
    ax : optional Matplotlib axes to draw on. When omitted, a new 7x7-inch
        figure is created.

    Returns
    -------
    None. The function only draws; it does not call ``plt.show()``.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(7, 7))  # square figure

    ax.scatter(data[:, 0], data[:, 1], c=colors)

    # Use one symmetric range (largest absolute coordinate, padded by 1) for
    # both axes so the origin sits in the middle of the plot.
    max_val = max(data.max(), -data.min()) + 1
    ax.set_xlim([-max_val, max_val])
    ax.set_ylim([-max_val, max_val])

    if eigenvectors is not None:
        # Plot the principal components (eigenvectors are the columns)
        scale = 3  # lengthen the unit vectors so they are visible
        ax.quiver(0, 0, eigenvectors[0, 0]*scale, eigenvectors[1, 0]*scale, angles='xy', scale_units='xy', color='black')
        ax.quiver(0, 0, eigenvectors[0, 1]*scale, eigenvectors[1, 1]*scale, angles='xy', scale_units='xy', color='gray')

    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    # Title the axes we actually drew on. plt.title() targets the *current*
    # axes, which is a different subplot when the caller passes ``ax``.
    ax.set_title('2D Data with Principal Components')
    ax.grid(True)
    ax.axhline(0, color='black',linewidth=0.5)
    ax.axvline(0, color='black',linewidth=0.5)
    ax.set_aspect('equal', adjustable='box')

In [None]:
def show_2d_data_base():
    """Plot the generated 2D samples, cycling through five marker colors."""
    samples, _ = generate_and_rotate_data()
    palette = ['red', 'blue', 'green', 'orange', 'purple']
    # Repeat the palette often enough to cover every sample, then trim.
    repeats = len(samples) // len(palette) + 1
    point_colors = np.array(palette * repeats)[:len(samples)]
    visualize_2d(samples, point_colors)

In [None]:
# Render the scatter plot of the generated 2D samples (no eigenvectors drawn).
show_2d_data_base()

Perhaps you'd prefer real data. Feature 1 is how much caffeine I've consumed. Feature 2 is the level of nonsense I spew. Conclusion 1: Caffeine makes Jim silly.

##$\color{blue}{\text{Step 2:}} \text{ Easy PCA: Calculating Eigenthings}$

Now we need to compute the eigenvalues and eigenvectors for the data. First, we'll do this the easy way, with NumPy.

###$\text{Template}$

I'll use some combination of these numpy functions:
* [np.cov](https://numpy.org/doc/stable/reference/generated/numpy.cov.html)
* [np.linalg.eigh](https://numpy.org/doc/stable/reference/generated/numpy.linalg.eigh.html)
* [NDArray.argsort](https://numpy.org/doc/stable/reference/generated/numpy.argsort.html)

In [None]:
def compute_eigen(data):
    """
    Compute the eigenvalues and eigenvectors of the data's covariance matrix.

    Parameters
    ----------
    data : 2D array with one sample per row and one feature per column.

    Returns
    -------
    eigenvalues : 1D array, sorted from largest to smallest.
    eigenvectors : 2D array whose column ``i`` is the eigenvector belonging
        to ``eigenvalues[i]``.
    """
    # rowvar=False: columns are the variables (features). The np.cov default
    # treats each *row* as a variable, which would yield an
    # (n_samples x n_samples) matrix instead of (n_features x n_features).
    cov = np.cov(data, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(cov)

    # eigh returns ascending eigenvalues; reverse to get the principal
    # component first, and reorder the eigenvector columns to match.
    idx = eigenvalues.argsort()[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    return eigenvalues, eigenvectors

For some particular arrays, we know eigenvalues and eigenvectors. Are we getting what we expect?


In [None]:
def show_intermediate_eigenstuff(m, eigenvalues, eigenvectors):
    """
    Print the expected eigen-decomposition of ``m`` followed by the computed one.

    Parameters
    ----------
    m : symmetric 2D array to decompose.
    eigenvalues, eigenvectors : the values the caller expects; they are only
        printed, not checked.

    The decomposition is taken of ``m`` itself with ``np.linalg.eigh``
    (ascending eigenvalues, eigenvectors as columns), so the printed result
    can be compared directly with the expected eigenpairs of the matrix.
    """
    print(f"We expect eigenvalues {eigenvalues} and eigenvectors {eigenvectors} for ")
    print(m)
    # Show the computed result so expected and actual can be compared by eye.
    computed_values, computed_vectors = np.linalg.eigh(m)
    print(f"We computed eigenvalues {computed_values} and eigenvectors (as columns)")
    print(computed_vectors)

def helper():
    """Feed three 2x2 matrices with known eigenpairs to show_intermediate_eigenstuff."""
    isqrt2 = 1/np.sqrt(2)

    # Each entry: (matrix, expected eigenvalues, expected eigenvectors)
    cases = [
        (np.array([[1, 0],
                   [0, 1]]),
         (1, 1),
         [[1, 0], [0, 1]]),
        (np.array([[1, 1],
                   [1, 1]]),
         (0, 2),
         [[-isqrt2, isqrt2], [isqrt2, isqrt2]]),
        (np.array([[2, 1],
                   [1, 2]]),
         (1, 3),
         [[-isqrt2, isqrt2], [isqrt2, isqrt2]]),
    ]

    for matrix, expected_vals, expected_vecs in cases:
        show_intermediate_eigenstuff(matrix, expected_vals, expected_vecs)

helper()

###$\text{Completed}$

<!-- cheater -->

In [None]:
def compute_eigen(data):
    """
    Eigen-decompose the covariance of ``data`` (rows = samples, columns = features).

    Returns ``(eigenvalues, eigenvectors)`` ordered from largest to smallest
    eigenvalue; eigenvector ``i`` is the column ``eigenvectors[:, i]``.
    """
    values, vectors = np.linalg.eigh(np.cov(data, rowvar=False))

    # eigh yields ascending eigenvalues, so reverse the ordering and apply
    # the same permutation to the eigenvector columns.
    descending = values.argsort()[::-1]
    return values[descending], vectors[:, descending]

###$\text{Showcase Code}$

In [None]:
# Module-level inputs read by the 2D showcase functions (X and data_colors).
colors_list = ['red', 'blue', 'green', 'orange', 'purple']
X, _ = generate_and_rotate_data()
# Cycle the five colors until there is at least one per sample, then trim to len(X).
data_colors = np.array(colors_list * (len(X) // len(colors_list) + 1))[:len(X)]
# Shuffle the color assignment in place (draws from NumPy's global RNG).
np.random.shuffle(data_colors)

def showcase_2d_p1_np():
    """Plot the module-level 2D data with its covariance eigenvectors overlaid."""
    _, principal_axes = compute_eigen(X)
    visualize_2d(X, data_colors, principal_axes)

###$\text{Showcase}$


In [None]:
# Draw the 2D data together with the eigenvectors of its covariance matrix.
showcase_2d_p1_np()

##$\color{blue}{\text{Step 3:}} \text{ Easy PCA: Compressing Data}$

Usually, we use PCA to distill a dataset down to its most essential elements.

###$\text{Template}$

In [None]:
def project_onto_component(data, vector):
    """Return the dot product of each row of ``data`` with ``vector``."""
    coordinates = data.dot(vector)
    return coordinates

###$\text{Completed}$

In [None]:
def project_onto_component(data, vector):
    """
    Project every sample (row of ``data``) onto ``vector``.

    Returns one scalar coordinate per sample: the row's dot product with
    ``vector``.
    """
    return data.dot(vector)

###$\text{Showcase Code}$

In [None]:
def showcase_2d_p2_np():
    """
    Show the first ten samples next to their projection onto the principal axis.

    Left panel: the samples with both (negated) eigenvectors drawn.
    Right panel: the 1D projection, plotted along the horizontal axis with a
    zero second coordinate.
    """
    eigenvalues, eigenvectors = compute_eigen(X)
    # An eigenvector is only defined up to sign, so the negated basis is an
    # equally valid set of principal axes.
    eigenvectors = -1*eigenvectors
    # Eigenvectors are stored as columns; column 0 belongs to the largest
    # eigenvalue. (Indexing with [0] would pick the first row instead.)
    principal_eigenvector = eigenvectors[:, 0]
    project_X = project_onto_component(X, principal_eigenvector)
    # Pad the 1D coordinates with zeros so they can be drawn by visualize_2d.
    project_X_2D = np.column_stack((project_X, np.zeros(project_X.shape)))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
    visualize_2d(X[:10], data_colors[:10], eigenvectors, ax=ax1)
    visualize_2d(project_X_2D[:10], data_colors[:10], ax=ax2)

###$\text{Showcase}$

In [None]:
# Show the first ten samples beside their one-dimensional projection.
showcase_2d_p2_np()

##$\color{blue}{\text{Step 4:}} \text{ Easy PCA: Going back to the Feature Space}$

In [None]:
def showcase_2d_p3_np():
    """
    Show the first ten samples next to their rank-1 reconstruction.

    Each sample is projected onto the principal eigenvector and the resulting
    scalar is multiplied back by that eigenvector, which places the point on
    the principal axis in the original feature space.
    """
    _, eigenvectors = compute_eigen(X)
    v1 = eigenvectors[:, 0]

    scores = project_onto_component(X, v1)
    # (n, 1) * (2,) broadcasts to (n, 2): one scaled copy of v1 per sample.
    reconstructed_data = scores[:, np.newaxis] * v1

    fig, (left, right) = plt.subplots(1, 2, figsize=(15, 7))
    visualize_2d(X[:10], data_colors[:10], eigenvectors, ax=left)
    visualize_2d(reconstructed_data[:10], data_colors[:10], eigenvectors, ax=right)

In [None]:
# Show the first ten samples beside their reconstruction from one component.
showcase_2d_p3_np()

Alternatively (and equivalently), we can use [np.outer](https://numpy.org/doc/stable/reference/generated/numpy.outer.html)

In [None]:
def showcase_2d_p3_np():
    """
    Show all samples next to their rank-1 reconstruction, built with an
    outer product.

    The principal eigenvector v gives the 2x2 matrix v v^T; right-multiplying
    the data by it projects and reconstructs in a single step.
    """
    _, eigenvectors = compute_eigen(X)
    v1 = eigenvectors[:, 0]

    # Transposing a 1D array is a no-op, so this is the outer product of v1
    # with itself.
    projector = np.outer(v1, v1.T)
    reconstructed_data = X.dot(projector)

    fig, (left, right) = plt.subplots(1, 2, figsize=(15, 7))
    visualize_2d(X, data_colors, eigenvectors, ax=left)
    visualize_2d(reconstructed_data, data_colors, eigenvectors, ax=right)

showcase_2d_p3_np()

##$\color{blue}{\text{Step 5:}} \text{ PCA: Calculating Eigenstuff using the SVD}$

As mentioned in lecture, when we have a ton of data, computing the full covariance matrix is burdensome. Instead we calculate the singular value decomposition.

We use [np.linalg.svd](https://numpy.org/doc/stable/reference/generated/numpy.linalg.svd.html) for this purpose.

###$\text{Template}$

In [None]:
def compute_eigen_via_svd(W):
    """
    Template stub for the SVD-based eigen computation (body to be filled in).

    NOTE(review): as written, nothing in this function assigns
    ``eigenvalues`` or ``eigenvectors``, so calling this version fails at the
    return statement unless module-level names with those spellings exist.
    """

    return eigenvalues, eigenvectors

###$\text{Completed}$

In [None]:
def compute_eigen_via_svd(W):
    """
    Compute the covariance eigenvalues and eigenvectors of ``W`` via the SVD.

    The covariance matrix is never formed. With the column-centered data
    written as ``U @ diag(s) @ Vt``, the covariance is
    ``V @ diag(s**2 / (n - 1)) @ V.T``, so the right singular vectors are the
    eigenvectors and ``s**2 / (n - 1)`` are the eigenvalues.

    Parameters
    ----------
    W : 2D array with one sample per row and one feature per column.

    Returns
    -------
    eigenvalues : 1D array, largest first. Matches the eigenvalues of
        ``np.cov(W, rowvar=False)``.
    eigenvectors : 2D array whose column ``i`` is the eigenvector for
        ``eigenvalues[i]`` (defined up to sign).
    """
    # Center each feature; covariance is defined around the mean.
    centered = W - np.mean(W, axis=0)

    # Compute the SVD
    U, s, Vt = np.linalg.svd(centered, full_matrices=False)

    # Squared singular values are eigenvalues of centered.T @ centered;
    # dividing by (n - 1) gives the sample covariance eigenvalues.
    eigenvalues = s**2 / (centered.shape[0] - 1)

    # The eigenvectors are the ROWS of Vt, i.e. the columns of Vt.T
    eigenvectors = Vt.T

    # svd already returns singular values in descending order, so the
    # eigenpairs are sorted with the principal component first.
    return eigenvalues, eigenvectors

###$\text{Showcase Code}$

In [None]:
def showcase_2d_p4_np():
    """
    Repeat the rank-1 reconstruction demo using the SVD-based eigenvectors.

    Prints the shape of the eigenvector array from ``compute_eigen`` and then
    from ``compute_eigen_via_svd``; only the latter is used for the plots.
    """
    _, covariance_vectors = compute_eigen(X)
    print(covariance_vectors.shape)

    _, eigenvectors = compute_eigen_via_svd(X)
    print(eigenvectors.shape)

    v1 = eigenvectors[:, 0]
    scores = project_onto_component(X, v1)
    # (n, 1) * (d,) broadcasts to one scaled copy of v1 per sample.
    reconstructed_data = scores[:, np.newaxis] * v1

    fig, (left, right) = plt.subplots(1, 2, figsize=(15, 7))
    visualize_2d(X, data_colors, eigenvectors, ax=left)
    visualize_2d(reconstructed_data, data_colors, eigenvectors, ax=right)

###$\text{Showcase}$

In [None]:
# Run the reconstruction demo with SVD-derived eigenvectors (also prints both eigenvector shapes).
showcase_2d_p4_np()

#$\color{blue}{\text{Problem 2:}} \text{ PCA in 3D}$

##$\color{blue}{\text{Step 1:}} \text{ Make Data}$

As before, I'll generate some data.

In [None]:
def generate_and_transform_data(n_samples=50, theta=np.pi/4, phi=np.pi/6):
    """
    Build a reproducible 3D point cloud with three different spreads.

    Standard-normal samples (global NumPy seed fixed at 0) are scaled by 0.1,
    0.3 and 1 along the three features, then multiplied by a rotation matrix
    about the x-axis (angle ``theta``) and one about the y-axis (angle
    ``phi``), both in radians.

    Returns the transformed samples, shape (n_samples, 3).
    """
    np.random.seed(0)  # for reproducibility
    samples = np.random.multivariate_normal([0, 0, 0], np.identity(3), n_samples)

    scaling = np.array([
        [0.1, 0, 0],
        [0, 0.3, 0],
        [0, 0, 1]
    ])

    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rot_x = np.array([
        [1, 0, 0],
        [0, cos_t, -sin_t],
        [0, sin_t, cos_t]
    ])

    cos_p, sin_p = np.cos(phi), np.sin(phi)
    rot_y = np.array([
        [cos_p, 0, sin_p],
        [0, 1, 0],
        [-sin_p, 0, cos_p]
    ])

    # Samples are rows and get right-multiplied, so the leftmost factor
    # (the scaling) is applied first, followed by the two rotations.
    transform = scaling.dot(rot_x).dot(rot_y)

    return samples.dot(transform)

Now we'll look at it with Plotly.

In [None]:
import plotly.graph_objs as go

In [None]:
def visualize_3d(data, colors, eigenvectors=None, title="3D Data with Principal Components"):
    """
    Show an interactive Plotly 3D scatter of ``data``.

    ``data`` is read as columns 0, 1, 2 for x, y, z and ``colors`` sets the
    marker colors. When ``eigenvectors`` is given, its first three columns
    are drawn as red, green and blue line segments from the origin, each
    multiplied by 3. All three axes are fixed to the range [-4, 4] with a
    cubic aspect.
    """
    fig = go.Figure()

    # Data points
    point_style = dict(size=5, color=colors, opacity=0.8)
    fig.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2],
                               mode='markers',
                               marker=point_style))

    # Principal component directions (columns of ``eigenvectors``)
    if eigenvectors is not None:
        line_colors = ['red', 'green', 'blue']
        for direction, line_color in zip(eigenvectors.T, line_colors):
            fig.add_trace(go.Scatter3d(x=[0, direction[0]*3],
                                       y=[0, direction[1]*3],
                                       z=[0, direction[2]*3],
                                       mode='lines',
                                       line=dict(color=line_color, width=6)))

    scene = dict(aspectmode='cube',
                 xaxis=dict(range=[-4, 4]),
                 yaxis=dict(range=[-4, 4]),
                 zaxis=dict(range=[-4, 4]))
    fig.update_layout(title=title, height=1000, scene=scene)
    fig.show(height=1000)


In [None]:
# Module-level inputs read by the 3D showcase functions.
# NOTE(review): this rebinds the same X / data_colors names that the 2D
# showcase functions read, so re-running those afterwards uses the 3D data.
X = generate_and_transform_data()
colors_list = ['red', 'blue', 'green', 'orange', 'purple']
# Cycle the five colors until there is at least one per sample, then trim to len(X).
data_colors = np.array(colors_list * (len(X) // len(colors_list) + 1))[:len(X)]
# Shuffle the color assignment in place (draws from NumPy's global RNG).
np.random.shuffle(data_colors)


def showcase_3d_p1():
    """Show the module-level 3D data without any eigenvectors drawn."""
    visualize_3d(data=X, colors=data_colors)

In [None]:
# Open the interactive 3D scatter of the generated data.
showcase_3d_p1()

##$\color{blue}{\text{Step 2:}} \text{ Calculating Eigenstuff}$

In [None]:
def showcase_3d_p2():
    """Show the module-level 3D data with its covariance eigenvectors drawn."""
    _, principal_axes = compute_eigen(X)
    visualize_3d(X, data_colors, principal_axes)

In [None]:
# Same 3D scatter, now with the covariance eigenvectors drawn from the origin.
showcase_3d_p2()

##$\color{blue}{\text{Step 3:}} \text{ Projecting to Components}$

In [None]:
def drop_and_project(data, eigenvectors, component_to_drop=2):
    """
    Project ``data`` onto every eigenvector except one.

    ``component_to_drop`` is the column index removed from ``eigenvectors``
    before projecting. With three eigenvectors sorted by descending
    eigenvalue, the default of 2 discards the smallest component.

    Returns the coordinates of each sample in the remaining components.
    """
    kept_components = np.delete(eigenvectors, component_to_drop, axis=1)
    return data.dot(kept_components)

In [None]:
def visualize_2d(data, eigenvectors, colors):
    """
    Scatter-plot data already expressed in principal-component coordinates
    and show the figure.

    ``eigenvectors`` is accepted but not used: the two components are drawn
    as the unit arrows (1, 0) in red and (0, 1) in green.

    NOTE: this definition replaces the earlier ``visualize_2d`` and takes its
    arguments in a different order (data, eigenvectors, colors).
    """
    plt.scatter(data[:, 0], data[:, 1], c=colors)

    # One symmetric range (largest absolute coordinate, padded by 1) for both axes
    half_width = max(data.max(), -data.min()) + 1
    plt.xlim([-half_width, half_width])
    plt.ylim([-half_width, half_width])

    # In PCA coordinates the components are simply the coordinate axes
    unit_arrows = (((1, 0), 'red'), ((0, 1), 'green'))
    for (dx, dy), arrow_color in unit_arrows:
        plt.quiver(0, 0, dx, dy, angles='xy', scale_units='xy', scale=1, color=arrow_color)

    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.title('2D PCA result')
    plt.grid(True)
    for draw_axis_line in (plt.axhline, plt.axvline):
        draw_axis_line(0, color='black',linewidth=0.5)
    plt.gca().set_aspect('equal', adjustable='box')  # equal units on both axes
    plt.show()

In [None]:
def showcase_3d_p3():
    """Project the module-level 3D data onto two components and plot the result."""
    _, eigenvectors = compute_eigen(X)
    flattened = drop_and_project(X, eigenvectors)
    visualize_2d(flattened, eigenvectors, data_colors)

In [None]:
# Plot the 3D data after projecting it onto two principal components.
showcase_3d_p3()

##$\color{blue}{\text{Step 4:}} \text{ Projecting Compressed Data back to Feature Space}$

In [None]:
def project_to_original_space(data, eigenvectors):
    """
    Map two-component coordinates back into the original feature space.

    ``data`` holds one row per sample with two coordinates, taken to be along
    the first two columns of ``eigenvectors``. This assumes the eigenvectors
    are sorted by descending eigenvalue and that the dropped component was
    the last one.
    """
    leading_pair = eigenvectors[:, :2]
    # Each output row is coord_1 * v_1 + coord_2 * v_2.
    return data.dot(leading_pair.T)

In [None]:
def showcase_3d_p4():
    """
    Compress the module-level 3D data to two components, map it back into
    3D, and show the result with the eigenvectors drawn.
    """
    _, eigenvectors = compute_eigen(X)
    compressed = drop_and_project(X, eigenvectors)
    restored = project_to_original_space(compressed, eigenvectors)
    visualize_3d(restored, data_colors, eigenvectors)

In [None]:
# Show the two-component projection mapped back into the 3D feature space.
showcase_3d_p4()

Or, again, we could more simply use what we learned in lecture to do this.

In [None]:
def flatten_in_feature_space(data, eigenvectors, k):
    """
    Project ``data`` onto the first ``k`` eigenvectors and map it straight
    back to the original feature space, i.e. compute X W W^T.

    ``eigenvectors`` holds one eigenvector per column; the first ``k``
    columns form W.
    """
    W = eigenvectors[:, :k]

    # Coordinates in the k-dimensional subspace, then back to feature space.
    subspace_coords = data @ W
    return subspace_coords @ W.T

In [None]:
def showcase_3d_p4():
    """
    Flatten the module-level 3D data onto its two leading components in a
    single step and show the result with the eigenvectors drawn.
    """
    _, eigenvectors = compute_eigen(X)
    flattened = flatten_in_feature_space(X, eigenvectors, 2)
    visualize_3d(flattened, data_colors, eigenvectors)

In [None]:
# Show the data flattened onto its two leading components, in the 3D feature space.
showcase_3d_p4()

#$\color{blue}{\text{Dev Junk}}$

In [None]:
# import numpy as np
# import matplotlib.pyplot as plt
# import plotly.graph_objs as go


# def generate_and_transform_data(n_samples=50, theta=np.pi/4, phi=np.pi/6):
#     np.random.seed(0)  # for reproducibility

#     # Generate data with identity covariance matrix
#     mean = [0, 0, 0]
#     X = np.random.multivariate_normal(mean, np.identity(3), n_samples)

#     # Define the scaling matrix
#     S = np.array([
#         [0.1, 0, 0],
#         [0, 0.3, 0],
#         [0, 0, 1]
#     ])

#     # Define the rotation matrices

#     # Rotation around the x-axis
#     Rx = np.array([
#         [1, 0, 0],
#         [0, np.cos(theta), -np.sin(theta)],
#         [0, np.sin(theta), np.cos(theta)]
#     ])

#     # Rotation around the y-axis
#     Ry = np.array([
#         [np.cos(phi), 0, np.sin(phi)],
#         [0, 1, 0],
#         [-np.sin(phi), 0, np.cos(phi)]
#     ])

#     # Composite transformation (scaling followed by rotation)
#     T = Ry.dot(Rx).dot(S)

#     # Transform the data
#     X_transformed = X.dot(T)

#     return X_transformed


# def compute_eigen(data):
#     # Compute the covariance matrix
#     cov_matrix = np.cov(data, rowvar=False)

#     # Compute eigenvalues and eigenvectors
#     eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

#     # Sort eigenvalues and eigenvectors in decreasing order
#     idx = eigenvalues.argsort()[::-1]
#     eigenvalues = eigenvalues[idx]
#     eigenvectors = eigenvectors[:,idx]

#     return eigenvalues, eigenvectors


# def drop_and_project(data, eigenvectors, component_to_drop=2):
#     # Remove the component with smallest eigenvalue (default is the last component)
#     projection_matrix = np.delete(eigenvectors, component_to_drop, axis=1)

#     # Project data onto the subspace of the remaining components
#     return data.dot(projection_matrix)


# def visualize_2d(data, eigenvectors, colors):
#     plt.scatter(data[:, 0], data[:, 1], c=colors)

#     # Determine the range to set the axis limits
#     max_val = max(data.max(), -data.min()) + 1
#     plt.xlim([-max_val, max_val])
#     plt.ylim([-max_val, max_val])

#     # Plot the principal components
#     origin = [0, 0]  # Origin point
#     plt.quiver(*origin, 1, 0, angles='xy', scale_units='xy', scale=1, color='red')
#     plt.quiver(*origin, 0, 1, angles='xy', scale_units='xy', scale=1, color='green')

#     plt.xlabel('Principal Component 1')
#     plt.ylabel('Principal Component 2')
#     plt.title('2D PCA result')
#     plt.grid(True)
#     plt.axhline(0, color='black',linewidth=0.5)
#     plt.axvline(0, color='black',linewidth=0.5)
#     plt.gca().set_aspect('equal', adjustable='box')  # Set aspect ratio to equal for true representation
#     plt.savefig("2D_data.png")
#     plt.close()


# def visualize_3d(data, eigenvectors, colors, title="3D Data with Principal Components"):
#     fig = go.Figure()

#     # Add the scatter plot for data points
#     fig.add_trace(go.Scatter3d(x=data[:,0], y=data[:,1], z=data[:,2],
#                                mode='markers',
#                                marker=dict(size=5, color=colors, opacity=0.8)))

#     # Add principal component vectors
#     for vector, color in zip(eigenvectors.T, ['red', 'green', 'blue']):
#         fig.add_trace(go.Scatter3d(x=[0, vector[0]*3],
#                                    y=[0, vector[1]*3],
#                                    z=[0, vector[2]*3],
#                                    mode='lines',
#                                    line=dict(color=color, width=6)))

#     fig.update_layout(title=title, scene=dict(aspectmode='cube',
#                                               xaxis=dict(range=[-4, 4]),
#                                               yaxis=dict(range=[-4, 4]),
#                                               zaxis=dict(range=[-4, 4])))
#     fig.show()


# def project_to_original_space(data, eigenvectors):
#     # Assuming the eigenvectors are sorted by eigenvalue in descending order
#     # and you've already dropped the smallest component in the 2D representation
#     major_eigenvectors = eigenvectors[:, :2]

#     # Transform the 2D data back to the original 3D space
#     transformed_data = data.dot(major_eigenvectors.T)

#     return transformed_data


# def main():
#     X = generate_and_rotate_data()
#     colors_list = ['red', 'blue', 'green', 'orange', 'purple']
#     data_colors = np.array(colors_list * (len(X) // len(colors_list) + 1))[:len(X)]
#     np.random.shuffle(data_colors)

#     eigenvalues, eigenvectors = compute_eigen(X)
#     X_2d = drop_and_project(X, eigenvectors)
#     X_3d_projected = project_to_original_space(X_2d, eigenvectors)
#     visualize_2d(X_2d, eigenvectors, data_colors)
#     visualize_3d(X, eigenvectors, data_colors)
#     visualize_3d(X_3d_projected, eigenvectors, data_colors)


# if __name__ == "__main__":
#     main()


In [None]:
import numpy as np
import matplotlib.pyplot as plt


def generate_and_rotate_data(n_samples=50):
    """
    Generate 2D data, then scale and rotate it in the plane.

    Standard-normal samples are drawn from NumPy's global RNG (no seed is
    set here), stretched by 2 and 10 along the two features, and
    right-multiplied by a 23-degree rotation matrix.

    Returns ``(X, X2)``: the transformed samples and the untouched
    standard-normal samples.
    """
    X2 = np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]], n_samples)

    # Stretch: factor 2 on feature 1, factor 10 on feature 2
    stretch = np.array([[2, 0],
                        [0, 10]])

    # In-plane rotation by 23 degrees
    angle = np.radians(23)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rotation = np.array([[cos_a, -sin_a],
                         [sin_a, cos_a]])

    X = X2.dot(stretch).dot(rotation)

    return X, X2


def compute_eigen(data):
    """
    Return the covariance eigenvalues and eigenvectors of ``data``, largest
    eigenvalue first.

    ``data`` has one sample per row. Eigenvector ``i`` is the column
    ``eigenvectors[:, i]`` of the second return value.
    """
    sample_covariance = np.cov(data, rowvar=False)
    ascending_values, ascending_vectors = np.linalg.eigh(sample_covariance)

    # Permutation that puts the largest eigenvalue first
    order = ascending_values.argsort()[::-1]

    return ascending_values[order], ascending_vectors[:, order]


def visualize_2d(data, eigenvectors, colors):
    """
    Scatter-plot 2D data with black-edged markers, overlay the first two
    eigenvector columns as arrows from the origin, and show the figure.

    Column 0 of ``eigenvectors`` is drawn in black and column 1 in gray,
    each multiplied by 3.
    """
    plt.scatter(data[:, 0], data[:, 1], c=colors, edgecolor='k')

    # Lengthen the unit eigenvectors so the arrows are visible
    scale = 3
    for column, arrow_color in enumerate(('black', 'gray')):
        plt.quiver(0, 0, eigenvectors[0, column]*scale, eigenvectors[1, column]*scale, angles='xy', scale_units='xy', color=arrow_color)

    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('2D Data with Principal Components')
    plt.grid(True)
    for draw_axis_line in (plt.axhline, plt.axvline):
        draw_axis_line(0, color='black',linewidth=0.5)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()


def main():
    """
    Generate 2D data, compute the eigenvectors of the transformed samples,
    and plot both the transformed and the untransformed samples with those
    eigenvectors overlaid.
    """
    transformed, untransformed = generate_and_rotate_data()

    palette = ['red', 'blue', 'green', 'orange', 'purple']
    # Repeat the palette often enough to cover every sample, then trim.
    repeats = len(transformed) // len(palette) + 1
    data_colors = np.array(palette * repeats)[:len(transformed)]
    np.random.shuffle(data_colors)

    _, eigenvectors = compute_eigen(transformed)
    for samples in (transformed, untransformed):
        visualize_2d(samples, eigenvectors, data_colors)

# Run the demo when the cell executes.
main()



In [None]:
import numpy as np

# Generate 50 correlated 3D samples.
np.random.seed(0)  # for reproducibility
mean = [0, 0, 0]
cov = [[0.1, 0.05, 0.02],  # Reduced variance along the first dimension
       [0.05, 3, 0.5],
       [0.02, 0.5, 1]]
X = np.random.multivariate_normal(mean, cov, 50)

# Rotation around the x-axis
theta = np.pi/4  # 45-degree rotation
Rx = np.array([
    [1, 0, 0],
    [0, np.cos(theta), -np.sin(theta)],
    [0, np.sin(theta), np.cos(theta)]
])

# Rotation around the y-axis
phi = np.pi/6  # 30-degree rotation
Ry = np.array([
    [np.cos(phi), 0, np.sin(phi)],
    [0, 1, 0],
    [-np.sin(phi), 0, np.cos(phi)]
])

# Composite rotation
R = Ry.dot(Rx)

X = X.dot(R)

# Covariance of the features (samples are rows, hence rowvar=False).
covariance_matrix = np.cov(X, rowvar=False)
# The covariance matrix is symmetric, so use eigh: it guarantees real
# eigenvalues and orthonormal eigenvectors. Its output is ascending, so
# reorder to put the principal component first.
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
descending = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[descending]
eigenvectors = eigenvectors[:, descending]

# Drop the weakest component and project onto the remaining two. Because of
# the ordering above, column 0 of X_pca is component 1 and column 1 is
# component 2.
index_of_smallest = np.argmin(eigenvalues)
eigenvectors_dropped = np.delete(eigenvectors, index_of_smallest, axis=1)
X_pca = X.dot(eigenvectors_dropped)


import matplotlib.pyplot as plt

colors_options = plt.cm.tab10.colors
# One color per projected sample, cycling through the palette.
colors = [colors_options[i % len(colors_options)] for i in range(len(X_pca))]

# Plot data
plt.scatter(X_pca[:, 0], X_pca[:, 1], color=colors)
# X_pca is expressed in the basis of the retained eigenvectors, so in this
# plot each retained component is simply a coordinate axis. (The feature-space
# entries of the eigenvectors have no meaning in these coordinates.)
plt.quiver(0, 0, 1, 0, angles='xy', scale_units='xy', scale=2, color='r')
plt.quiver(0, 0, 0, 1, angles='xy', scale_units='xy', scale=2, color='g')
plt.title('2D PCA')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.savefig('2D_PCA_plot.png')


import plotly.graph_objs as go

# Scatter trace for the rotated 3D samples, colored like the 2D plot.
trace1 = go.Scatter3d(
    x=X[:, 0],
    y=X[:, 1],
    z=X[:, 2],
    mode='markers',
    marker=dict(size=6, color=colors, opacity=0.9, line=dict(width=0.5, color='white'))
)

# Eigenvectors as lines: one segment per eigenvector column, from the origin
# to twice the eigenvector, colored red / green / blue in column order.
lines = []
for vector, color in zip(eigenvectors.T, ['red', 'green', 'blue']):
    line = go.Scatter3d(
        x=[0, vector[0]*2],
        y=[0, vector[1]*2],
        z=[0, vector[2]*2],
        marker=dict(size=0),
        line=dict(width=6, color=color)
    )
    lines.append(line)

# Remove all figure margins.
layout = go.Layout(
    margin=dict(l=0, r=0, b=0, t=0)
)

# Combine the traces and write the figure to an HTML file in the working directory.
fig = go.Figure(data=[trace1] + lines, layout=layout)
fig.write_html("3D_PCA_plot.html")


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Function to generate random colors
def generate_random_colors(n):
    """Return ``n`` random RGB tuples with each channel drawn from [0, 1)."""
    channels_per_color = 3
    return [
        tuple(np.random.rand() for _ in range(channels_per_color))
        for _ in range(n)
    ]

# Step 1: Generate Sample Data
np.random.seed(0)  # for reproducibility
mean = [0, 0]
cov = [[1, 0.8], [0.8, 1]]  # unit variances, covariance 0.8 (correlated features)
X = np.random.multivariate_normal(mean, cov, 100)

colors = generate_random_colors(X.shape[0])

# Plot original data with random colors. A single scatter call with one color
# per point draws the same picture as one call per point, without creating a
# separate artist for every sample.
plt.scatter(X[:, 0], X[:, 1], alpha=0.6, edgecolors="w", linewidth=0.5, color=colors)
plt.title('Original Data')
plt.xlabel('X1')
plt.ylabel('X2')
plt.show()

# Step 2: Standardize Data (zero mean, unit standard deviation per feature)
X_normalized = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

# Step 3: Compute the Covariance Matrix (samples are rows, hence rowvar=False)
covariance_matrix = np.cov(X_normalized, rowvar=False)

# Step 4: Compute Eigenvalues and Eigenvectors
# The covariance matrix is symmetric, so eigh is the right solver: it
# guarantees real eigenvalues and orthonormal eigenvectors (as columns).
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

# Step 5: Choose Principal Components
# Pair each eigenvalue with its eigenvector column, largest eigenvalue first
eigenpairs = [(value, eigenvectors[:, i]) for i, value in enumerate(eigenvalues)]
eigenpairs.sort(key=lambda pair: pair[0], reverse=True)

# Step 6: Transform Original Data
# W holds the sorted eigenvectors as columns, principal component first
W = np.hstack([vector.reshape(-1, 1) for _, vector in eigenpairs])
X_pca = X_normalized.dot(W)

# Plot transformed data with random colors (same per-point colors as the
# original-data plot), using a single scatter call for all points.
plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.6, edgecolors="w", linewidth=0.5, color=colors)
plt.title('Transformed Data')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()


# Resources