In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

In [None]:
def scatter_plot(data, title=None, x_label=None, y_label=None):
    """Draw a scatter plot of the first two columns of ``data`` and show it.

    Args:
        data: Object supporting ``data[:, 0]`` and ``data[:, 1]`` indexing;
            column 0 goes on the x-axis and column 1 on the y-axis.
        title: Optional plot title; skipped when falsy.
        x_label: Optional x-axis label; skipped when falsy.
        y_label: Optional y-axis label; skipped when falsy.

    Returns:
        None. The plot is rendered through ``plt.show()``.
    """
    x_values, y_values = data[:, 0], data[:, 1]
    # Small, semi-transparent markers so overlapping points stay readable.
    plt.scatter(x_values, y_values, alpha=0.6, s=5)

    # Only apply the pieces of text that were actually supplied.
    decorations = (
        (plt.title, title),
        (plt.xlabel, x_label),
        (plt.ylabel, y_label),
    )
    for set_text, text in decorations:
        if text:
            set_text(text)

    plt.show()

In [None]:
# Load the dataset.
# np.loadtxt with default arguments parses whitespace-separated numeric text
# into a float array. The path is relative to the kernel's working directory.
# NOTE(review): the standardization cell reduces over axis 0 and takes a
# covariance, so this presumably yields a 2-D (samples x features) array —
# confirm against the file.
data = np.loadtxt("data/pca_data.txt")

In [None]:
# Standardize the data: subtract the per-column mean and divide by the
# per-column standard deviation (both reduced over axis 0).
mean = np.mean(data, axis=0)
std = np.std(data, axis=0)
# A constant column has std == 0 and would become NaN (0 / 0), poisoning the
# covariance matrix. Dividing by 1 instead leaves such a column at all zeros;
# columns with non-zero std are unaffected.
standardized_data = (data - mean) / np.where(std == 0, 1.0, std)

# Covariance matrix of the columns (transposed because np.cov treats rows as
# variables by default).
# Note: np.std defaults to ddof=0 while np.cov defaults to ddof=1, so the
# diagonal is n / (n - 1) rather than exactly 1. This only rescales the
# eigenvalues; the eigenvectors are unaffected.
cov_matrix = np.cov(standardized_data.T)

# Eigenvalues and eigenvectors (one eigenvector per column).
# A covariance matrix is symmetric, so use eigh: it returns real eigenvalues
# and orthonormal eigenvectors, whereas the general-purpose eig may return
# complex-typed or non-orthogonal results due to round-off. eigh returns
# eigenvalues in ascending order; they are re-sorted before being used.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

In [None]:
# Reorder the eigen-decomposition so the largest eigenvalue comes first,
# keeping each eigenvector column paired with its eigenvalue.
sorted_indices = np.flip(np.argsort(eigenvalues))
eigenvalues, eigenvectors = eigenvalues[sorted_indices], eigenvectors[:, sorted_indices]

In [None]:
# Project the standardized samples onto the first two eigenvector columns
# and plot the resulting 2-D coordinates.
projected_data = standardized_data.dot(eigenvectors[:, :2])
scatter_plot(
    projected_data,
    title="PCA: 2D Scatter Plot",
    x_label="Principal Component 1",
    y_label="Principal Component 2",
)

In [None]:
# Make UMAP plot: fit UMAP with a fixed random_state and plot the embedding.
# NOTE(review): this is fitted on the raw `data`, whereas the PCA cell uses
# `standardized_data` — confirm that the difference is intended.
# NOTE(review): `umap` is imported here rather than in the top import cell.
import umap

mapper = umap.UMAP(random_state=42)
umap_data = mapper.fit_transform(data)
scatter_plot(
    umap_data,
    title="UMAP: 2D Scatter Plot",
    x_label="UMAP 1",
    y_label="UMAP 2",
)


In [None]:
# t-SNE embedding, following
# https://www.datacamp.com/tutorial/introduction-t-sne
# NOTE(review): like the UMAP cell, this is fitted on the raw `data` rather
# than `standardized_data` — confirm that the difference is intended.

# Two output dimensions, fixed random_state.
tsne = TSNE(random_state=42, n_components=2)

# Fit the model and get the embedded coordinates in one step.
tsne_embedding = tsne.fit_transform(data)

scatter_plot(
    tsne_embedding,
    title="t-SNE: 2D Scatter Plot",
    x_label="t-SNE 1",
    y_label="t-SNE 2",
)