In [1]:
!pip install umap-learn
!pip install plotly

Collecting umap-learn
  Downloading umap_learn-0.5.7-py3-none-any.whl.metadata (21 kB)
Collecting pynndescent>=0.5 (from umap-learn)
  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)
Downloading umap_learn-0.5.7-py3-none-any.whl (88 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/88.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.8/88.8 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.13 umap-learn-0.5.7


In [4]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, FactorAnalysis
from sklearn.manifold import TSNE, MDS, Isomap, LocallyLinearEmbedding
import umap.umap_ as umap
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
import plotly.express as px

# Load example datasets
digits = datasets.load_digits()
iris = datasets.load_iris()
faces = datasets.fetch_olivetti_faces()

In [5]:
# LLE on Digits Dataset
lle = LocallyLinearEmbedding(n_components=2)
digits_lle = lle.fit_transform(digits.data)
fig = px.scatter(x=digits_lle[:, 0], y=digits_lle[:, 1], color=digits.target.astype(str))
fig.show()


In [6]:
# t-SNE on Iris Dataset
tsne = TSNE(n_components=2, random_state=42)
iris_tsne = tsne.fit_transform(iris.data)
fig = px.scatter(x=iris_tsne[:, 0], y=iris_tsne[:, 1], color=iris.target_names[iris.target])
fig.show()

In [7]:
# ISOMAP on Faces Dataset
isomap = Isomap(n_components=2)
faces_isomap = isomap.fit_transform(faces.data)
fig = px.scatter(x=faces_isomap[:, 0], y=faces_isomap[:, 1], color=faces.target.astype(str))
fig.show()


The number of connected components of the neighbors graph is 4 > 1. Completing the graph to fit Isomap might be slow. Increase the number of neighbors to avoid this issue.


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.



In [8]:
# UMAP on Digits Dataset
reducer = umap.UMAP()
digits_umap = reducer.fit_transform(digits.data)
fig = px.scatter(x=digits_umap[:, 0], y=digits_umap[:, 1], color=digits.target.astype(str))
fig.show()

In [9]:
# MDS on Iris Dataset
mds = MDS(n_components=2)
iris_mds = mds.fit_transform(iris.data)
fig = px.scatter(x=iris_mds[:, 0], y=iris_mds[:, 1], color=iris.target_names[iris.target])
fig.show()

In [10]:
# PCA with randomized solver on Faces Dataset
randomized_pca = PCA(n_components=2, svd_solver='randomized')
faces_pca = randomized_pca.fit_transform(faces.data)
fig = px.scatter(x=faces_pca[:, 0], y=faces_pca[:, 1], color=faces.target.astype(str))
fig.show()

In [11]:
# Kernel PCA on Digits Dataset
kernel_pca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
digits_kpca = kernel_pca.fit_transform(digits.data)
fig = px.scatter(x=digits_kpca[:, 0], y=digits_kpca[:, 1], color=digits.target.astype(str))
fig.show()

In [12]:
# Incremental PCA on Iris Dataset
incremental_pca = IncrementalPCA(n_components=2)
iris_inc_pca = incremental_pca.fit_transform(iris.data)
fig = px.scatter(x=iris_inc_pca[:, 0], y=iris_inc_pca[:, 1], color=iris.target_names[iris.target])
fig.show()

In [13]:
# Factor Analysis on Faces Dataset
factor_analysis = FactorAnalysis(n_components=2)
faces_fa = factor_analysis.fit_transform(faces.data)
fig = px.scatter(x=faces_fa[:, 0], y=faces_fa[:, 1], color=faces.target.astype(str))
fig.show()

In [14]:
# Autoencoder on Digits Dataset
input_img = Input(shape=(64,))
encoded = Dense(32, activation='relu')(input_img)
encoded = Dense(16, activation='relu')(encoded)
decoded = Dense(32, activation='relu')(encoded)
decoded = Dense(64, activation='sigmoid')(decoded)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
autoencoder.fit(digits.data, digits.data, epochs=50, batch_size=256, shuffle=True, verbose=0)

# Using encoder part to reduce dimensionality
encoder = Model(input_img, encoded)
digits_encoded = encoder.predict(digits.data)
fig = px.scatter(x=digits_encoded[:, 0], y=digits_encoded[:, 1], color=digits.target.astype(str))
fig.show()

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
