In [34]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import Isomap, LocallyLinearEmbedding, TSNE, SpectralEmbedding, MDS
from sklearn.decomposition import KernelPCA
import umap.umap_ as umap
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go
from utils import load_graph
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import jaccard_score

In [26]:
# Interactive 3D surface plot using Plotly
def plot_interactive_3d_surface(embedding, title):
    x = embedding[:, 0]
    y = embedding[:, 1]
    z = embedding[:, 2]

    fig = go.Figure(data=[go.Scatter3d(
        x=x, y=y, z=z,
        mode='markers',
        marker=dict(
            size=1.5,
            color=z,
            colorscale='matter',
            opacity=0.8
        )
    )])

    fig.update_layout(
        title=title,
        scene=dict(
            xaxis_title='x',
            yaxis_title='y',
            zaxis_title='z'
        )
    )

    fig.show()


def plot_interactive_3d_with_links(embedding, title, k=5):
    # Compute k-nearest neighbors
    nn = NearestNeighbors(n_neighbors=k, metric="euclidean")
    nn.fit(embedding)
    _, neighbors = nn.kneighbors(embedding)

    # Extract points
    x = embedding[:, 0]
    y = embedding[:, 1]
    z = embedding[:, 2]

    # Initialize the plot
    fig = go.Figure()

    # Plot points
    fig.add_trace(go.Scatter3d(
        x=x, y=y, z=z,
        mode='markers',
        marker=dict(
            size=3,
            color=z,
            colorscale='matter',
            opacity=0.8
        ),
        name='Points'
    ))

    # Plot lines for neighbors
    for i in range(len(embedding)):
        for j in neighbors[i]:
            # Line endpoints
            fig.add_trace(go.Scatter3d(
                x=[x[i], x[j]],
                y=[y[i], y[j]],
                z=[z[i], z[j]],
                mode='lines',
                line=dict(color='lightblue', width=1),
                name='Neighbor Link',
                showlegend=False
            ))

    # Layout settings
    fig.update_layout(
        title=title,
        scene=dict(
            xaxis_title='x',
            yaxis_title='y',
            zaxis_title='z'
        )
    )

    fig.show()


# Function to compute Jaccard similarity for nearest neighbors
def compute_jaccard_similarity(original_neighbors, reduced_neighbors, distance_matrix):
    # Ensure both are binary (1 for neighbor, 0 for not)
    jaccard_similarities = []
    for orig_neighbors, red_neighbors in zip(original_neighbors, reduced_neighbors):
        # Create binary vectors of size N
        orig_binary = np.zeros(len(distance_matrix))
        red_binary = np.zeros(len(distance_matrix))
        orig_binary[orig_neighbors] = 1
        red_binary[red_neighbors] = 1
        # Compute Jaccard similarity
        jaccard_similarities.append(jaccard_score(orig_binary, red_binary))
    return np.mean(jaccard_similarities)

# Function to find k-nearest neighbors in high-dimensional space
def get_nearest_neighbors(embedding, k):
    """Find the indices of k nearest neighbors for each point."""
    nn = NearestNeighbors(n_neighbors=k, metric="euclidean")
    nn.fit(embedding)
    _, indices = nn.kneighbors(embedding)
    return indices


# Load data

In [27]:
distance_matrix = np.load("/Users/damian_gerber/Library/Mobile Documents/com~apple~CloudDocs/01 - Master Mathematics/MIT/Thesis/thesis_dgerber/src/data/graph_data/D_mat.npy")
locations = np.load("/Users/damian_gerber/Library/Mobile Documents/com~apple~CloudDocs/01 - Master Mathematics/MIT/Thesis/thesis_dgerber/src/data/graph_data/locations.npy")
graph = load_graph("/Users/damian_gerber/Library/Mobile Documents/com~apple~CloudDocs/01 - Master Mathematics/MIT/Thesis/thesis_dgerber/src/data/graph_data/zurich.graphml")

In [28]:
locations = [[graph.nodes[location]["y"], graph.nodes[location]["x"]] for location in graph.nodes]
locations = np.array(locations)

In [29]:
# Number of nearest neighbors to check
k = 300

# Run the models

In [None]:
# Initialize dimensionality reduction techniques
isomap = Isomap(n_neighbors=k, n_components=3)
lle = LocallyLinearEmbedding(n_neighbors=k, n_components=3, method="standard")
tsne = TSNE(n_components=3, metric="precomputed", random_state=42, init="random", perplexity=k)
umap_reducer = umap.UMAP(n_neighbors=k, n_components=3, metric="precomputed", random_state=42)
laplacian = SpectralEmbedding(n_components=3, affinity='precomputed')
kernel_pca = KernelPCA(n_components=3, kernel='precomputed')
mds = MDS(n_components=3, dissimilarity='precomputed', random_state=42)
mds_nm = MDS(n_components=3, dissimilarity='precomputed', random_state=42, metric=False)
ltsa = LocallyLinearEmbedding(n_neighbors=k, n_components=3, method="ltsa")

# Apply dimensionality reduction
isomap_embedding = isomap.fit_transform(distance_matrix)
print("iso is done")
lle_embedding = lle.fit_transform(distance_matrix)
print("lle is done")
tsne_embedding = tsne.fit_transform(distance_matrix)
print("tsne is done")
umap_embedding = umap_reducer.fit_transform(distance_matrix)
print("umap is done")
laplacian_embedding = laplacian.fit_transform(distance_matrix)
print("laplacian is done")
kernel_pca_embedding = kernel_pca.fit_transform(distance_matrix)
print("kernel_pca is done")
mds_embedding = mds.fit_transform(distance_matrix)
print("mds is done")
mds_nm_embedding = mds_nm.fit_transform(distance_matrix)
print("mds_nm is done")
ltsa_embedding = ltsa.fit_transform(distance_matrix)
print("ltsa is done")

iso is done
lle is done
tsne is done



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


using precomputed metric; inverse_transform will be unavailable


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



umap is done
laplacian is done


# Comparison

In [40]:
# Plot surfaces
plot_interactive_3d_surface(isomap_embedding, "Isomap")
plot_interactive_3d_surface(lle_embedding, "Locally Linear Embedding (LLE)")
plot_interactive_3d_surface(tsne_embedding, "t-SNE")
plot_interactive_3d_surface(umap_embedding, "UMAP")
plot_interactive_3d_surface(laplacian_embedding, "Laplacian Eigenmaps")
plot_interactive_3d_surface(kernel_pca_embedding, "Kernel PCA")
plot_interactive_3d_surface(mds_embedding, "MDS")
plot_interactive_3d_surface(mds_nm_embedding, "MDS (Non-Metric)")
plot_interactive_3d_surface(ltsa_embedding, "Local Tangent Space Alignment (LTSA)")


In [32]:

# Find nearest neighbors in the original high-dimensional space
original_neighbors = get_nearest_neighbors(distance_matrix, k)

# Find nearest neighbors in each reduced space
isomap_neighbors = get_nearest_neighbors(isomap_embedding, k)
lle_neighbors = get_nearest_neighbors(lle_embedding, k)
tsne_neighbors = get_nearest_neighbors(tsne_embedding, k)
umap_neighbors = get_nearest_neighbors(umap_embedding, k)
laplacian_neighbors = get_nearest_neighbors(laplacian_embedding, k)

# Compute Jaccard similarities for each method
isomap_jaccard = compute_jaccard_similarity(original_neighbors, isomap_neighbors, distance_matrix)
lle_jaccard = compute_jaccard_similarity(original_neighbors, lle_neighbors, distance_matrix)
tsne_jaccard = compute_jaccard_similarity(original_neighbors, tsne_neighbors, distance_matrix)
umap_jaccard = compute_jaccard_similarity(original_neighbors, umap_neighbors, distance_matrix)
laplacian_jaccard = compute_jaccard_similarity(original_neighbors, laplacian_neighbors, distance_matrix)

# Print the Jaccard similarities
print(f"Isomap Jaccard Similarity: {isomap_jaccard}")
print(f"LLE Jaccard Similarity: {lle_jaccard}")
print(f"t-SNE Jaccard Similarity: {tsne_jaccard}")
print(f"UMAP Jaccard Similarity: {umap_jaccard}")
print(f"Laplacian Jaccard Similarity: {laplacian_jaccard}")

Isomap Jaccard Similarity: 0.8270701277674128
LLE Jaccard Similarity: 0.6326227236477693
t-SNE Jaccard Similarity: 0.601651600739334
UMAP Jaccard Similarity: 0.5211434930256684
Laplacian Jaccard Similarity: 0.12297674106947812
