In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import MDS, Isomap, LocallyLinearEmbedding, SpectralEmbedding, TSNE
import plotly.express as px

# -------------------------------------
# FUNCTION: Detect solid or hollow via PCA
# -------------------------------------
def detect_solid_or_hollow(X, threshold=0.95, solid_relative_cutoff=0.25):
    pca = PCA()
    pca.fit(X)
    explained = np.cumsum(pca.explained_variance_ratio_)
    individual = pca.explained_variance_ratio_
    intrinsic_dim = np.argmax(explained >= threshold) + 1

    if intrinsic_dim >= 3 and len(individual) >= 3:
        ratio_3v12 = individual[2] / (individual[0] + individual[1])
        return "Solid" if ratio_3v12 >= solid_relative_cutoff else "Hollow"
    return "Hollow"

# -------------------------------------
# DR Methods generator
# -------------------------------------
def get_methods(n):
    return {
        "PCA": PCA(n_components=n),
        "MDS": MDS(n_components=n, random_state=42),
        "Isomap": Isomap(n_components=n, n_neighbors=10),
        "LLE": LocallyLinearEmbedding(n_components=n, n_neighbors=10, method='standard'),
        "Hessian LLE": LocallyLinearEmbedding(n_components=n, n_neighbors=10, method='hessian'),
        "Modified LLE": LocallyLinearEmbedding(n_components=n, n_neighbors=10, method='modified'),
        "Spectral Embedding": SpectralEmbedding(n_components=n, n_neighbors=10),
        "t-SNE": TSNE(n_components=n, perplexity=30, random_state=42),
        "LTSA": LocallyLinearEmbedding(n_components=n, n_neighbors=10, method='ltsa')
    }

# -------------------------------------
# Loop over 3D → 10D true random datasets
# -------------------------------------
results = {}
times = []
structure = []

for n_components in range(3, 11):
    print(f"\n🔢 Generating true random data of shape (1000, {n_components})")
    X = np.random.rand(1000, n_components)
    methods = get_methods(n_components)

    for name, model in methods.items():
        print(f"→ Running {name} ({n_components}D)", end=" ... ")
        try:
            start = time.time()
            X_trans = model.fit_transform(X)
            duration = time.time() - start
            results[(name, n_components)] = X_trans
            times.append((name, n_components, duration))
            print(f"✅ {duration:.3f}s")

            # Solid/Hollow classification (PCA only)
            if name == "PCA":
                label = detect_solid_or_hollow(X_trans)
                structure.append((n_components, label))

            # Plotting
            if X_trans.shape[1] >= 3:
                fig = px.scatter_3d(
                    x=X_trans[:, 0],
                    y=X_trans[:, 1],
                    z=X_trans[:, 2],
                    title=f"{name} ({n_components}D) - First 3 Components",
                    opacity=0.6
                )
                fig.show()
            else:
                plt.figure(figsize=(7, 6))
                plt.scatter(X_trans[:, 0], X_trans[:, 1], alpha=0.6)
                plt.title(f"{name} ({n_components}D) - First 2 Components")
                plt.xlabel("Component 1")
                plt.ylabel("Component 2")
                plt.grid(True)
                plt.tight_layout()
                plt.show()

        except Exception as e:
            print(f"❌ Failed: {e}")
            times.append((name, n_components, None))

# -------------------------------------
# Build Summary Tables
# -------------------------------------
time_df = pd.DataFrame(times, columns=["Method", "n_components", "Time (s)"])
pivot_df = time_df.pivot(index="Method", columns="n_components", values="Time (s)").round(4)

structure_df = pd.DataFrame(structure, columns=["n_components", "PCA Classification"])

# -------------------------------------
# Print Summaries
# -------------------------------------
print("\n⏱️ Execution Time Table:")
print(pivot_df)

print("\n🔍 PCA Solid/Hollow Classification:")
print(structure_df)



🔢 Generating true random data of shape (1000, 3)
→ Running PCA (3D) ... ✅ 0.001s


→ Running MDS (3D) ... ✅ 7.145s


→ Running Isomap (3D) ... ✅ 0.458s


→ Running LLE (3D) ... ✅ 0.100s


→ Running Hessian LLE (3D) ... ✅ 0.697s


→ Running Modified LLE (3D) ... ✅ 1.460s


→ Running Spectral Embedding (3D) ... ✅ 0.042s


→ Running t-SNE (3D) ... ✅ 6.300s


→ Running LTSA (3D) ... ✅ 0.949s



🔢 Generating true random data of shape (1000, 4)
→ Running PCA (4D) ... ✅ 0.002s


→ Running MDS (4D) ... ✅ 9.661s


→ Running Isomap (4D) ... ✅ 0.371s


→ Running LLE (4D) ... ✅ 0.152s


→ Running Hessian LLE (4D) ... ❌ Failed: for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]
→ Running Modified LLE (4D) ... ✅ 2.211s


→ Running Spectral Embedding (4D) ... ✅ 0.049s


→ Running t-SNE (4D) ... ❌ Failed: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.
→ Running LTSA (4D) ... ✅ 2.076s



🔢 Generating true random data of shape (1000, 5)
→ Running PCA (5D) ... ✅ 0.001s


→ Running MDS (5D) ... ✅ 14.857s


→ Running Isomap (5D) ... ✅ 0.869s


→ Running LLE (5D) ... ✅ 0.219s


→ Running Hessian LLE (5D) ... ❌ Failed: for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]
→ Running Modified LLE (5D) ... ✅ 2.634s


→ Running Spectral Embedding (5D) ... ✅ 0.073s


→ Running t-SNE (5D) ... ❌ Failed: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.
→ Running LTSA (5D) ... ✅ 1.759s



🔢 Generating true random data of shape (1000, 6)
→ Running PCA (6D) ... ✅ 0.001s


→ Running MDS (6D) ... ✅ 17.721s


→ Running Isomap (6D) ... ✅ 0.500s


→ Running LLE (6D) ... ✅ 0.206s


→ Running Hessian LLE (6D) ... ❌ Failed: for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]
→ Running Modified LLE (6D) ... ✅ 2.287s


→ Running Spectral Embedding (6D) ... ✅ 0.118s


→ Running t-SNE (6D) ... ❌ Failed: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.
→ Running LTSA (6D) ... ❌ Failed: Error in determining null-space with ARPACK. Error message: 'Factor is exactly singular'. Note that eigen_solver='arpack' can fail when the weight matrix is singular or otherwise ill-behaved. In that case, eigen_solver='dense' is recommended. See online documentation for more information.

🔢 Generating true random data of shape (1000, 7)
→ Running PCA (7D) ... ✅ 0.001s


→ Running MDS (7D) ... ✅ 19.967s


→ Running Isomap (7D) ... ✅ 0.778s


→ Running LLE (7D) ... ✅ 0.254s


→ Running Hessian LLE (7D) ... ❌ Failed: for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]
→ Running Modified LLE (7D) ... ✅ 2.714s


→ Running Spectral Embedding (7D) ... ✅ 0.175s


→ Running t-SNE (7D) ... ❌ Failed: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.
→ Running LTSA (7D) ... ❌ Failed: Error in determining null-space with ARPACK. Error message: 'Factor is exactly singular'. Note that eigen_solver='arpack' can fail when the weight matrix is singular or otherwise ill-behaved. In that case, eigen_solver='dense' is recommended. See online documentation for more information.

🔢 Generating true random data of shape (1000, 8)
→ Running PCA (8D) ... ✅ 0.001s


→ Running MDS (8D) ... ✅ 20.286s


→ Running Isomap (8D) ... ✅ 0.795s


→ Running LLE (8D) ... ✅ 0.465s


→ Running Hessian LLE (8D) ... ❌ Failed: for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]
→ Running Modified LLE (8D) ... ✅ 2.366s


→ Running Spectral Embedding (8D) ... ✅ 0.241s


→ Running t-SNE (8D) ... ❌ Failed: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.
→ Running LTSA (8D) ... ❌ Failed: Error in determining null-space with ARPACK. Error message: 'Factor is exactly singular'. Note that eigen_solver='arpack' can fail when the weight matrix is singular or otherwise ill-behaved. In that case, eigen_solver='dense' is recommended. See online documentation for more information.

🔢 Generating true random data of shape (1000, 9)
→ Running PCA (9D) ... ✅ 0.001s


→ Running MDS (9D) ... ✅ 24.386s


→ Running Isomap (9D) ... ✅ 0.785s


→ Running LLE (9D) ... ✅ 0.250s


→ Running Hessian LLE (9D) ... ❌ Failed: for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]
→ Running Modified LLE (9D) ... ✅ 2.649s


→ Running Spectral Embedding (9D) ... ✅ 0.204s


→ Running t-SNE (9D) ... ❌ Failed: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.
→ Running LTSA (9D) ... ✅ 1.836s



🔢 Generating true random data of shape (1000, 10)
→ Running PCA (10D) ... ✅ 0.001s


→ Running MDS (10D) ... ✅ 27.583s


→ Running Isomap (10D) ... ✅ 0.847s


→ Running LLE (10D) ... ✅ 0.551s


→ Running Hessian LLE (10D) ... ❌ Failed: for method='hessian', n_neighbors must be greater than [n_components * (n_components + 3) / 2]
→ Running Modified LLE (10D) ... 


invalid value encountered in scalar divide



✅ 1.778s


→ Running Spectral Embedding (10D) ... ✅ 0.684s


→ Running t-SNE (10D) ... ❌ Failed: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.
→ Running LTSA (10D) ... ✅ 2.472s



⏱️ Execution Time Table:
n_components            3       4        5        6        7        8   \
Method                                                                   
Hessian LLE         0.6974     NaN      NaN      NaN      NaN      NaN   
Isomap              0.4580  0.3705   0.8695   0.5005   0.7775   0.7954   
LLE                 0.1005  0.1518   0.2195   0.2055   0.2541   0.4648   
LTSA                0.9495  2.0760   1.7590      NaN      NaN      NaN   
MDS                 7.1451  9.6607  14.8571  17.7211  19.9669  20.2862   
Modified LLE        1.4599  2.2112   2.6340   2.2869   2.7144   2.3664   
PCA                 0.0011  0.0024   0.0013   0.0009   0.0012   0.0008   
Spectral Embedding  0.0423  0.0489   0.0732   0.1181   0.1754   0.2415   
t-SNE               6.2996     NaN      NaN      NaN      NaN      NaN   

n_components             9        10  
Method                                
Hessian LLE             NaN      NaN  
Isomap               0.7851   0.8472  
LLE