In [1]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.manifold import MDS, Isomap, LocallyLinearEmbedding, SpectralEmbedding, TSNE
import time

pi = np.pi

# -----------------------------
# SHAPE
# -----------------------------

# Cone
X = []
for i in range(1000):
    z = random.random()
    t = random.uniform(0, 1000 * pi * z)
    X.append([z * np.cos(t), z * np.sin(t), z])
data_cone = pd.DataFrame(X)

# Solid Cone
X = []
for _ in range(1000):  
    z = random.random() 
    r = z * np.sqrt(random.random())  
    theta = random.uniform(0, 2 * np.pi)
    x = r * np.cos(theta)
    y = r * np.sin(theta)
    X.append([x, y, z])
data_solidcone = pd.DataFrame(X, columns=['x', 'y', 'z'])

# Cube
X = []
for i in range(1000):
    x = random.random()
    y = random.random()
    z = random.random()
    X.append([x, y, z])
data_cube = pd.DataFrame(X)

# Pyramid
X = []
for i in range(1000):
    x = random.uniform(-1, 1)
    y = random.uniform(-1, 1)
    z = random.random()
    X.append([z * x, z * y, z])
data_pyra = pd.DataFrame(X)

# Open book
X = []
for i in range(1000):
    x = random.uniform(-1, 1)
    y = random.random()
    X.append([x, y, np.abs(x)])
data_book = pd.DataFrame(X)

# Hemisphere
X = []
for i in range(1000):
    z = random.random()
    a = np.sqrt(1 - z ** 2)
    t = random.uniform(0, 2 * pi)
    X.append([a * np.cos(t), a * np.sin(t), z])
data_hemi = pd.DataFrame(X)

# Half-ball
X = []
for i in range(1000):
    z = random.random()
    a = np.sqrt(1 - z ** 2)
    r = random.uniform(0, a)
    t = random.uniform(0, 2 * pi)
    X.append([r * np.cos(t), r * np.sin(t), z])
data_half = pd.DataFrame(X)



DR Reduction Function

In [2]:
import plotly.express as px
import matplotlib.pyplot as plt

def detect_solid_or_hollow(X, threshold=0.95, solid_relative_cutoff=0.25):
    """
    Robust solid/hollow detector.
    If PCA needs 3 components but the 3rd one adds little compared to the first two → Hollow.
    """
    from sklearn.decomposition import PCA
    pca = PCA()
    pca.fit(X)
    explained = np.cumsum(pca.explained_variance_ratio_)
    individual = pca.explained_variance_ratio_

    intrinsic_dim = np.argmax(explained >= threshold) + 1

    print("PCA variance ratio:", individual.round(4))
    print("Cumulative explained variance:", explained.round(4))
    print("→ Intrinsic dim to reach 95%:", intrinsic_dim)

    if intrinsic_dim >= 3 and len(individual) >= 3:
        ratio_3v12 = individual[2] / (individual[0] + individual[1])
        print(f"  → 3rd component ratio vs. first two: {ratio_3v12:.4f}")
        if ratio_3v12 < solid_relative_cutoff:
            return 2  # Hollow
        else:
            return 3  # Solid
    else:
        return 2  # Hollow




def run_dim_reductions(X, shape_name="Shape", n_components=None):
    if n_components is None:
        n_components = min(5, X.shape[1])  # auto-select up to 5D max (safe for t-SNE etc.)

    methods = {
        "PCA": PCA(n_components=n_components),
        "MDS": MDS(n_components=n_components, random_state=42, normalized_stress='auto'),
        "Isomap": Isomap(n_components=n_components, n_neighbors=10),
        "LLE": LocallyLinearEmbedding(n_components=n_components, n_neighbors=10, method='standard'),
        "Hessian LLE": LocallyLinearEmbedding(n_components=n_components, n_neighbors=10, method='hessian'),
        "Modified LLE": LocallyLinearEmbedding(n_components=n_components, n_neighbors=10, method='modified'),
        "Spectral Embedding": SpectralEmbedding(n_components=n_components, n_neighbors=10),
        "t-SNE": TSNE(n_components=n_components, perplexity=30, random_state=42),
        "LTSA": LocallyLinearEmbedding(n_components=n_components, n_neighbors=10, method='ltsa')
    }

    results = {}
    times = {}

    for name, model in methods.items():
        start = time.time()
        X_trans = model.fit_transform(X)
        duration = time.time() - start

        results[name] = X_trans
        times[name] = duration

        # Plot
        if X_trans.shape[1] == 2:
            plt.figure(figsize=(8, 6))
            plt.scatter(X_trans[:, 0], X_trans[:, 1], alpha=0.7)
            plt.title(f"{shape_name} - {name}")
            plt.xlabel("Component 1")
            plt.ylabel("Component 2")
            plt.grid(True)
            plt.tight_layout()
            plt.show()

        elif X_trans.shape[1] == 3:
            fig = px.scatter_3d(
                x=X_trans[:, 0],
                y=X_trans[:, 1],
                z=X_trans[:, 2],
                title=f"{shape_name} - {name}",
                opacity=0.7
            )
            fig.show()

    return results, times


APPLY TO ALL SHAPES


In [3]:


shape_datasets = {
    "Cone": data_cone,
    "Solid Cone": data_solidcone,
    "Cube": data_cube,
    "Pyramid": data_pyra,
    "Open Book": data_book,
    "Hemisphere": data_hemi,
    "Half Ball": data_half
}

all_times = []

for name, df in shape_datasets.items():
    print(f"\n--- Processing: {name} ---")

    n_components = detect_solid_or_hollow(df.values)
    print(f"  → Detected {'Solid' if n_components == 3 else 'Hollow'}: reducing to {n_components}D")

    results, times = run_dim_reductions(df.values, shape_name=name, n_components=n_components)

    for method, t in times.items():
        all_times.append((name, method, t))






--- Processing: Cone ---
PCA variance ratio: [0.4107 0.3792 0.2101]
Cumulative explained variance: [0.4107 0.7899 1.    ]
→ Intrinsic dim to reach 95%: 3
  → 3rd component ratio vs. first two: 0.2659
  → Detected Solid: reducing to 3D



--- Processing: Solid Cone ---
PCA variance ratio: [0.3444 0.3398 0.3158]
Cumulative explained variance: [0.3444 0.6842 1.    ]
→ Intrinsic dim to reach 95%: 3
  → 3rd component ratio vs. first two: 0.4616
  → Detected Solid: reducing to 3D


ValueError: Error in determining null-space with ARPACK. Error message: 'Factor is exactly singular'. Note that eigen_solver='arpack' can fail when the weight matrix is singular or otherwise ill-behaved. In that case, eigen_solver='dense' is recommended. See online documentation for more information.


TIME SUMMARY


In [None]:
time_df = pd.DataFrame(all_times, columns=["Shape", "Method", "Time (s)"])
print("\nExecution Time Summary:")
print(time_df.pivot(index="Method", columns="Shape", values="Time (s)").round(3))


Execution Time Summary:
Shape                 Cone    Cube  Half Ball  Hemisphere  Open Book  Pyramid  \
Method                                                                          
Hessian LLE          1.469   1.460      0.747       0.850      1.510    1.577   
Isomap               0.684   0.620      0.410       0.348      0.693    0.584   
LLE                  0.118   0.191      0.102       0.094      0.098    0.166   
LTSA                 1.875   1.874      1.203       1.212      1.988    1.504   
MDS                 14.880  11.285     15.122       8.827     17.182   20.836   
Modified LLE         2.476   2.217      1.618       1.695      2.397    2.585   
PCA                  0.002   0.000      0.001       0.001      0.001    0.003   
Spectral Embedding   0.059   0.053      0.047       0.058      0.049    0.093   
t-SNE                9.082  10.509      8.942       3.596      5.160   13.407   

Shape               Solid Cone  
Method                          
Hessian LLE      

In [None]:
_ = detect_solid_or_hollow(data_cone)


PCA variance ratio: [0.4071 0.3884 0.2046]
Cumulative explained variance: [0.4071 0.7954 1.    ]
→ Intrinsic dim to reach 95%: 3
  → 3rd component ratio vs. first two: 0.2572
