In [15]:
# scripts/run_swissroll_experiments.py

from __future__ import annotations

import json
import os
from dataclasses import asdict
from typing import List

import numpy as np
import pandas as pd
import torch

from src.generate_data import generate_dataset
from src.manifold_learning import estimate_tangent_normal_knn_pca
from src.diffusion import DiffusionExperiment
from src.metrics import chamfer_distance, swiss_roll_manifold_distance
from src.experiments_swissroll import SwissRollExperimentConfig
from src.make_plots import plot_swiss_roll_3d

In [16]:
# Experiment sweep: every entry in `configs` is run by the experiment cell below.
# The tunable values live in named constants so that the experiment name is
# derived from the exact values passed to the config and cannot drift from them.
TANGENT_FRACTION = 3.0
NORMAL_FRACTION = 5.0
NUM_EPOCHS = 20
NUM_EVAL_SAMPLES = 2000

# NOTE(review): the original name hard-coded "0.6", which equals
# TANGENT_FRACTION / NORMAL_FRACTION for these values; the ratio is assumed to
# be the intended meaning of "tangent_ratio" -- confirm.
name = f"tangent_ratio_{TANGENT_FRACTION / NORMAL_FRACTION:g}_epochs{NUM_EPOCHS}"

configs = [
    SwissRollExperimentConfig(
        name=name,
        use_manifold=True,
        mixed_noise=True,
        tangent_fraction=TANGENT_FRACTION,
        normal_fraction=NORMAL_FRACTION,
        num_epochs=NUM_EPOCHS,
        num_eval_samples=NUM_EVAL_SAMPLES,
    )
]

In [22]:
# End-to-end experiment driver:
#   1. generate the swiss-roll dataset,
#   2. estimate local tangent/normal frames once (shared by all runs),
#   3. for every config, train + sample `num_trials` times and score each run,
#   4. aggregate mean / std of both metrics into one result row per config.
# `exp` and `samples` are intentionally left bound at top level after the loop;
# later cells inspect and plot them.

print("=== Generating swiss roll data ===")
embed_dim = 3
data = generate_dataset(
    name="swiss_roll",
    n_samples=10000,
    embed_dim=embed_dim,
    noise_sigma=0.4,
    random_state=0,
)
X = data["X"]        # (N, 3)
E = data["E"]        # (3, 3) embedding matrix used

print("=== Estimating local tangent/normal frames via KNN+PCA ===")

frames = estimate_tangent_normal_knn_pca(
    X,
    intrinsic_dim=2,
    n_neighbors=32,
    include_self=True,
)

results = []
num_trials = 5
base_seed = 0  # trial k is seeded with base_seed + k so reruns are reproducible
device = torch.device("cpu")

# Spread across trials is a *sample* statistic, so use Bessel's correction
# (ddof=1). With a single trial that would divide by zero, so fall back to 0.
std_ddof = 1 if num_trials > 1 else 0

for cfg in configs:
    print(f"\n=== Running experiment: {cfg.name} ===")

    # These depend only on the config, not on the trial, so compute them once.
    local_frames = frames if cfg.use_manifold else None
    use_mixed_noise = cfg.mixed_noise and cfg.use_manifold

    chamfer_scores = []
    swiss_scores = []

    for trial_idx in range(num_trials):
        # Seed the global RNGs so each trial is repeatable while trials still
        # differ from one another.
        # NOTE(review): assumes DiffusionExperiment draws from the global
        # torch / numpy generators -- confirm.
        trial_seed = base_seed + trial_idx
        np.random.seed(trial_seed)
        torch.manual_seed(trial_seed)

        # Instantiate diffusion experiment
        exp = DiffusionExperiment(
            X=X,
            local_frames=local_frames,
            tangent_fraction=cfg.tangent_fraction,
            normal_fraction=cfg.normal_fraction,
            mixed_noise=use_mixed_noise,
            num_timesteps=cfg.num_timesteps,
            batch_size=cfg.batch_size,
            lr=cfg.lr,
            device=device,
        )

        # Train
        exp.diffusion_train(num_epochs=cfg.num_epochs)

        # Sample
        samples = exp.sample(num_samples=cfg.num_eval_samples)  # (N_gen, D) tensor
        samples_np = samples.detach().cpu().numpy()

        # Metrics: distance between generated and true point clouds ...
        cd = chamfer_distance(
            X_true=X,
            X_gen=samples_np,
            device=device,
            max_true=cfg.max_true_eval,
            max_gen=cfg.max_gen_eval,
        )

        # ... and distance of generated points to the swiss-roll surface grid.
        swiss_dist = swiss_roll_manifold_distance(
            X_gen=samples_np,
            embed_dim=X.shape[1],
            u_range=cfg.u_range,
            v_range=cfg.v_range,
            n_u=cfg.grid_n_u,
            n_v=cfg.grid_n_v,
            E=E,
            device=device,
            max_gen=cfg.max_gen_eval,
        )

        chamfer_scores.append(cd)
        swiss_scores.append(swiss_dist)

    # Aggregate across trials: means ...
    chamfer_mean = np.mean(chamfer_scores)
    swiss_mean = np.mean(swiss_scores)

    # ... and sample standard deviations.
    chamfer_std = np.std(chamfer_scores, ddof=std_ddof)
    swiss_std = np.std(swiss_scores, ddof=std_ddof)

    print(
        f" -> chamfer_mean={chamfer_mean:.4f}, "
        f"chamfer_std={chamfer_std:.4f}, "
        f" swiss_dist={swiss_mean:.4f}, "
        f" swiss_dist_std={swiss_std:.4f}"
    )

    # One result row per config: all config fields plus the aggregated metrics.
    row = asdict(cfg)
    row["chamfer_mean"] = chamfer_mean
    row["swiss_dist_mean"] = swiss_mean
    row["chamfer_std"] = chamfer_std
    row["swiss_dist_std"] = swiss_std
    results.append(row)

=== Generating swiss roll data ===
=== Estimating local tangent/normal frames via KNN+PCA ===

=== Running experiment: tangent_ratio_0.6_epochs20 ===
Epoch 1: loss = 0.765973
Epoch 2: loss = 0.573118
Epoch 3: loss = 0.583208
Epoch 4: loss = 0.572549
Epoch 5: loss = 0.552374
Epoch 6: loss = 0.545031
Epoch 7: loss = 0.538447
Epoch 8: loss = 0.533890
Epoch 9: loss = 0.523529
Epoch 10: loss = 0.529165
Epoch 11: loss = 0.515745
Epoch 12: loss = 0.519882
Epoch 13: loss = 0.524693
Epoch 14: loss = 0.499167
Epoch 15: loss = 0.498146
Epoch 16: loss = 0.497405
Epoch 17: loss = 0.490954
Epoch 18: loss = 0.491302
Epoch 19: loss = 0.468199
Epoch 20: loss = 0.467059
Epoch 1: loss = 0.763959
Epoch 2: loss = 0.570741
Epoch 3: loss = 0.589313
Epoch 4: loss = 0.563166
Epoch 5: loss = 0.553868
Epoch 6: loss = 0.565633
Epoch 7: loss = 0.569901
Epoch 8: loss = 0.541042
Epoch 9: loss = 0.530817
Epoch 10: loss = 0.510637
Epoch 11: loss = 0.530257
Epoch 12: loss = 0.510664
Epoch 13: loss = 0.517901
Epoch 14: 

In [19]:
# Display the repr of `exp`, the DiffusionExperiment bound by the trial loop in
# the experiment cell. Relies on kernel state: that cell must have run first.
exp

<src.diffusion.DiffusionExperiment at 0x169c7cb30>

In [20]:
# Display `samples`, the tensor returned by exp.sample(...) in the trial loop of
# the experiment cell. Relies on kernel state: that cell must have run first.
samples

tensor([[11.2679,  8.4640,  6.7357],
        [-4.9485,  0.6947,  6.8391],
        [ 2.0812, -0.9326, -4.2353],
        ...,
        [12.2152,  2.0323,  3.3068],
        [-3.0721,  6.3185,  6.8263],
        [-3.8872,  3.4483, -9.6662]])

In [21]:

# Visualize the generated point cloud. `samples` is the tensor returned by
# exp.sample(...) in the trial loop of the experiment cell, so that cell must
# have run before this one. plot_swiss_roll_3d is imported together with the
# other project imports in the notebook's import cell.
plot_swiss_roll_3d(samples, title="Generated Swiss Roll Samples")