In [1]:
import numpy as np
import phate
import sys
sys.path.append('../../src/')
from data_script import hemisphere_data, sklearn_swiss_roll
from data_convert import convert_data
from plotly3d.plot import scatter
from negative_sampling import add_negative_samples, make_hi_freq_noise
import pathlib
from scipy.spatial.distance import pdist, squareform
from sklearn.model_selection import train_test_split
import phate
import numpy as np
from scipy.spatial.distance import pdist, squareform
from omegaconf import DictConfig, OmegaConf
import hydra

In [3]:
# Load a previously saved .npz archive (path is relative to this notebook's directory).
# The file name matches the lower-cased `swissroll_{rot}_{noise}` pattern produced by
# export_datasets below — presumably rotation None and noise 0; confirm against the producer
# of the gt_geodesic/ae folder.
data = np.load('../../data/gt_geodesic/ae/swissroll_none_0.npz')

In [5]:
# Plot the array stored under the 'phate' key of the loaded archive with the plotly3d scatter helper.
scatter(data['phate'])

In [13]:
from autometric.datasets import SwissRoll
import torch

In [14]:
# Alternative source kept for reference:
# gt_X, X, _ = sklearn_swiss_roll(n_samples=1000, noise=0.0)

# --- Swiss-roll parameters ---
length_phi = 15   # extent of the roll along the angular coordinate
length_Z = 15     # extent of the roll along the z axis
sigma = 0.1       # strength of the radial noise
m = 2000          # how many points to draw

# --- Sample the intrinsic coordinates ---
# The three draws are kept in this order (phi, xi, Z) so that results under a fixed
# NumPy seed are unchanged.
phi = length_phi*np.random.rand(m)
xi = np.random.rand(m)
Z = length_Z*np.random.rand(m)

# Noisy radius shared by both planar coordinates.
radius = 1./6*(phi + sigma*xi)
X = radius*np.sin(phi)
Y = radius*np.cos(phi)

# One (x, y, z) row per sample; afterwards `X` is rebound to the torch tensor of all points.
points = np.vstack((X, Y, Z)).T
X = torch.tensor(points)

In [45]:
# Build the autometric SwissRoll. The arguments echo the constants of the manual construction
# cell (15 for the z extent, 1/6 as radius factor, 15 radians expressed as 15/(2*pi) turns) —
# presumably so both rolls coincide; the exact meaning of r/num_spirals/delay is defined in
# autometric.datasets and should be confirmed there.
swissroll = SwissRoll(height=15, r=1/6, num_spirals = 15/(2*np.pi), delay=0)
# Disabled experiment: overwrite the generated points with the manually built tensor X.
# swissroll.X = X

In [46]:
# Ask the dataset for geodesics from the first 10 points to points 350..359, evaluated at
# 1000 evenly spaced parameter values in [0, 1]. Two results come back: `gs` (the geodesics,
# plotted in a later cell) and `ls` (presumably their lengths — export_datasets below saves
# the same second return value as `geodesic_lengths`).
gs, ls = swissroll.geodesics(swissroll.X[:10], swissroll.X[350:360], ts = np.linspace(0,1,1000))


To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).



In [47]:
from autometric.utils import plot_3d_with_geodesics

In [48]:
# Show the SwissRoll point cloud together with the geodesics computed above.
plot_3d_with_geodesics(swissroll.X, gs)

In [49]:
# Scatter plot of the SwissRoll points alone, with the helper's `rescale` option enabled.
scatter(swissroll.X, rescale=True)

In [38]:
# Scatter plot of the manually constructed tensor X (from the parameter/sampling cell above),
# with its columns reordered to (0, 2, 1), i.e. the last two coordinates swapped.
# NOTE(review): this cell's execution count (38) is lower than the preceding cells', so it was
# run out of order; it only depends on X from the manual construction cell.
scatter(X[:,[0,2,1]], rescale=True)

In [53]:
#|export
import os
from fastcore.script import *
from autometric.branch_datasets import Branch

def _as_numpy(value):
    """Return `value` as a NumPy array if it is a torch tensor; otherwise return it unchanged."""
    if isinstance(value, torch.Tensor):
        # .numpy() refuses tensors that track gradients or live off-CPU, so strip both first.
        return value.detach().cpu().numpy()
    return value


def _pad_with_last_point(paths):
    """Pad every path (array of points, one per row) to the longest path's length by repeating its final point."""
    if len(paths) == 0:
        return []
    target_len = max(len(path) for path in paths)
    return [
        # np.repeat with a count of 0 yields an empty (0, d) block, so full-length paths pass through unchanged.
        np.vstack([path, np.repeat(path[-1][None, :], target_len - len(path), axis=0)])
        for path in paths
    ]


@call_parse
def export_datasets(
    foldername:str, # Directory the .npz files are written to (created if it does not exist)
    num_geodesics:int = 20, # Number of start/end point pairs sampled per dataset
    num_points_per_geodesic:int = 1000, # Number of evenly spaced values in [0, 1] passed to the geodesic solver
    seed:int = 480851, # Seeds NumPy's global RNG and is also passed to every SwissRoll
):
    """
    Build SwissRoll datasets over a grid of rotation dimensions and noise levels and save each one,
    together with geodesics between randomly chosen point pairs, as an npz file in `foldername`.

    One file `swissroll_{rot}_{noise}.npz` (lower-cased, spaces replaced by underscores) is written
    per combination of rot in [None, 5, 15, 50] and noise in [0, 0.1, 0.3]. Each file contains the
    arrays `X`, `X_ground_truth`, `start_points`, `end_points`, `geodesics` and `geodesic_lengths`.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(foldername, exist_ok=True)

    np.random.seed(seed)
    dsets = {}
    for rot in [None, 5, 15, 50]:
        for noise in [0,0.1,0.3]:
            dsets[f'swissroll_{rot}_{noise}'] = SwissRoll(num_points = 3000, height=15, r=1/6, num_spirals = 15/(2*np.pi), delay=0, rotation_dimension=rot, noise = noise, seed = seed)

    for dname, dset in dsets.items():
        print(f"Creating {dname}")
        # make dname filename safe
        dname = dname.replace(" ", "_").lower()

        # Convert each array on its own, so a tensor/ndarray mix does not break the export.
        X = _as_numpy(dset.X)
        X_ground_truth = _as_numpy(dset.X_ground_truth)

        # Draw 2*num_geodesics indices (with replacement): the first half are start points,
        # the second half the matching end points.
        endpoint_idxs = np.random.randint(0, dset.X.shape[0], size = num_geodesics*2)
        start_points = X[endpoint_idxs[:num_geodesics]]
        end_points = X[endpoint_idxs[num_geodesics:]]
        ts = np.linspace(0, 1, num_points_per_geodesic)

        gs, ls = dset.geodesics(start_points, end_points, ts)

        # Geodesics and their lengths may come back as torch tensors; np.savez needs NumPy data.
        gs = [_as_numpy(g) for g in gs]
        ls = _as_numpy(ls)

        # The geodesics may have different numbers of points, which np.savez cannot stack.
        # Pad the shorter ones by repeating their last point (not zeros) so every path
        # still ends at its true end point.
        gs = _pad_with_last_point(gs)

        np.savez(
            os.path.join(foldername, f'{dname}.npz'),
            X = X,
            X_ground_truth = X_ground_truth,
            start_points = start_points,
            end_points = end_points,
            geodesics = gs,
            geodesic_lengths = ls,
        )

In [54]:
# Generate and save all 12 swiss-roll variants (4 rotation settings x 3 noise levels), each with
# 20 geodesics, using an explicit seed instead of the function's default.
export_datasets("../../data/swiss_roll_wide_geod/", seed = 12490132, num_geodesics=20)


To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).



Creating swissroll_None_0
Creating swissroll_None_0.1
Creating swissroll_None_0.3
Creating swissroll_5_0
Creating swissroll_5_0.1



To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).



Creating swissroll_5_0.3
Creating swissroll_15_0
Creating swissroll_15_0.1
Creating swissroll_15_0.3
Creating swissroll_50_0
Creating swissroll_50_0.1
Creating swissroll_50_0.3
