In [1]:
import torch
import torch.nn as nn
import numpy as np
import sys; sys.path.append('../src/')
import os
from model.cmpnts import MLP
from scipy.stats import ortho_group
import matplotlib.pyplot as plt
from math import sqrt

In [2]:
data_dir = '../data/toy'

In [3]:
def nonlinearity(dataset, epsilon=1.): # epsilon can be set to the groundtruth LD manifold's average std.
    X = dataset[:]
    _, s, _ = torch.svd(X - X.mean(0), compute_uv=False)
    return s.max() / s.min(), s.min() / epsilon, epsilon, s

In [4]:
@ torch.no_grad()
def save_dataset(X, Y, i):
    in_dim = X.shape[1]
    out_dim = Y.shape[1]
    path = os.path.join(data_dir, '{}-manifold'.format(in_dim), '{}-ambient'.format(out_dim))
    os.makedirs(path, exist_ok=True)
    
    data_path = os.path.join(path, '{}-{}_{}.npy'.format(in_dim, out_dim, i))
    report_path = os.path.join(path, '{}-{}_{}.txt'.format(in_dim, out_dim, i))
    np.save(data_path, Y.detach().numpy())
    
    _, s_x, _ = torch.svd(X - X.mean(0), compute_uv=False)
    dis, ratio, eps, s = nonlinearity(Y, epsilon=s_x.mean())
    with open(report_path, 'w') as report:
        report.write('s.max / s.min = {}\n'.format(dis))
        report.write('s.min / x.s.mean = {}\n'.format(ratio))
        report.write('x.s.mean = {}\n'.format(eps / sqrt(len(X))))
        report.write('singular values = {}'.format(s.sort()[0].flip(0) / sqrt(len(X))))

# Nonlinear Dataset

In [5]:
from dataset.toy import IsometricEmbedding
from model.flow import _RealNVP

In [6]:
def build_flow(out_dim, n=10, w=256):
    nets = lambda: nn.Sequential(
        nn.Linear(out_dim, w), nn.ReLU(), 
        nn.Linear(w, w), nn.ReLU(),
        nn.Linear(w, w), nn.ReLU(),
        nn.Linear(w, out_dim), nn.Tanh())
    nett = lambda: nn.Sequential(
        nn.Linear(out_dim, w), nn.ReLU(), 
        nn.Linear(w, w), nn.ReLU(),
        nn.Linear(w, w), nn.ReLU(),
        nn.Linear(w, out_dim))
    masks = torch.as_tensor([
        [0]*(out_dim//2) + [1]*(out_dim//2), 
        [1]*(out_dim//2) + [0]*(out_dim//2)] * n, dtype=torch.float)
    flow = _RealNVP(nets, nett, masks)
    return flow

In [12]:
from tqdm import tqdm

in_dims = [64] # [1, 2, 4, 8, 16, 32, 64]
out_dims = np.asarray([2, 4, 8, 16, 32], dtype=int)
sizes = [100000] #[100, 300, 1000, 3000, 10000, 30000, 100000]

for in_dim, size in zip(in_dims, sizes):
    for out_dim in out_dims * in_dim:
        for i in range(5):
            print('{} -> {} ({})'.format(in_dim, out_dim, i))

            X = torch.randn(size, in_dim).cuda()
            Y = IsometricEmbedding(X, out_dim=out_dim)

            flow = build_flow(out_dim, 10, 256)
            Y.embed(flow = flow.cuda(),
                    optimizer = torch.optim.Adam(flow.parameters(), lr=3e-4),
                    epochs = 100,
                    batch_size = 100,
                    lam = 10.)

            save_dataset(X.cpu(), Y[:].cpu(), i)

16 -> 32 (0)


Embedding: 100%|██████████████████████████████| 100/100 [1:16:35<00:00, 45.95s/it, isometricity=0.032, linearity=0.168]    


16 -> 32 (1)


Embedding: 100%|██████████████████████████████| 100/100 [21:17<00:00, 12.77s/it, isometricity=0.0424, linearity=0.172]


16 -> 32 (2)


Embedding: 100%|██████████████████████████████| 100/100 [22:04<00:00, 13.25s/it, isometricity=0.0409, linearity=0.165]


16 -> 32 (3)


Embedding: 100%|██████████████████████████████| 100/100 [23:16<00:00, 13.96s/it, isometricity=0.038, linearity=0.147]


16 -> 32 (4)


Embedding: 100%|██████████████████████████████| 100/100 [22:32<00:00, 13.53s/it, isometricity=0.0381, linearity=0.158]


16 -> 64 (0)


Embedding: 100%|██████████████████████████████| 100/100 [23:03<00:00, 13.84s/it, isometricity=0.0544, linearity=0.168]


16 -> 64 (1)


Embedding: 100%|██████████████████████████████| 100/100 [22:29<00:00, 13.49s/it, isometricity=0.0446, linearity=0.163]


16 -> 64 (2)


Embedding: 100%|██████████████████████████████| 100/100 [22:29<00:00, 13.50s/it, isometricity=0.0468, linearity=0.163]


16 -> 64 (3)


Embedding: 100%|██████████████████████████████| 100/100 [22:25<00:00, 13.45s/it, isometricity=0.0399, linearity=0.152]


16 -> 64 (4)


Embedding: 100%|██████████████████████████████| 100/100 [22:15<00:00, 13.35s/it, isometricity=0.0392, linearity=0.167]


16 -> 128 (0)


Embedding: 100%|██████████████████████████████| 100/100 [23:43<00:00, 14.24s/it, isometricity=0.0441, linearity=0.141]


16 -> 128 (1)


Embedding: 100%|██████████████████████████████| 100/100 [23:43<00:00, 14.23s/it, isometricity=0.039, linearity=0.151]


16 -> 128 (2)


Embedding: 100%|██████████████████████████████| 100/100 [23:48<00:00, 14.29s/it, isometricity=0.0528, linearity=0.161]


16 -> 128 (3)


Embedding: 100%|██████████████████████████████| 100/100 [24:00<00:00, 14.40s/it, isometricity=0.0427, linearity=0.15]


16 -> 128 (4)


Embedding: 100%|██████████████████████████████| 100/100 [23:33<00:00, 14.13s/it, isometricity=0.0447, linearity=0.151]


16 -> 256 (0)


Embedding: 100%|██████████████████████████████| 100/100 [23:29<00:00, 14.09s/it, isometricity=0.0549, linearity=0.163]


16 -> 256 (1)


Embedding: 100%|██████████████████████████████| 100/100 [23:14<00:00, 13.94s/it, isometricity=0.0642, linearity=0.185]


16 -> 256 (2)


Embedding: 100%|██████████████████████████████| 100/100 [23:01<00:00, 13.82s/it, isometricity=0.065, linearity=0.175]


16 -> 256 (3)


Embedding: 100%|██████████████████████████████| 100/100 [23:00<00:00, 13.81s/it, isometricity=0.0594, linearity=0.153]


16 -> 256 (4)


Embedding: 100%|██████████████████████████████| 100/100 [23:05<00:00, 13.86s/it, isometricity=0.0508, linearity=0.146]


16 -> 512 (0)


Embedding: 100%|██████████████████████████████| 100/100 [23:25<00:00, 14.06s/it, isometricity=0.12, linearity=0.177] 


16 -> 512 (1)


Embedding: 100%|██████████████████████████████| 100/100 [24:08<00:00, 14.49s/it, isometricity=0.0516, linearity=0.131]


16 -> 512 (2)


Embedding: 100%|██████████████████████████████| 100/100 [24:03<00:00, 14.43s/it, isometricity=0.0783, linearity=0.126]


16 -> 512 (3)


Embedding: 100%|██████████████████████████████| 100/100 [23:41<00:00, 14.21s/it, isometricity=0.0631, linearity=0.138]


16 -> 512 (4)


Embedding: 100%|██████████████████████████████| 100/100 [23:35<00:00, 14.15s/it, isometricity=0.0565, linearity=0.127]


32 -> 64 (0)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:43<00:00, 41.84s/it, isometricity=0.0334, linearity=0.203]


32 -> 64 (1)


Embedding: 100%|██████████████████████████████| 100/100 [1:08:28<00:00, 41.09s/it, isometricity=0.0227, linearity=0.192]


32 -> 64 (2)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:42<00:00, 41.83s/it, isometricity=0.032, linearity=0.195]


32 -> 64 (3)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:39<00:00, 41.79s/it, isometricity=0.0356, linearity=0.205]


32 -> 64 (4)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:38<00:00, 41.78s/it, isometricity=0.0286, linearity=0.19]


32 -> 128 (0)


Embedding: 100%|██████████████████████████████| 100/100 [1:11:11<00:00, 42.71s/it, isometricity=0.0462, linearity=0.19]


32 -> 128 (1)


Embedding: 100%|██████████████████████████████| 100/100 [1:11:50<00:00, 43.11s/it, isometricity=0.0487, linearity=0.196]


32 -> 128 (2)


Embedding: 100%|██████████████████████████████| 100/100 [1:11:53<00:00, 43.13s/it, isometricity=0.0469, linearity=0.186]


32 -> 128 (3)


Embedding: 100%|██████████████████████████████| 100/100 [1:10:58<00:00, 42.59s/it, isometricity=0.0593, linearity=0.193]


32 -> 128 (4)


Embedding: 100%|██████████████████████████████| 100/100 [1:10:54<00:00, 42.54s/it, isometricity=0.0414, linearity=0.17]


32 -> 256 (0)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:40<00:00, 41.81s/it, isometricity=0.0699, linearity=0.173]


32 -> 256 (1)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:56<00:00, 41.96s/it, isometricity=0.0781, linearity=0.159]


32 -> 256 (2)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:41<00:00, 41.81s/it, isometricity=0.0817, linearity=0.156]


32 -> 256 (3)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:11<00:00, 41.51s/it, isometricity=0.0753, linearity=0.17]


32 -> 256 (4)


Embedding: 100%|██████████████████████████████| 100/100 [1:09:51<00:00, 41.91s/it, isometricity=0.0831, linearity=0.187]


32 -> 512 (0)


Embedding: 100%|██████████████████████████████| 100/100 [1:10:49<00:00, 42.49s/it, isometricity=0.0603, linearity=0.112]


32 -> 512 (1)


Embedding: 100%|██████████████████████████████| 100/100 [1:11:18<00:00, 42.79s/it, isometricity=0.0719, linearity=0.128]


32 -> 512 (2)


Embedding: 100%|██████████████████████████████| 100/100 [1:11:58<00:00, 43.19s/it, isometricity=0.0664, linearity=0.12]


32 -> 512 (3)


Embedding: 100%|██████████████████████████████| 100/100 [1:11:44<00:00, 43.04s/it, isometricity=0.0685, linearity=0.138]


32 -> 512 (4)


Embedding: 100%|██████████████████████████████| 100/100 [1:10:41<00:00, 42.42s/it, isometricity=0.0834, linearity=0.124]


32 -> 1024 (0)


Embedding: 100%|██████████████████████████████| 100/100 [1:14:03<00:00, 44.43s/it, isometricity=0.0605, linearity=0.106]


32 -> 1024 (1)


Embedding: 100%|██████████████████████████████| 100/100 [1:10:19<00:00, 42.20s/it, isometricity=0.0543, linearity=0.106]


32 -> 1024 (2)


Embedding: 100%|██████████████████████████████| 100/100 [1:11:02<00:00, 42.62s/it, isometricity=0.0626, linearity=0.0948]


32 -> 1024 (3)


Embedding: 100%|██████████████████████████████| 100/100 [1:13:53<00:00, 44.34s/it, isometricity=0.0721, linearity=0.108]


32 -> 1024 (4)


Embedding: 100%|██████████████████████████████| 100/100 [1:12:20<00:00, 43.41s/it, isometricity=0.0616, linearity=0.107]


64 -> 128 (0)


Embedding: 100%|██████████████████████████████| 100/100 [3:58:43<00:00, 143.23s/it, isometricity=0.0865, linearity=0.238] 


RuntimeError: [enforce fail at alloc_cpu.cpp:73] . DefaultCPUAllocator: can't allocate memory: you tried to allocate 40000000000 bytes. Error code 12 (Cannot allocate memory)