In [2]:
import os
import traceback
import numpy as np
from numpy import linalg as LA

from hyperbolicTSNE import Datasets
from hyperbolicTSNE.visualization import plot_poincare, animate
from hyperbolicTSNE import load_data, Datasets, SequentialOptimizer, initialization, HyperbolicTSNE
from hyperbolicTSNE.cost_functions_ import HyperbolicKL
from hyperbolicTSNE.util import find_last_embedding, opt_config, initialize_logger, write_data, store_visuals
from hyperbolicTSNE.data_loaders import load_mnist

Please note that `empty_sequence` uses the KL divergence with Barnes-Hut approximation (angle=0.5) by default.


In [3]:
data_home = "datasets"
log_path = "temp/poincare/"  # path for saving embedding snapshots

In [4]:
seed = 42

In [5]:
dataset = Datasets.MNIST
num_points = [-1]
perplexities = [50]
correct_gradient = False                         # NOTE: Recompile with correct flag (GRAD_FIX flag)
exact = True                                     # Exact computation or BH estimation of gradient
pca_components = 0                               # Whether to use pca initialization of high dim. data or not
grad_scale_fix = True                            # Whether we multiply the gradient by the inverse metric tensor of hyperbolic space or not
                                                 # Note that the correct hyperoblic gradient has an inverse metric tensor factor

In [8]:
# Generate sample
sample = 20         # 2 classes, for MNIST take numbers 2 and 4
raw_X, raw_labels = load_mnist(data_home=data_home)
print(raw_X.shape, raw_labels.shape)

indices_2 = np.where(raw_X == 2)
indices_4 = np.where(raw_X == 4)

print(indices_2.shape)
print(indices_4.shape)



(70000, 784) (70000,)


AttributeError: 'numpy.ndarray' object has no attribute 'where'

In [6]:
exaggeration_factor = 1
ex_iterations = 0
main_iterations = 100
cf = HyperbolicKL

In [7]:
# Simple experiment with no exaggeration

# Compute initial embedding in Poincare disk (PCA embedding)
X_embedded = initialization(
    n_samples=dataX.shape[0], 
    n_components=2,
    X=dataX,
    random_state=seed,
    method="pca"
) 

# Initialize config and parameters
learning_rate = (dataX.shape[0] * 1) / (exaggeration_factor * 1000)

opt_conf = opt_config(cf, learning_rate, exaggeration_factor, ex_iterations, main_iterations, exact)
opt_params = SequentialOptimizer.sequence_poincare(**opt_conf) 
opt_params, opt_conf = initialize_logger(log_path, opt_params, opt_conf)

# Set up H-TSNE object 
htsne = HyperbolicTSNE(
    init=X_embedded, 
    n_components=2, 
    metric="precomputed", 
    verbose=True, 
    opt_method=SequentialOptimizer,         # the optimizater we use
    opt_params=opt_params              # the parameters for the optimizers
)

# Compute embedding:
try:
    hyperbolicEmbedding = htsne.fit_transform((D, V))
    
except ValueError:
    hyperbolicEmbedding = find_last_embedding(log_path)
    traceback.print_exc()

Please note that `empty_sequence` uses the KL divergence with Barnes-Hut approximation (angle=0.5) by default.
config: {'cf': <class 'hyperbolicTSNE.cost_functions_.HyperbolicKL'>, 'learning_rate_ex': 1.0, 'learning_rate_main': 1.0, 'exaggeration': 1, 'exaggeration_its': 0, 'gradientDescent_its': 100, 'vanilla': False, 'momentum_ex': 0.5, 'momentum': 0.8, 'exact': True, 'area_split': False, 'n_iter_check': 10, 'size_tol': 0.999, 'grad_scale_fix': True}
[HyperbolicTSNE] Received iterable as input. It should have len=2 and contain (D=None, V=None)
Running Gradient Descent, Verbosity: True


Gradient Descent: 0it [00:00, ?it/s]


Running Gradient Descent, Verbosity: True


Gradient Descent error: 2.03571 grad_norm: 1.22907e-02: 100%|██████████| 100/100 [00:01<00:00, 59.93it/s]
