In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# General imports
import os
from pathlib import Path 
import traceback
import numpy as np
from numpy import linalg as LA
from matplotlib import pyplot as plt
from scipy.sparse import csr_matrix

In [None]:
# HyperbolicTSNE imports
from hyperbolicTSNE import Datasets, load_data
from hyperbolicTSNE import Datasets, SequentialOptimizer, initialization, HyperbolicTSNE
from hyperbolicTSNE.cost_functions_ import HyperbolicKL, GaussianKL
from hyperbolicTSNE.util import find_last_embedding, opt_config, initialize_logger, save_experiment_results,  next_experiment_folder_id
from hyperbolicTSNE.data_loaders import load_mnist
from hyperbolicTSNE.hd_mat_ import hd_matrix
from hyperbolicTSNE.visualization import plot_poincare, plot_tree

# Data generation function
from data_gen import generate_Tree_D, generate_Tree_V

# Storing results functions
from hyperbolicTSNE.util import GaussianKL_Tree_results, HyperbolicKL_Tree_results

# Custom Hierarchical D, V 

Generate the distance matrix D and the affinity matrix V from a tree-like hierarchy.

In [None]:
# Tree hierarchy parameters.
# The node count below is n_children**0 + n_children**1 + ... + n_children**depth.
cluster_size = 10
dist = 5
n_children = 4
depth = 3
n_nodes = np.power(n_children, np.arange(depth + 1)).sum()

# Sampling parameters forwarded to the generators below.
# (Alternative spread kept for reference: sigma = np.sqrt(dist / n_children).)
mu = 0
sigma = np.sqrt(dist)

# Distance matrix D and affinity matrix V (V is also known as the P_ij matrix).
D = generate_Tree_D(cluster_size, n_children, n_nodes, mu, sigma, dist)
V = generate_Tree_V(D, mean=mu, var=np.square(sigma))

# One label per data point: node k contributes cluster_size consecutive entries equal to k,
# i.e. [label node 0, ..., label node 0, label node 1, ..., label node n_nodes - 1].
dataLabels = np.array([node for node in range(n_nodes) for _ in range(cluster_size)])
print("dataLabels shape: ", dataLabels.shape)

# Sanity checks on the generated matrices (done while V is still dense).
print("is D symmetric? ", np.allclose(D, D.T))
print("is V symmetric? ", np.allclose(V, V.T))
print("D shape: ", D.shape)

# The HyperbolicKL cost function expects V as a CSR matrix.
V = csr_matrix(V)

# Quick look at the first row of the affinity matrix.
print(f"V[0] max: {np.max(V[0])}, V[0] min: {np.min(V[0])}")
print(V[0].toarray())

In [None]:
# --- Output locations ---
log_path = "temp/poincare/"     # embedding snapshots are written below this path
grad_path = "temp/grad/"        # NOTE: the gradients are calculated manually
data_home = "datasets"

# Results of every run go into a numbered folder inside experiments_folder.
experiments_folder = "./experiment_results/"
exp_id = next_experiment_folder_id(experiments_folder)

seed = 42

# --- Experiment grid (the cartesian product of these lists is run) ---
cfs = [HyperbolicKL]
correct_gradient = [True, False]    # NOTE: recompile with the correct flag (GRAD_FIX flag)
exact = [True]                      # NOTE: exact computation or BH estimation of the gradient

# Whether the gradient is multiplied by the inverse metric tensor of hyperbolic space.
# Note that the correct hyperbolic gradient has an inverse metric tensor factor.
grad_scale_fix = True

# --- Optimisation schedule (forwarded to opt_config when the experiments run) ---
exaggeration_factor = 12
ex_iterations = 1000
main_iterations = 50000

Compute hyperbolic variance

In [None]:
def hyp_dist(a, b):
    """Return arccosh(1 + 2 * (a - b)^2 / ((1 - a^2) * (1 - b^2))).

    This is the Poincaré-model distance formula applied to scalar coordinates
    a and b, which are expected to lie strictly between -1 and 1 (a value of
    exactly +-1 makes the denominator zero).
    """
    gap = a - b
    squared_gap = gap * gap
    denominator = (1 - a * a) * (1 - b * b)
    return np.arccosh(1 + 2 * (squared_gap / denominator))

# Heuristic for the hyperbolic variance: rescale sigma by the ratio between the
# widest distance we allow in the hyperbolic embedding and the largest distance
# between two data points in the high-dimensional space.
size_tol = 0.999
max_dist = np.max(D)                            # largest entry of the distance matrix D
max_dist_H = hyp_dist(-size_tol, size_tol)      # hyperbolic distance between -size_tol and +size_tol
hyp_sigma = (max_dist_H / max_dist) * sigma     # sigma comes from the data generation cell
hyp_var = np.square(hyp_sigma)

print(f"scaling sigma first - hyp sigma:{hyp_sigma}, hyp var:{hyp_var}")

# Second method for computing the hyperbolic variance (not used):
# hyp_sigma = (1. / max_dist) * sigma

In [None]:
# Print hyp_dist for coordinate pairs that lie increasingly close to +-1.
# NOTE(review): the last pair is not symmetric about 0 like the others — confirm this is intended.
for left, right in (
    (-0.99, 0.99),
    (-0.999, 0.999),
    (-0.9999, 0.9999),
    (-0.99999, -0.9999),
):
    print(hyp_dist(left, right))

In [None]:
from itertools import product

# Every (cost function, correct-gradient flag, exact flag) combination becomes one experiment.
experiment_details = [*product(cfs, correct_gradient, exact)]
print("nr. of experiments: ", len(experiment_details))

# Show the grid, one combination per line.
for exp in experiment_details:
    print(exp)

In [None]:
# Run experiments: one full HyperbolicTSNE optimisation per
# (cost function, correct-gradient flag, exact flag) combination in experiment_details.
# Each run is embedded, plotted and stored in its own numbered experiment folder.
animation_step = 25     # forwarded unchanged to save_experiment_results for every run

for (cf, correct_grad, exact_grad) in experiment_details:
    print(f"[Experiment: {exp_id}] \t cf: {cf.class_str()}, correct grad: {correct_grad},  exact grad: {exact_grad}")

    # (1) Compute the initial embedding (method="random"; the same seed is reused for every run)
    X_embedded = initialization(
        n_samples=D.shape[0],
        n_components=2,
        X=None,
        random_state=seed,
        method="random",
        init_scale=1e-4
    )

    # (2) Initialize config and parameters; the learning rate depends on the cost function.
    # NOTE: Change lr. depending on wrong/correct HyperbolicKL
    if cf is GaussianKL:
        learning_rate = D.shape[0] / (exaggeration_factor * 10) * hyp_var
    elif cf is HyperbolicKL:
        learning_rate = D.shape[0] / (exaggeration_factor * 100)
    else:
        # Fail loudly instead of silently reusing the value left over from a previous iteration.
        raise ValueError(f"No learning-rate rule for cost function {cf!r}")

    print(f"The learning rate is: {learning_rate}")

    no_progr_its = (ex_iterations + main_iterations) / 3        # nr. of iterations of no progress before we stop

    opt_conf = opt_config(cf, learning_rate, exaggeration_factor, ex_iterations, main_iterations,
                          exact=exact_grad, vanilla=True, grad_scale_fix=grad_scale_fix,
                          grad_fix=correct_grad, size_tol=size_tol, max_no_progress=no_progr_its)
    opt_params = SequentialOptimizer.sequence_poincare(**opt_conf)

    # (3) Per-run log folders. These are keyed on this run's own flag (correct_grad),
    # not on the whole grid list, so runs with different flags do not share a folder.
    log_path_cf = log_path + f"cf_{cf.class_str()}/correct_grad_{correct_grad}/"
    grad_path_grad = grad_path + f"cf_{cf.class_str()}/correct_grad_{correct_grad}/"

    opt_params, opt_conf = initialize_logger(opt_params, opt_conf, log_path_cf, grad_path_grad)
    # Pass this run's boolean so the cost function knows which gradient to use.
    # (Passing the grid list here would always be truthy.)
    opt_params["cf_params"].update({"grad_fix": correct_grad})

    # Only add var as param for GaussianKL
    if cf is GaussianKL:
        opt_params["cf_params"].update({"var": hyp_var})                  # GaussianKL variance for q_ij

    # (4) Set up t-SNE object and run
    htsne = HyperbolicTSNE(
        init=X_embedded,
        n_components=2,
        metric="precomputed",
        verbose=1,
        opt_method=SequentialOptimizer,         # the optimizer we use
        opt_params=opt_params                   # the parameters for the optimizer
    )

    # Compute embedding; on a ValueError fall back to the last logged snapshot.
    try:
        hyperbolicEmbedding = htsne.fit_transform((D, V))
    except ValueError:
        # Report the failure first so it is visible even if the fallback lookup fails too.
        traceback.print_exc()
        # Look in this run's own log folder (the one handed to initialize_logger above).
        # NOTE(review): assumes find_last_embedding accepts the per-run folder — confirm.
        hyperbolicEmbedding = find_last_embedding(log_path_cf)

    # (5) Plot the embedding (NOTE: We can also use plot_poincare)
    # emb_fig = plot_poincare(hyperbolicEmbedding, dataLabels)
    emb_fig = plot_tree(hyperbolicEmbedding, dataLabels)

    # (6) Store experiment results
    # Folder to save results to; built from the same base folder exp_id was derived from.
    save_folder = f"{experiments_folder}experiment_{exp_id}/"

    # Details describing this experiment; the description reflects the actual run settings.
    optim_procedure = "Vanilla SGD"
    grad_label = "Correct" if correct_grad else "Wrong"
    description = f"{grad_label} gradient, tree data experiment with {cf.__name__}. More iterations, larger lr."

    if cf is GaussianKL:
        exp_data = GaussianKL_Tree_results(n_children, depth, cluster_size, dist, n_nodes, htsne, ex_iterations,
                                           main_iterations, learning_rate, cf, hyp_var, size_tol, max_dist_H, max_dist,
                                           correct_grad, grad_scale_fix, exact_grad, exaggeration_factor,
                                           optim_procedure, description)
    else:
        # cf is HyperbolicKL here; any other cost function was rejected in step (2).
        exp_data = HyperbolicKL_Tree_results(n_children, depth, cluster_size, dist, n_nodes, htsne, ex_iterations,
                                             main_iterations, learning_rate, cf, correct_grad, grad_scale_fix, exact_grad, exaggeration_factor,
                                             optim_procedure, description)

    save_experiment_results(save_folder, None, emb_fig, opt_params, dataLabels,
                            exp_data, hyperbolicEmbedding, animation_step)

    # Prepare index for next iteration
    exp_id += 1