In [1]:
from tsne_api import TSNEResultsWithKNN, run_optsne, run_reference_tsne
from utils import MNIST, SwissRoll, plot_side_by_side, plot_tsne_result

%load_ext jupyter_black

In [7]:
# --- Shared experiment configuration -------------------------------------
# Dataset size; passed to SwissRoll.generate.
n_samples = 5_000

# Embedding settings forwarded to both the reference and the OpTSNE runs.
perplexity = 30
n_iter = 2_000
n_components = 2  # not referenced by the cells visible in this notebook section

# Degrees-of-freedom settings: `initial_alpha` is passed as `fixed_dof` to the
# reference run and as `initial_dof` to OpTSNE; `dof_lr` is only passed when
# `optimize_for_dof=True`.
initial_alpha = 1
dof_lr = 0.8

# Reproducibility and parallelism.
random_state = 42
n_jobs = 1

In [8]:
# Build the noise-free Swiss roll with `n_samples` points, then render it
# (the plot call is the cell's last expression, so its result is displayed).
swiss_5k = SwissRoll.generate(noise=0.0, n_samples=n_samples)
swiss_5k.plot(height=600, width=600)

### Compare OpTSNE without DoF optimization against the original openTSNE (same parameters)

In [9]:
# Baseline run via `run_reference_tsne` with the degrees of freedom held at
# `initial_alpha`. The variable name `refrence_tsne` (sic) is kept unchanged
# because the plotting cells refer to it by this spelling.
refrence_tsne = run_reference_tsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    fixed_dof=initial_alpha,
    n_iter=n_iter,
    # Derive the label from `n_samples` so it cannot drift from the actual
    # dataset size when the configuration cell is edited.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)

--------------------------------------------------------------------------------
TSNE(callbacks_every_iters=1, early_exaggeration=12, n_iter=2000,
     negative_gradient_method='bh', random_state=42, verbose=True)
--------------------------------------------------------------------------------
===> Running optimization with exaggeration=12.00, lr=416.67 for 250 iterations...
Iteration   50, KL divergence 2.9492, 50 iterations in 4.6646 sec
Iteration  100, KL divergence 2.9346, 50 iterations in 4.6858 sec
Iteration  150, KL divergence 2.8498, 50 iterations in 4.4126 sec
Iteration  200, KL divergence 2.8014, 50 iterations in 4.4699 sec
Iteration  250, KL divergence 2.7734, 50 iterations in 4.5247 sec
   --> Time elapsed: 22.76 seconds
===> Running optimization with exaggeration=1.00, lr=5000.00 for 2000 iterations...
Iteration   50, KL divergence 0.9235, 50 iterations in 4.5531 sec
Iteration  100, KL divergence 0.7678, 50 iterations in 4.5321 sec
Iteration  150, KL divergence 0.7168, 50 

In [5]:
# OpTSNE with DoF optimization switched off (`optimize_for_dof=False`), so no
# DoF learning rate is supplied. Uses the same perplexity, seed, iteration
# count and initial DoF as the reference run for a like-for-like comparison.
#
# NOTE(review): the saved output of this cell reports lr=8.33 and 100
# iterations finishing in 0.09 s, which does not match the current
# configuration (n_iter, n_samples), and its execution count precedes the
# configuration cell's. Re-run the notebook top to bottom on a fresh kernel.
optsne_not_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=False,
    dof_lr=None,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Derive the label from `n_samples` so it cannot drift from the actual
    # dataset size when the configuration cell is edited.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 8.33 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=8.33 for 250 iterations...
Iteration   50, KL divergence 0.2033, 50 iterations in 0.0333 sec
Iteration  100, KL divergence 1.1709, 50 iterations in 0.0208 sec
Iteration  150, KL divergence 1.1709, 50 iterations in 0.0080 sec
Iteration  200, KL divergence 1.1709, 50 iterations in 0.0080 sec
Iteration  250, KL divergence 1.1709, 50 iterations in 0.0189 sec
   --> Time elapsed: 0.09 seconds
===> Running optimization with exaggeration=1.00, lr=100.00 for 100 iterations...
Iteration   50, KL divergence 0.4443, 50 iterations in 0.0378 sec
Iteration  100, KL divergence 0.3676, 50 iterations in 0.0560 sec
   --> Time elapsed: 0.09 seconds
Optimization took 0.09 seconds


In [6]:
# Show the reference result next to the non-optimized OpTSNE result, colored
# by the Swiss-roll labels.
# NOTE(review): the runtimes in the titles are hand-typed literals, not values
# measured by this notebook; update them after re-running the cells above.
plot_side_by_side(
    refrence_tsne,
    optsne_not_optimized,
    additional_title_1="Runtime = 124s | openTSNE",
    additional_title_2="Runtime = 132s | OpTSNE (not optimized)",
    labels=swiss_5k.labels,
)

### Compare OpTSNE with DoF optimization against the original openTSNE (same parameters)

In [27]:
# OpTSNE with DoF optimization enabled: starts from `initial_alpha` and uses
# `dof_lr` as the DoF learning rate. All other settings match the reference
# run so the two embeddings are comparable.
optsne_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=True,
    dof_lr=dof_lr,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Derive the label from `n_samples` so it cannot drift from the actual
    # dataset size when the configuration cell is edited.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)
# Last recorded entry of `im_alpha_grads` — presumably the final DoF (alpha)
# gradient; TODO confirm against tsne_api.
print(optsne_optimized.im_alpha_grads[-1])

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 416.67 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=416.67 for 250 iterations...
Iteration   50, KL divergence 2.9601, 50 iterations in 6.8517 sec
Iteration  100, KL divergence 2.9458, 50 iterations in 6.8095 sec
Iteration  150, KL divergence 2.8607, 50 iterations in 6.5867 sec
Iteration  200, KL divergence 2.8158, 50 iterations in 6.5673 sec
Iteration  250, KL divergence 2.7931, 50 iterations in 6.6641 sec
   --> Time elapsed: 33.48 seconds
===> Running optimization with exaggeration=1.00, lr=5000.00 for 1000 iterations...
Iteration   50, KL divergence 0.2818, 50 iterations in 9.7010 sec
Iteration  100, KL divergence 0.2689, 50 iterations in 9.8317 sec
Iteration  150, KL divergence 0.2659, 50 iterations in 9.7599 sec
Iteration  200, KL divergence 0.2643, 50 iterations in 9.7826 sec
Iteration  250, KL divergence 0.2630, 50 iterations in 10.3548 sec
Iteration  300, KL diverge

In [28]:
# Show the reference result next to the DoF-optimized OpTSNE result, colored
# by the Swiss-roll labels.
# NOTE(review): the runtimes in the titles are hand-typed literals, not values
# measured by this notebook; update them after re-running the cells above.
plot_side_by_side(
    refrence_tsne,
    optsne_optimized,
    additional_title_1="Runtime = 124s | openTSNE",
    additional_title_2="Runtime = 221s | OpTSNE (optimized)",
    labels=swiss_5k.labels,
)