In [21]:
from tsne_api import TSNEResultsWithKNN, run_optsne, run_reference_tsne
from utils import MNIST, SwissRoll, plot_side_by_side, plot_tsne_result

%load_ext jupyter_black

The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black


In [22]:
# Shared configuration for every run in this notebook.

# --- Dataset ---------------------------------------------------------------
n_samples = 5_000  # passed as `n_samples` to SwissRoll.generate

# --- Embedding -------------------------------------------------------------
n_components = 2  # NOTE(review): not referenced by the cells visible here
perplexity = 30  # forwarded unchanged to the reference and OpTSNE runs
initial_alpha = 1  # passed as `fixed_dof` / `initial_dof` to the runners
dof_lr = 0.8  # passed as `dof_lr`, only to the DoF-optimized OpTSNE run

# --- Optimization ----------------------------------------------------------
n_iter = 1_000
n_jobs = 1
random_state = 42  # the same seed is handed to every run

In [23]:
# Swiss-roll dataset generated with noise=0.0; its `datapoints` feed every
# t-SNE run below and its `labels` are handed to the comparison plots.
swiss_5k = SwissRoll.generate(
    noise=0.0,
    n_samples=n_samples,
)
# Kept as the trailing expression so the notebook displays the plot result.
swiss_5k.plot(height=600, width=600)

### Compare the OpTSNE implementation without DoF optimization against the original openTSNE, using the same parameters

In [24]:
# Baseline: reference t-SNE run with the degrees of freedom held at
# `initial_alpha` (passed as `fixed_dof`), sharing perplexity, seed,
# iteration count and job count with the OpTSNE runs.
# NOTE: the variable name `refrence_tsne` (sic) is kept because the plotting
# cells refer to it; rename it everywhere in one go if the typo is fixed.
refrence_tsne = run_reference_tsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    fixed_dof=initial_alpha,
    n_iter=n_iter,
    # Derive the label from `n_samples` so it cannot drift from the data size.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)

--------------------------------------------------------------------------------
TSNE(callbacks_every_iters=1, early_exaggeration=12, n_iter=1000,
     negative_gradient_method='bh', random_state=42, verbose=True)
--------------------------------------------------------------------------------
===> Running optimization with exaggeration=12.00, lr=416.67 for 250 iterations...
Iteration   50, KL divergence 2.9573, 50 iterations in 6.6371 sec
Iteration  100, KL divergence 2.9430, 50 iterations in 6.6108 sec
Iteration  150, KL divergence 2.8574, 50 iterations in 6.3906 sec
Iteration  200, KL divergence 2.8121, 50 iterations in 6.3970 sec
Iteration  250, KL divergence 2.7890, 50 iterations in 6.4036 sec
   --> Time elapsed: 32.44 seconds
===> Running optimization with exaggeration=1.00, lr=5000.00 for 1000 iterations...
Iteration   50, KL divergence 0.9208, 50 iterations in 6.4284 sec
Iteration  100, KL divergence 0.7623, 50 iterations in 6.3469 sec
Iteration  150, KL divergence 0.7102, 50 

In [25]:
# OpTSNE with DoF optimization switched off (`optimize_for_dof=False`), so no
# DoF learning rate is supplied. All other settings mirror the reference run,
# which makes this a like-for-like check of the two implementations.
optsne_not_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=False,
    dof_lr=None,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Derive the label from `n_samples` so it cannot drift from the data size.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 416.67 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=416.67 for 250 iterations...
Iteration   50, KL divergence 2.9601, 50 iterations in 7.0875 sec
Iteration  100, KL divergence 2.9458, 50 iterations in 7.2494 sec
Iteration  150, KL divergence 2.8607, 50 iterations in 6.8521 sec
Iteration  200, KL divergence 2.8158, 50 iterations in 6.6060 sec
Iteration  250, KL divergence 2.7931, 50 iterations in 6.5899 sec
   --> Time elapsed: 34.39 seconds
===> Running optimization with exaggeration=1.00, lr=5000.00 for 1000 iterations...
Iteration   50, KL divergence 0.9610, 50 iterations in 6.6702 sec
Iteration  100, KL divergence 0.8247, 50 iterations in 6.5764 sec
Iteration  150, KL divergence 0.7825, 50 iterations in 6.4855 sec
Iteration  200, KL divergence 0.7633, 50 iterations in 6.6036 sec
Iteration  250, KL divergence 0.7526, 50 iterations in 6.5698 sec
Iteration  300, KL divergen

In [26]:
# Reference embedding next to the OpTSNE run without DoF optimization.
# NOTE(review): the runtimes in these titles are hard-coded literals,
# presumably copied from an earlier execution; update them by hand after
# re-running the cells above.
reference_title = "Runtime = 124s | openTSNE"
not_optimized_title = "Runtime = 132s | OpTSNE (not optimized)"

plot_side_by_side(
    refrence_tsne,
    optsne_not_optimized,
    labels=swiss_5k.labels,
    additional_title_1=reference_title,
    additional_title_2=not_optimized_title,
)

### Compare the OpTSNE implementation with DoF optimization against the original openTSNE, using the same parameters

In [27]:
# OpTSNE with DoF optimization enabled: starts from `initial_alpha` and is
# given the configured `dof_lr`. Every other argument matches the
# non-optimized run above.
optsne_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=True,
    dof_lr=dof_lr,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Derive the label from `n_samples` so it cannot drift from the data size.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)
# Last entry of `im_alpha_grads`; presumably the most recently recorded alpha
# gradient. TODO confirm against tsne_api.
print(optsne_optimized.im_alpha_grads[-1])

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 416.67 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=416.67 for 250 iterations...
Iteration   50, KL divergence 2.9601, 50 iterations in 6.8517 sec
Iteration  100, KL divergence 2.9458, 50 iterations in 6.8095 sec
Iteration  150, KL divergence 2.8607, 50 iterations in 6.5867 sec
Iteration  200, KL divergence 2.8158, 50 iterations in 6.5673 sec
Iteration  250, KL divergence 2.7931, 50 iterations in 6.6641 sec
   --> Time elapsed: 33.48 seconds
===> Running optimization with exaggeration=1.00, lr=5000.00 for 1000 iterations...
Iteration   50, KL divergence 0.2818, 50 iterations in 9.7010 sec
Iteration  100, KL divergence 0.2689, 50 iterations in 9.8317 sec
Iteration  150, KL divergence 0.2659, 50 iterations in 9.7599 sec
Iteration  200, KL divergence 0.2643, 50 iterations in 9.7826 sec
Iteration  250, KL divergence 0.2630, 50 iterations in 10.3548 sec
Iteration  300, KL diverge

In [28]:
# Reference embedding next to the OpTSNE run with DoF optimization.
# NOTE(review): the runtimes in these titles are hard-coded literals,
# presumably copied from an earlier execution; update them by hand after
# re-running the cells above.
reference_title = "Runtime = 124s | openTSNE"
optimized_title = "Runtime = 221s | OpTSNE (optimized)"

plot_side_by_side(
    refrence_tsne,
    optsne_optimized,
    labels=swiss_5k.labels,
    additional_title_1=reference_title,
    additional_title_2=optimized_title,
)