In [1]:
from tsne_api import TSNEResultsWithKNN, run_optsne, run_reference_tsne
from utils import MNIST, SwissRoll, plot_side_by_side, plot_tsne_result

%load_ext jupyter_black

In [2]:
# Shared experiment configuration read by the run cells below.

# Dataset / embedding size
n_samples = 10_000  # forwarded to SwissRoll.generate
n_components = 2

# Settings forwarded to run_reference_tsne / run_optsne
perplexity = 30
n_iter = 2_000
random_state = 42
n_jobs = 1

# Degrees-of-freedom settings
initial_alpha = 1  # passed as fixed_dof (reference run) and initial_dof (OpTSNE runs)
dof_lr = 0.8  # passed as dof_lr to the Swiss Roll run with optimize_for_dof=True

## Swiss Roll (10k)

In [5]:
# Generate a noise-free Swiss Roll with `n_samples` points and plot it at 600x600.
# NOTE(review): the name `swiss_5k` does not match n_samples (10000 in the
# configuration cell); it is kept because the cells below reference it.
swiss_5k = SwissRoll.generate(
    n_samples=n_samples,
    noise=0.0,
)
swiss_5k.plot(height=600, width=600)

### Compare OpTSNE without DoF optimization against the original openTSNE (same parameters)

In [None]:
# Baseline: reference t-SNE run on the Swiss Roll with a fixed degree of freedom.
# The dataset label is derived from n_samples so it always matches the size of
# the generated data instead of a hard-coded sample count.
# NOTE(review): `refrence_tsne` is misspelled, but the plotting cells below use
# this exact name, so it is left unchanged here.
refrence_tsne = run_reference_tsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    fixed_dof=initial_alpha,
    n_iter=n_iter,
    negative_gradient_method="bh",
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)

--------------------------------------------------------------------------------
TSNE(callbacks_every_iters=1, early_exaggeration=12, n_iter=2000,
     negative_gradient_method='bh', random_state=42, verbose=True)
--------------------------------------------------------------------------------
===> Running optimization with exaggeration=12.00, lr=833.33 for 250 iterations...
Iteration   50, KL divergence 3.3624, 50 iterations in 14.5487 sec
Iteration  100, KL divergence 3.2600, 50 iterations in 14.5416 sec
Iteration  150, KL divergence 3.1477, 50 iterations in 14.4176 sec
Iteration  200, KL divergence 3.0687, 50 iterations in 14.1640 sec
Iteration  250, KL divergence 3.0166, 50 iterations in 13.9061 sec
   --> Time elapsed: 71.58 seconds
===> Running optimization with exaggeration=1.00, lr=10000.00 for 2000 iterations...
Iteration   50, KL divergence 1.2118, 50 iterations in 14.2419 sec
Iteration  100, KL divergence 0.9710, 50 iterations in 14.0345 sec
Iteration  150, KL divergence 0.8

In [6]:
# OpTSNE with DoF optimization switched off (optimize_for_dof=False, dof_lr=None),
# using the same shared settings as the reference run for comparison.
# The dataset label is derived from n_samples so it always matches the size of
# the generated data instead of a hard-coded sample count.
optsne_not_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=False,
    dof_lr=None,
    n_iter=n_iter,
    negative_gradient_method="bh",
    dataset_name=f"Swiss Roll ({n_samples})",
    eval_error_every_iter=1,
    n_jobs=n_jobs,
)

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 833.33 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=833.33 for 250 iterations...
Iteration   50, KL divergence 3.3643, 50 iterations in 14.9016 sec
Iteration  100, KL divergence 3.2622, 50 iterations in 15.2922 sec
Iteration  150, KL divergence 3.1494, 50 iterations in 14.9866 sec
Iteration  200, KL divergence 3.0706, 50 iterations in 14.7485 sec
Iteration  250, KL divergence 3.0163, 50 iterations in 14.3706 sec
   --> Time elapsed: 74.30 seconds
===> Running optimization with exaggeration=1.00, lr=10000.00 for 2000 iterations...
Iteration   50, KL divergence 1.2357, 50 iterations in 15.2568 sec
Iteration  100, KL divergence 1.0117, 50 iterations in 14.8111 sec
Iteration  150, KL divergence 0.9241, 50 iterations in 14.7252 sec
Iteration  200, KL divergence 0.8787, 50 iterations in 14.6426 sec
Iteration  250, KL divergence 0.8521, 50 iterations in 14.6442 sec
Iteration  300, 

In [8]:
# Compare the reference result (first) with OpTSNE without DoF optimization (second).
# NOTE(review): the runtimes in the titles are hard-coded strings, not values
# measured by this cell.
plot_side_by_side(
    refrence_tsne,
    optsne_not_optimized,
    additional_title_1="Runtime = 124s | openTSNE",
    additional_title_2="Runtime = 132s | OpTSNE (not optimized)",
    labels=swiss_5k.labels,
)

### Compare OpTSNE with DoF optimization against the original openTSNE (same parameters)

In [9]:
# OpTSNE with DoF optimization enabled, starting from initial_alpha and using
# dof_lr as the DoF learning rate.
# The dataset label is derived from n_samples so it always matches the size of
# the generated data instead of a hard-coded sample count.
optsne_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=True,
    dof_lr=dof_lr,
    n_iter=n_iter,
    negative_gradient_method="bh",
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)
# Show the last entry of the recorded im_alpha_grads sequence.
print(optsne_optimized.im_alpha_grads[-1])

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 833.33 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=833.33 for 250 iterations...
Iteration   50, KL divergence 3.3643, 50 iterations in 14.7655 sec
Iteration  100, KL divergence 3.2622, 50 iterations in 15.0295 sec
Iteration  150, KL divergence 3.1494, 50 iterations in 14.8851 sec
Iteration  200, KL divergence 3.0706, 50 iterations in 14.6806 sec
Iteration  250, KL divergence 3.0163, 50 iterations in 14.2845 sec
   --> Time elapsed: 73.65 seconds
===> Running optimization with exaggeration=1.00, lr=10000.00 for 2000 iterations...
Iteration   50, KL divergence 0.2971, 50 iterations in 24.4800 sec
Iteration  100, KL divergence 0.2549, 50 iterations in 24.3263 sec
Iteration  150, KL divergence 0.2469, 50 iterations in 24.2150 sec
Iteration  200, KL divergence 0.2437, 50 iterations in 24.3145 sec
Iteration  250, KL divergence 0.2421, 50 iterations in 24.3193 sec
Iteration  300, 

In [10]:
# Compare the reference result (first) with the DoF-optimized OpTSNE result (second).
# NOTE(review): the runtimes in the titles are hard-coded strings, not values
# measured by this cell.
plot_side_by_side(
    refrence_tsne,
    optsne_optimized,
    additional_title_1="Runtime = 124s | openTSNE",
    additional_title_2="Runtime = 221s | OpTSNE (optimized)",
    labels=swiss_5k.labels,
)

## MNIST (60k)

### Compare OpTSNE with DoF optimization against the original openTSNE (same parameters)

In [4]:
# Load MNIST with 60000 training samples, then call reshape() on the dataset object.
# NOTE(review): reshape() presumably prepares the images for t-SNE in place —
# confirm against utils.MNIST.
mnist = MNIST.generate(
    n_training_samples=60_000,
)
mnist.reshape()

In [5]:
# Baseline: reference t-SNE run on the MNIST training data with a fixed degree
# of freedom, using the shared settings from the configuration cell.
reference_tsne_mnist = run_reference_tsne(
    data=mnist.data_train,
    dataset_name="MNIST (60000)",
    fixed_dof=initial_alpha,
    perplexity=perplexity,
    n_iter=n_iter,
    negative_gradient_method="bh",
    random_state=random_state,
    n_jobs=n_jobs,
)

--------------------------------------------------------------------------------
TSNE(callbacks_every_iters=1, early_exaggeration=12, n_iter=2000,
     negative_gradient_method='bh', random_state=42, verbose=True)
--------------------------------------------------------------------------------
===> Running optimization with exaggeration=12.00, lr=5000.00 for 250 iterations...
Iteration   50, KL divergence 5.8772, 50 iterations in 108.7687 sec
Iteration  100, KL divergence 5.7308, 50 iterations in 106.7758 sec
Iteration  150, KL divergence 5.6903, 50 iterations in 105.2692 sec
Iteration  200, KL divergence 5.6758, 50 iterations in 110.8025 sec
Iteration  250, KL divergence 5.6677, 50 iterations in 105.1549 sec
   --> Time elapsed: 536.77 seconds
===> Running optimization with exaggeration=1.00, lr=60000.00 for 2000 iterations...
Iteration   50, KL divergence 3.4554, 50 iterations in 93.6529 sec
Iteration  100, KL divergence 3.1982, 50 iterations in 92.4048 sec
Iteration  150, KL diverge

In [7]:
# Plot the reference MNIST embedding with the black template, without saving the figure.
# NOTE(review): the runtime here (4411s) differs from the 4410s quoted for the
# same run in the final side-by-side cell — confirm which value is correct.
plot_tsne_result(
    reference_tsne_mnist,
    labels=mnist.labels_train,
    black_template=True,
    save_figure=False,
    additional_title="Runtime = 4411s | openTSNE",
)

In [11]:
# OpTSNE with DoF optimization on MNIST, using a DoF learning rate of 0.5.
# The rate is bound to its own name so the shared `dof_lr` from the configuration
# cell is not overwritten; re-running earlier cells after this one therefore
# still uses their original setting.
dof_lr_mnist = 0.5
optsne_optimized_mnist = run_optsne(
    data=mnist.data_train,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=True,
    dof_lr=dof_lr_mnist,
    n_iter=n_iter,
    negative_gradient_method="bh",
    dataset_name="MNIST (60000)",
    n_jobs=n_jobs,
)

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 5000.00 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=5000.00 for 250 iterations...
Iteration   50, KL divergence 5.8773, 50 iterations in 96.2340 sec
Iteration  100, KL divergence 5.7308, 50 iterations in 94.0052 sec
Iteration  150, KL divergence 5.6904, 50 iterations in 92.7618 sec
Iteration  200, KL divergence 5.6759, 50 iterations in 92.0730 sec
Iteration  250, KL divergence 5.6679, 50 iterations in 92.8719 sec
   --> Time elapsed: 467.95 seconds
===> Running optimization with exaggeration=1.00, lr=60000.00 for 2000 iterations...
Iteration   50, KL divergence 2.9757, 50 iterations in 135.2846 sec
Iteration  100, KL divergence 2.8640, 50 iterations in 134.6395 sec
Iteration  150, KL divergence 2.8008, 50 iterations in 135.8422 sec
Iteration  200, KL divergence 2.7588, 50 iterations in 136.6386 sec
Iteration  250, KL divergence 2.7272, 50 iterations in 135.9665 sec
Iteratio

In [12]:
# Plot the DoF-optimized OpTSNE embedding of MNIST with the black template.
# NOTE(review): the runtime in the title is a hard-coded string, not a value
# measured by this cell.
plot_tsne_result(
    optsne_optimized_mnist,
    additional_title="Runtime = 5428s | OpTSNE",
    labels=mnist.labels_train,
    black_template=True,
)

In [13]:
# Compare the reference MNIST result (first) with the DoF-optimized OpTSNE
# result (second), without saving the figure.
# NOTE(review): the reference runtime here (4410s) differs from the 4411s quoted
# in the single-plot cell for the same run — confirm which value is correct.
plot_side_by_side(
    reference_tsne_mnist,
    optsne_optimized_mnist,
    additional_title_1="Runtime = 4410s | openTSNE",
    additional_title_2="Runtime = 5428s | OpTSNE (optimized)",
    labels=mnist.labels_train,
    save_figure=False,
)