In [17]:
from tsne_api import TSNEResultsWithKNN, run_optsne, run_reference_tsne
from utils import MNIST, SwissRoll, plot_side_by_side, plot_tsne_result

In [41]:
# --- Experiment configuration -------------------------------------------
# Values read by the cells below; a later cell reassigns `n_samples` and
# `perplexity` for the second experiment.

# Dataset
n_samples = 1000  # number of Swiss-roll points requested from SwissRoll.generate
random_state = 42  # forwarded to run_optsne as random_state

# Embedding
n_components = 2  # not referenced by the cells visible in this notebook section
perplexity = 20
n_iter = 1000
n_jobs = 1

# Degrees-of-freedom settings
initial_alpha = 1  # forwarded to run_optsne as initial_dof
dof_lr = 0.8  # forwarded to run_optsne as dof_lr

In [42]:
# Generate the noise-free Swiss roll consumed by the run_optsne cells below.
# NOTE(review): despite the `_5k` suffix, the size is whatever `n_samples`
# holds when this cell runs (1000 in the configuration cell).
swiss_5k = SwissRoll.generate(
    noise=0.0,
    n_samples=n_samples,
)
# Last expression of the cell: its return value is what the notebook displays.
swiss_5k.plot(height=600, width=600)

In [43]:
# Run run_optsne on the Swiss roll with degrees-of-freedom optimisation
# switched on (optimize_for_dof=True), starting from `initial_alpha`.
optsne_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=True,
    dof_lr=dof_lr,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Derive the label from n_samples so it always matches the generated
    # dataset (a literal "5000" would mislabel the 1000-point roll used here).
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)
# Show the last entry of im_alpha_grads recorded for this run.
print(optsne_optimized.im_alpha_grads[-1])

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 83.33 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=83.33 for 250 iterations...
Iteration   50, KL divergence 2.4905, 50 iterations in 1.0931 sec
Iteration  100, KL divergence 2.4369, 50 iterations in 1.0681 sec
Iteration  150, KL divergence 2.3879, 50 iterations in 0.9786 sec
Iteration  200, KL divergence 2.3657, 50 iterations in 1.0158 sec
Iteration  250, KL divergence 2.3559, 50 iterations in 1.0454 sec
   --> Time elapsed: 5.20 seconds
===> Running optimization with exaggeration=1.00, lr=1000.00 for 1000 iterations...
Iteration   50, KL divergence 0.2501, 50 iterations in 1.8621 sec
Iteration  100, KL divergence 0.2430, 50 iterations in 1.8595 sec
Iteration  150, KL divergence 0.2404, 50 iterations in 1.7834 sec
Iteration  200, KL divergence 0.2390, 50 iterations in 1.8251 sec
Iteration  250, KL divergence 0.2380, 50 iterations in 1.8339 sec
Iteration  300, KL divergence 

In [48]:
# Plot the embedding from the dof-optimised run.
# The title suffix is built from `perplexity` instead of a hard-coded number,
# so it cannot drift from the value the run was configured with.
plot_tsne_result(
    optsne_optimized,
    labels=swiss_5k.labels,
    black_template=True,
    additional_title=f"perplexity = {perplexity}",
)

In [45]:
# Baseline run: identical arguments to the optimised run except that
# degrees-of-freedom optimisation is switched off (optimize_for_dof=False).
optsne_not_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=False,
    dof_lr=dof_lr,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Derive the label from n_samples so it always matches the generated
    # dataset (a literal "5000" would mislabel the 1000-point roll used here).
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 83.33 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=83.33 for 250 iterations...
Iteration   50, KL divergence 2.4905, 50 iterations in 1.0944 sec
Iteration  100, KL divergence 2.4369, 50 iterations in 1.0237 sec
Iteration  150, KL divergence 2.3879, 50 iterations in 0.9840 sec
Iteration  200, KL divergence 2.3657, 50 iterations in 1.0686 sec
Iteration  250, KL divergence 2.3559, 50 iterations in 1.1320 sec
   --> Time elapsed: 5.31 seconds
===> Running optimization with exaggeration=1.00, lr=1000.00 for 1000 iterations...
Iteration   50, KL divergence 0.6754, 50 iterations in 1.0632 sec
Iteration  100, KL divergence 0.6369, 50 iterations in 1.0020 sec
Iteration  150, KL divergence 0.6279, 50 iterations in 1.0623 sec
Iteration  200, KL divergence 0.6240, 50 iterations in 1.0635 sec
Iteration  250, KL divergence 0.6222, 50 iterations in 1.0723 sec
Iteration  300, KL divergence 

In [47]:
# Plot the embedding from the baseline run (optimize_for_dof=False).
# The title suffix is built from `perplexity` instead of a hard-coded number,
# so it cannot drift from the value the run was configured with.
plot_tsne_result(
    optsne_not_optimized,
    labels=swiss_5k.labels,
    black_template=True,
    additional_title=f"perplexity = {perplexity}",
)

In [52]:
# Compare the dof-optimised run (first argument) with the baseline run
# (second argument). Both were configured with the same `perplexity`, so one
# title string, derived from that variable, is used for both panels.
perplexity_title = f"perplexity = {perplexity}"
plot_side_by_side(
    optsne_optimized,
    optsne_not_optimized,
    labels=swiss_5k.labels,
    additional_title_1=perplexity_title,
    additional_title_2=perplexity_title,
)

In [67]:
# Second experiment: larger roll, higher perplexity.
# These assignments overwrite the values set in the configuration cell, and
# `swiss_5k` is rebound to the newly generated dataset, so every cell below
# this point works on the new data.
perplexity = 75
n_samples = 5000
swiss_5k = SwissRoll.generate(
    noise=0.0,
    n_samples=n_samples,
)
# Last expression of the cell: its return value is what the notebook displays.
swiss_5k.plot(height=600, width=600)

In [72]:
# Re-run run_optsne with degrees-of-freedom optimisation switched on, using
# the current `swiss_5k`, `perplexity` and `n_samples`.
# This rebinds `optsne_optimized`, replacing any earlier result of that name.
optsne_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=True,
    dof_lr=dof_lr,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Label derived from n_samples so it always matches the generated dataset.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)
# Show the last entry of im_alpha_grads recorded for this run.
print(optsne_optimized.im_alpha_grads[-1])

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 416.67 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=416.67 for 250 iterations...
Iteration   50, KL divergence 2.5624, 50 iterations in 8.4162 sec
Iteration  100, KL divergence 2.5522, 50 iterations in 8.2112 sec
Iteration  150, KL divergence 2.4624, 50 iterations in 8.2974 sec
Iteration  200, KL divergence 2.4318, 50 iterations in 8.2835 sec
Iteration  250, KL divergence 2.4142, 50 iterations in 8.4369 sec
   --> Time elapsed: 41.65 seconds
===> Running optimization with exaggeration=1.00, lr=5000.00 for 1000 iterations...
Iteration   50, KL divergence 0.1037, 50 iterations in 16.0253 sec
Iteration  100, KL divergence 0.0891, 50 iterations in 16.2548 sec
Iteration  150, KL divergence 0.0830, 50 iterations in 16.4930 sec
Iteration  200, KL divergence 0.0790, 50 iterations in 16.7472 sec
Iteration  250, KL divergence 0.0759, 50 iterations in 16.9840 sec
Iteration  300, KL div

In [73]:
# Plot the embedding from the dof-optimised run.
# The title suffix is built from `perplexity` instead of a hard-coded number,
# so it cannot drift from the value the run was configured with.
plot_tsne_result(
    optsne_optimized,
    labels=swiss_5k.labels,
    black_template=True,
    additional_title=f"perplexity = {perplexity}",
)

In [74]:
# Baseline run: identical arguments to the optimised run except that
# degrees-of-freedom optimisation is switched off (optimize_for_dof=False).
# This rebinds `optsne_not_optimized`, replacing any earlier result of that name.
optsne_not_optimized = run_optsne(
    data=swiss_5k.datapoints,
    perplexity=perplexity,
    random_state=random_state,
    initial_dof=initial_alpha,
    optimize_for_dof=False,
    dof_lr=dof_lr,
    n_iter=n_iter,
    negative_gradient_method="bh",
    # Label derived from n_samples so it always matches the generated dataset.
    dataset_name=f"Swiss Roll ({n_samples})",
    n_jobs=n_jobs,
)
# Nothing is printed after this run: printing
# `optsne_optimized.im_alpha_grads[-1]` here would report the value of the
# *other* (dof-optimised) run, not of the baseline computed in this cell.

Performing the early exaggeration fase with exaggeration = 12 and learning rate = 416.67 for 250 iterations...
===> Running optimization with exaggeration=12.00, lr=416.67 for 250 iterations...
Iteration   50, KL divergence 2.5624, 50 iterations in 8.6187 sec
Iteration  100, KL divergence 2.5522, 50 iterations in 8.1102 sec
Iteration  150, KL divergence 2.4624, 50 iterations in 8.3470 sec
Iteration  200, KL divergence 2.4318, 50 iterations in 8.6018 sec
Iteration  250, KL divergence 2.4142, 50 iterations in 8.8278 sec
   --> Time elapsed: 42.51 seconds
===> Running optimization with exaggeration=1.00, lr=5000.00 for 1000 iterations...
Iteration   50, KL divergence 0.7001, 50 iterations in 9.0237 sec
Iteration  100, KL divergence 0.6236, 50 iterations in 9.0267 sec
Iteration  150, KL divergence 0.6050, 50 iterations in 8.7079 sec
Iteration  200, KL divergence 0.5963, 50 iterations in 9.0871 sec
Iteration  250, KL divergence 0.5907, 50 iterations in 9.0484 sec
Iteration  300, KL divergen

In [76]:
# Plot the embedding from the baseline run (optimize_for_dof=False).
# The title suffix is built from `perplexity` instead of a hard-coded number,
# so it cannot drift from the value the run was configured with.
plot_tsne_result(
    optsne_not_optimized,
    labels=swiss_5k.labels,
    black_template=True,
    additional_title=f"perplexity = {perplexity}",
)