#### Outline
- for each dataset: 
    - load dataset; 
    - for each network: 
        - load network
        - project the test dataset samples (all 10,000 for FMNIST)
        - save to metric dataframe

In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all modules before each
# cell is executed, so edits to local packages are picked up without a restart.
%load_ext autoreload
%autoreload 2

### Choose GPU (this may not be needed on your computer)

In [2]:
# Ask CUDA to enumerate devices by PCI bus ID.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# NOTE(review): %env stores the value verbatim, so this sets the literal
# two-character string '' (see the echoed output), not an empty value.
# Presumably that still hides every GPU, in which case the 'network' timings
# below also run on CPU, just like the 'network-cpu' ones -- TODO confirm.
%env CUDA_VISIBLE_DEVICES=''

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=''


In [3]:
import numpy as np
import pickle
import pandas as pd
import time
from umap import UMAP

In [4]:
from tfumap.umap import tfUMAP
import tensorflow as tf
from sklearn.decomposition import PCA
from openTSNE import TSNE



In [5]:
from tqdm.autonotebook import tqdm

In [6]:
from tfumap.paths import ensure_dir, MODEL_DIR, DATA_DIR

In [7]:
# Root directory under which the saved per-dataset models are looked up below
output_dir = MODEL_DIR/'projections' 

In [8]:
# Empty results table; every timing loop below appends one row per repeat.
speed_columns = ['method_', 'dimensions', 'dataset', 'speed']
projection_speeds = pd.DataFrame(columns=speed_columns)

### FMNIST

In [9]:
# Dataset key (used in load/save paths and in result rows) and the `dims`
# value handed to tfUMAP.
dataset, dims = 'fmnist', (28, 28, 1)

##### load dataset

In [10]:
from tensorflow.keras.datasets import fashion_mnist

# load dataset, divide pixel values by 255 and cast to float32
(train_images, Y_train), (test_images, Y_test) = fashion_mnist.load_data()
X_train = (train_images/255.).astype('float32')
X_test = (test_images/255.).astype('float32')
# collapse all non-batch axes into one so every sample is a 1-D vector.
# np.prod is used instead of np.product, which was deprecated in NumPy 1.25
# and removed in NumPy 2.0.
X_train = X_train.reshape((len(X_train), np.prod(np.shape(X_train)[1:])))
X_test = X_test.reshape((len(X_test), np.prod(np.shape(X_test)[1:])))

# subset a validation set: the last n_valid training samples
n_valid = 10000
X_valid = X_train[-n_valid:]
Y_valid = Y_train[-n_valid:]
X_train = X_train[:-n_valid]
Y_train = Y_train[:-n_valid]

# flatten X (the arrays are already 2-D after the reshape above, so the shape
# is unchanged; the *_flat names are what the benchmark cells consume)
X_train_flat = X_train.reshape((len(X_train), np.prod(np.shape(X_train)[1:])))
X_test_flat = X_test.reshape((len(X_test), np.prod(np.shape(X_test)[1:])))
X_valid_flat= X_valid.reshape((len(X_valid), np.prod(np.shape(X_valid)[1:])))
# sanity check: sizes of the train / validation / test splits
print(len(X_train), len(X_valid), len(X_test))

50000 10000 10000


#### Network 

##### 2 dims

In [11]:
# Directory of the saved 2-dim network for this dataset; the encoder is loaded from it below
load_loc = output_dir / dataset / 'network' 

In [12]:
# Build a tfUMAP wrapper (direct_embedding=False); its `encoder` attribute is
# overwritten with the saved 2-dim model in the following cell.
tfumap_kwargs = dict(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    batch_size=100,
    dims=dims,
)
embedder = tfUMAP(**tfumap_kwargs)

In [13]:
# Load the saved Keras encoder from <load_loc>/encoder and attach it to the embedder
encoder_path = (load_loc / 'encoder').as_posix()
encoder = tf.keras.models.load_model(encoder_path)
embedder.encoder = encoder

In [14]:
# Benchmark the 2-dim network: time each projection of the test set and
# append one ('network', 2) row per repeat to projection_speeds.
n_repeats = 10
times = []
for repeat in tqdm(range(n_repeats)):
    tic = time.monotonic()
    embedder.transform(X_test_flat)
    elapsed = time.monotonic() - tic  # wall-clock seconds for this repeat
    print('seconds: ', elapsed)
    times.append(elapsed)
    projection_speeds.loc[len(projection_speeds)] = ['network', 2, dataset, elapsed]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.6665196269750595
seconds:  0.570827400078997
seconds:  0.5870037979912013
seconds:  0.5799364240374416
seconds:  0.5651483149267733
seconds:  0.5651810250710696
seconds:  0.5721099157817662
seconds:  0.6015974411275238
seconds:  0.578452100045979
seconds:  0.5722668098751456



In [15]:
# Network CPU: repeat the 2-dim network benchmark inside a tf.device('/CPU:0')
# scope and log the runs as ('network-cpu', 2) rows.

n_repeats = 10
times = []
with tf.device('/CPU:0'):
    for repeat in tqdm(range(n_repeats)):
        t0 = time.monotonic()
        embedder.transform(X_test_flat)
        elapsed = time.monotonic() - t0  # wall-clock seconds for this repeat
        print('seconds: ', elapsed)
        times.append(elapsed)
        projection_speeds.loc[len(projection_speeds)] = ['network-cpu', 2, dataset, elapsed]

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,fmnist,0.66652
1,network,2,fmnist,0.570827
2,network,2,fmnist,0.587004
3,network,2,fmnist,0.579936
4,network,2,fmnist,0.565148
5,network,2,fmnist,0.565181
6,network,2,fmnist,0.57211
7,network,2,fmnist,0.601597
8,network,2,fmnist,0.578452
9,network,2,fmnist,0.572267


##### 64 dims

In [16]:
# Directory of the saved 64-dim network for this dataset; the encoder is loaded from it below
load_loc = output_dir / dataset /"64"/ 'network' 

In [17]:
# Fresh tfUMAP wrapper for the 64-dim run; its `encoder` attribute is
# overwritten with the saved 64-dim model in the following cell.
tfumap_kwargs = dict(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    batch_size=100,
    dims=dims,
)
embedder = tfUMAP(**tfumap_kwargs)

In [18]:
# Load the saved 64-dim Keras encoder from <load_loc>/encoder and attach it to the embedder
encoder_path = (load_loc / 'encoder').as_posix()
encoder = tf.keras.models.load_model(encoder_path)
embedder.encoder = encoder

In [19]:
# Benchmark the 64-dim network: one ('network', 64) row per timed projection
# of the test set.
n_repeats = 10
times = []
for repeat in tqdm(range(n_repeats)):
    began = time.monotonic()
    embedder.transform(X_test_flat)
    duration = time.monotonic() - began  # wall-clock seconds for this repeat
    print('seconds: ', duration)
    times.append(duration)
    projection_speeds.loc[len(projection_speeds)] = ['network', 64, dataset, duration]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.6324575650505722
seconds:  0.5782665328588337
seconds:  0.5873863671440631
seconds:  0.5957047080155462
seconds:  0.5778048790525645
seconds:  0.619128986960277
seconds:  0.5928229740820825
seconds:  0.5797711350023746
seconds:  0.5786726931110024
seconds:  0.5829243750777096



In [20]:
# Network CPU: repeat the 64-dim network benchmark inside a tf.device('/CPU:0')
# scope and log the runs as ('network-cpu', 64) rows.

n_repeats = 10
times = []
with tf.device('/CPU:0'):
    for repeat in tqdm(range(n_repeats)):
        began = time.monotonic()
        embedder.transform(X_test_flat)
        duration = time.monotonic() - began  # wall-clock seconds for this repeat
        print('seconds: ', duration)
        times.append(duration)
        projection_speeds.loc[len(projection_speeds)] = ['network-cpu', 64, dataset, duration]

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,fmnist,0.66652
1,network,2,fmnist,0.570827
2,network,2,fmnist,0.587004
3,network,2,fmnist,0.579936
4,network,2,fmnist,0.565148
5,network,2,fmnist,0.565181
6,network,2,fmnist,0.57211
7,network,2,fmnist,0.601597
8,network,2,fmnist,0.578452
9,network,2,fmnist,0.572267


#### UMAP-learn

##### 2 dims

In [21]:
# Fit umap-learn with a 2-component embedding on the training set.
# Note: this rebinds `embedder` (previously the tfUMAP wrapper) to the UMAP
# model, which is the object the next cell times on the test set.
embedder = UMAP(n_components = 2, verbose=True)
z_umap = embedder.fit_transform(X_train_flat)

UMAP(dens_frac=0.0, dens_lambda=0.0, verbose=True)
Construct fuzzy simplicial set
Wed Jul 15 14:52:08 2020 Finding Nearest Neighbors
Wed Jul 15 14:52:08 2020 Building RP forest with 16 trees
Wed Jul 15 14:52:10 2020 parallel NN descent for 16 iterations
	 0  /  16
	 1  /  16
	 2  /  16
	 3  /  16
Wed Jul 15 14:52:19 2020 Finished Nearest Neighbor Search
Wed Jul 15 14:52:22 2020 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Wed Jul 15 14:53:09 2020 Finished embedding


In [22]:
# Benchmark umap-learn (2 dims): time embedder.transform on the test set and
# append one ('umap-learn', 2) row per repeat.
n_repeats = 10
times = []
for repeat in tqdm(range(n_repeats)):
    t_start = time.monotonic()
    embedder.transform(X_test_flat)
    took = time.monotonic() - t_start  # wall-clock seconds for this repeat
    print('seconds: ', took)
    times.append(took)
    projection_speeds.loc[len(projection_speeds)] = ['umap-learn', 2, dataset, took]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  18.775295767001808
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  5.9559709338936955
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  6.092531468952075
	completed  0  / 

In [23]:
# Inspect the timing rows accumulated so far
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,fmnist,0.66652
1,network,2,fmnist,0.570827
2,network,2,fmnist,0.587004
3,network,2,fmnist,0.579936
4,network,2,fmnist,0.565148
5,network,2,fmnist,0.565181
6,network,2,fmnist,0.57211
7,network,2,fmnist,0.601597
8,network,2,fmnist,0.578452
9,network,2,fmnist,0.572267


##### 64 dims

In [24]:
# Fit umap-learn with a 64-component embedding on the training set; the
# fitted `embedder` is what the next cell times on the test set.
embedder = UMAP(n_components = 64, verbose=True)
z_umap = embedder.fit_transform(X_train_flat)

UMAP(dens_frac=0.0, dens_lambda=0.0, n_components=64, verbose=True)
Construct fuzzy simplicial set
Wed Jul 15 14:54:21 2020 Finding Nearest Neighbors
Wed Jul 15 14:54:21 2020 Building RP forest with 16 trees
Wed Jul 15 14:54:22 2020 parallel NN descent for 16 iterations
	 0  /  16
	 1  /  16
	 2  /  16
	 3  /  16
Wed Jul 15 14:54:24 2020 Finished Nearest Neighbor Search
Wed Jul 15 14:54:24 2020 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Wed Jul 15 14:55:22 2020 Finished embedding


In [25]:
# Benchmark umap-learn (64 dims): time embedder.transform on the test set and
# append one ('umap-learn', 64) row per repeat.
n_repeats = 10
times = []
for repeat in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    embedder.transform(X_test_flat)
    elapsed = time.monotonic() - start_time  # wall-clock seconds for this repeat
    print('seconds: ', elapsed)
    # Record the run in `times` as every other timing cell does; this cell
    # previously reset the list but never filled it.
    times.append(elapsed)
    projection_speeds.loc[len(projection_speeds)] = ['umap-learn', 64, dataset, elapsed]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  7.70125941792503
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  6.358414246933535
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  5.10625543887727
	completed  0  /  100

In [26]:
# Inspect the timing rows accumulated so far
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,fmnist,0.66652
1,network,2,fmnist,0.570827
2,network,2,fmnist,0.587004
3,network,2,fmnist,0.579936
4,network,2,fmnist,0.565148
5,network,2,fmnist,0.565181
6,network,2,fmnist,0.57211
7,network,2,fmnist,0.601597
8,network,2,fmnist,0.578452
9,network,2,fmnist,0.572267


#### PCA

##### 2 dims

In [27]:
# Fit a 2-component PCA on the training set; the fitted `pca` is timed on the
# test set in the next cell.
pca = PCA(n_components=2)
z = pca.fit_transform(X_train_flat)

In [28]:
# Benchmark PCA (2 dims): time pca.transform on the test set and append one
# ('pca', 2) row per repeat.
n_repeats = 10
times = []
for repeat in tqdm(range(n_repeats)):
    tic = time.monotonic()
    pca.transform(X_test_flat)
    elapsed = time.monotonic() - tic  # wall-clock seconds for this repeat
    print('seconds: ', elapsed)
    times.append(elapsed)
    projection_speeds.loc[len(projection_speeds)] = ['pca', 2, dataset, elapsed]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.012142679886892438
seconds:  0.015337032033130527
seconds:  0.015329151879996061
seconds:  0.016142185777425766
seconds:  0.015245229937136173
seconds:  0.016163286054506898
seconds:  0.015217638807371259
seconds:  0.015228929929435253
seconds:  0.01610818412154913
seconds:  0.015298180980607867



In [29]:
# Inspect the timing rows accumulated so far
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,fmnist,0.66652
1,network,2,fmnist,0.570827
2,network,2,fmnist,0.587004
3,network,2,fmnist,0.579936
4,network,2,fmnist,0.565148
5,network,2,fmnist,0.565181
6,network,2,fmnist,0.57211
7,network,2,fmnist,0.601597
8,network,2,fmnist,0.578452
9,network,2,fmnist,0.572267


##### 64 dims

In [30]:
# Fit a 64-component PCA on the training set; the fitted `pca` is timed on the
# test set in the next cell.
pca = PCA(n_components=64)
z = pca.fit_transform(X_train_flat)

In [31]:
# Benchmark PCA (64 dims): time pca.transform on the test set and append one
# ('pca', 64) row per repeat.
n_repeats = 10
times = []
for repeat in tqdm(range(n_repeats)):
    t0 = time.monotonic()
    pca.transform(X_test_flat)
    duration = time.monotonic() - t0  # wall-clock seconds for this repeat
    print('seconds: ', duration)
    times.append(duration)
    projection_speeds.loc[len(projection_speeds)] = ['pca', 64, dataset, duration]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.01815981394611299
seconds:  0.019492382183670998
seconds:  0.01855977508239448
seconds:  0.018594756023958325
seconds:  0.018535224022343755
seconds:  0.020074347965419292
seconds:  0.01935253804549575
seconds:  0.01946695218794048
seconds:  0.01888064411468804
seconds:  0.01951951300725341



In [32]:
# Inspect the timing rows accumulated so far
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,fmnist,0.66652
1,network,2,fmnist,0.570827
2,network,2,fmnist,0.587004
3,network,2,fmnist,0.579936
4,network,2,fmnist,0.565148
5,network,2,fmnist,0.565181
6,network,2,fmnist,0.57211
7,network,2,fmnist,0.601597
8,network,2,fmnist,0.578452
9,network,2,fmnist,0.572267


#### TSNE

##### 2 dims

In [33]:
# Configure openTSNE for a 2-component embedding.
# NOTE(review): n_jobs=32 is hard-coded -- presumably sized for the original
# machine; adjust to the cores available when re-running.
tsne = TSNE(
    n_components = 2,
    n_jobs=32,
    verbose=True
)

In [34]:
# Fit t-SNE on the training set; the returned object's .transform is what the
# next cell times on the test set.
embedding_train = tsne.fit(X_train_flat)

--------------------------------------------------------------------------------
TSNE(n_jobs=32, neighbors=None, verbose=True)
--------------------------------------------------------------------------------
===> Finding 90 nearest neighbors using Annoy approximate search using euclidean distance...




   --> Time elapsed: 36.72 seconds
===> Calculating affinity matrix...
   --> Time elapsed: 0.41 seconds
===> Calculating PCA-based initialization...
   --> Time elapsed: 0.48 seconds
===> Running optimization with exaggeration=12.00, lr=4166.67 for 250 iterations...
Iteration   50, KL divergence 5.9436, 50 iterations in 1.6407 sec
Iteration  100, KL divergence 5.4387, 50 iterations in 1.6313 sec
Iteration  150, KL divergence 5.3138, 50 iterations in 1.6602 sec
Iteration  200, KL divergence 5.2570, 50 iterations in 1.6842 sec
Iteration  250, KL divergence 5.2247, 50 iterations in 1.6110 sec
   --> Time elapsed: 8.23 seconds
===> Running optimization with exaggeration=1.00, lr=4166.67 for 500 iterations...
Iteration   50, KL divergence 3.8565, 50 iterations in 1.6199 sec
Iteration  100, KL divergence 3.3411, 50 iterations in 1.5053 sec
Iteration  150, KL divergence 3.0753, 50 iterations in 2.0471 sec
Iteration  200, KL divergence 2.9082, 50 iterations in 2.7860 sec
Iteration  250, KL di

In [35]:
# Benchmark openTSNE (2 dims): time embedding_train.transform on the test set
# and append one ('TSNE', 2) row per repeat.
n_repeats = 10
times = []
for repeat in tqdm(range(n_repeats)):
    t_start = time.monotonic()
    embedding_train.transform(X_test_flat)
    took = time.monotonic() - t_start  # wall-clock seconds for this repeat
    print('seconds: ', took)
    times.append(took)
    projection_speeds.loc[len(projection_speeds)] = ['TSNE', 2, dataset, took]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

===> Finding 15 nearest neighbors in existing embedding using Annoy approximate search...
   --> Time elapsed: 6.04 seconds
===> Calculating affinity matrix...
   --> Time elapsed: 0.04 seconds
===> Running optimization with exaggeration=4.00, lr=0.10 for 0 iterations...
   --> Time elapsed: 0.00 seconds
===> Running optimization with exaggeration=1.50, lr=0.10 for 250 iterations...
Iteration   50, KL divergence 197925.5359, 50 iterations in 0.2627 sec
Iteration  100, KL divergence 198144.9919, 50 iterations in 0.2712 sec
Iteration  150, KL divergence 198282.1556, 50 iterations in 0.2775 sec
Iteration  200, KL divergence 198392.0414, 50 iterations in 0.2752 sec
Iteration  250, KL divergence 198468.5677, 50 iterations in 0.2893 sec
   --> Time elapsed: 1.38 seconds
seconds:  7.755008135922253
===> Finding 15 nearest neighbors in existing embedding using Annoy approximate search...
   --> Time elapsed: 4.37 seconds
===> Calculating affinity matrix...
   --> Time elapsed: 0.03 seconds
===

In [36]:
# Inspect the full table of timing rows before saving
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,fmnist,0.666520
1,network,2,fmnist,0.570827
2,network,2,fmnist,0.587004
3,network,2,fmnist,0.579936
4,network,2,fmnist,0.565148
...,...,...,...,...
65,TSNE,2,fmnist,5.697683
66,TSNE,2,fmnist,5.848243
67,TSNE,2,fmnist,5.894935
68,TSNE,2,fmnist,5.865172


### Save

In [37]:
# Pickle the collected timings to DATA_DIR/projection_speeds/<dataset>.pickle.
# ensure_dir is a project helper -- presumably it creates the missing parent
# directory for the path; verify against tfumap.paths.
speeds_file = dataset + '.pickle'
save_loc = DATA_DIR / 'projection_speeds' / speeds_file
ensure_dir(save_loc)
projection_speeds.to_pickle(save_loc)