#### Outline
- for each dataset: 
    - load dataset; 
    - for each network: 
        - load network
        - project 1000 test dataset samples
        - save to metric dataframe

In [1]:
# reload packages
%load_ext autoreload
%autoreload 2

### Choose GPU (this may not be needed on your computer)

In [2]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=''

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=''


In [3]:
import numpy as np
import pickle
import pandas as pd
import time
from umap import UMAP

In [4]:
from tfumap.umap import tfUMAP
import tensorflow as tf
from sklearn.decomposition import PCA
from openTSNE import TSNE



In [5]:
from tqdm.autonotebook import tqdm

In [6]:
from tfumap.paths import ensure_dir, MODEL_DIR, DATA_DIR

In [7]:
output_dir = MODEL_DIR/'projections' 

In [8]:
projection_speeds = pd.DataFrame(columns = ['method_', 'dimensions', 'dataset', 'speed', "nex"])

### FMNIST

In [9]:
dataset = 'cifar10'
dims = (32,32,3)

##### load dataset

In [10]:
from tensorflow.keras.datasets import cifar10

# load dataset
(train_images, Y_train), (test_images, Y_test) = cifar10.load_data()
X_train = (train_images/255.).astype('float32')
X_test = (test_images/255.).astype('float32')
X_train = X_train.reshape((len(X_train), np.product(np.shape(X_train)[1:])))
X_test = X_test.reshape((len(X_test), np.product(np.shape(X_test)[1:])))

# subset a validation set
n_valid = 10000
X_valid = X_train[-n_valid:]
Y_valid = Y_train[-n_valid:].flatten()
X_train = X_train[:-n_valid]
Y_train = Y_train[:-n_valid].flatten()
Y_test = Y_test.flatten()

print(len(X_train), len(X_valid), len(X_test))

40000 10000 10000


In [11]:
X_test.shape

(10000, 3072)

In [12]:
X_test_flat = X_test

In [13]:
X_train_flat = X_train

In [None]:
X_test = X_test.reshape((10000, 32,32,3))

#### Network 

##### 2 dims

In [14]:
load_loc = output_dir / dataset / 'network' 

In [15]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    batch_size = 100,
    dims = dims
)

In [16]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [17]:
n_repeats = 10
times = []
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    encoder(X_test);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    times.append(end_time - start_time)
    projection_speeds.loc[len(projection_speeds)] = ['network', 2, dataset, end_time - start_time]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.6012542841490358
seconds:  0.5013506698887795
seconds:  0.5005080159753561
seconds:  0.48961735400371253
seconds:  0.5211484869942069
seconds:  0.48291740217246115
seconds:  0.4844005540944636
seconds:  0.49247688497416675
seconds:  0.497127708978951
seconds:  0.4983965049032122



In [18]:
##### Network CPU

with tf.device('/CPU:0'):
    n_repeats = 10
    times = []
    for i in tqdm(range(n_repeats)):
        start_time = time.monotonic()
        encoder(X_test);
        end_time = time.monotonic()
        print('seconds: ', end_time - start_time)
        times.append(end_time - start_time)
        projection_speeds.loc[len(projection_speeds)] = ['network-cpu', 2, dataset, end_time - start_time]

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,cifar10,0.601254
1,network,2,cifar10,0.501351
2,network,2,cifar10,0.500508
3,network,2,cifar10,0.489617
4,network,2,cifar10,0.521148
5,network,2,cifar10,0.482917
6,network,2,cifar10,0.484401
7,network,2,cifar10,0.492477
8,network,2,cifar10,0.497128
9,network,2,cifar10,0.498397


##### 64 dims

In [19]:
load_loc = output_dir / dataset /"64"/ 'network' 

In [20]:
embedder = tfUMAP(
    direct_embedding=False,
    verbose=True,
    negative_sample_rate=5,
    training_epochs=5,
    batch_size = 100,
    dims = dims
)

In [21]:
encoder = tf.keras.models.load_model((load_loc / 'encoder').as_posix())
embedder.encoder = encoder

In [22]:
n_repeats = 10
times = []
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    encoder(X_test);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    times.append(end_time - start_time)
    projection_speeds.loc[len(projection_speeds)] = ['network', 64, dataset, end_time - start_time]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.5710086468607187
seconds:  0.5088594648987055
seconds:  0.5286082709208131
seconds:  0.5182931739836931
seconds:  0.5071479850448668
seconds:  0.5131123859900981
seconds:  0.5106427951250225
seconds:  0.5068614569026977
seconds:  0.518206843174994
seconds:  0.5020528400782496



In [23]:
##### Network CPU

with tf.device('/CPU:0'):
    n_repeats = 10
    times = []
    for i in tqdm(range(n_repeats)):
        start_time = time.monotonic()
        encoder(X_test);
        end_time = time.monotonic()
        print('seconds: ', end_time - start_time)
        times.append(end_time - start_time)
        projection_speeds.loc[len(projection_speeds)] = ['network-cpu', 64, dataset, end_time - start_time]

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,cifar10,0.601254
1,network,2,cifar10,0.501351
2,network,2,cifar10,0.500508
3,network,2,cifar10,0.489617
4,network,2,cifar10,0.521148
5,network,2,cifar10,0.482917
6,network,2,cifar10,0.484401
7,network,2,cifar10,0.492477
8,network,2,cifar10,0.497128
9,network,2,cifar10,0.498397


#### UMAP-learn

##### 2 dims

In [24]:
embedder = UMAP(n_components = 2, verbose=True)
z_umap = embedder.fit_transform(X_train_flat)

UMAP(dens_frac=0.0, dens_lambda=0.0, verbose=True)
Construct fuzzy simplicial set
Wed Jul 15 15:05:02 2020 Finding Nearest Neighbors
Wed Jul 15 15:05:03 2020 Building RP forest with 15 trees
Wed Jul 15 15:05:05 2020 parallel NN descent for 15 iterations
	 0  /  15
	 1  /  15
	 2  /  15
	 3  /  15
	 4  /  15
	 5  /  15
Wed Jul 15 15:05:17 2020 Finished Nearest Neighbor Search
Wed Jul 15 15:05:19 2020 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Wed Jul 15 15:05:46 2020 Finished embedding


In [25]:
n_repeats = 10
times = []
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    embedder.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    times.append(end_time - start_time)
    projection_speeds.loc[len(projection_speeds)] = ['umap-learn', 2, dataset, end_time - start_time]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  24.000269999960437
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  10.875150758074597
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  12.785103488946334
	completed  0  /

In [26]:
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,cifar10,0.601254
1,network,2,cifar10,0.501351
2,network,2,cifar10,0.500508
3,network,2,cifar10,0.489617
4,network,2,cifar10,0.521148
5,network,2,cifar10,0.482917
6,network,2,cifar10,0.484401
7,network,2,cifar10,0.492477
8,network,2,cifar10,0.497128
9,network,2,cifar10,0.498397


##### 64 dims

In [27]:
embedder = UMAP(n_components = 64, verbose=True)
z_umap = embedder.fit_transform(X_train_flat)

UMAP(dens_frac=0.0, dens_lambda=0.0, n_components=64, verbose=True)
Construct fuzzy simplicial set
Wed Jul 15 15:07:56 2020 Finding Nearest Neighbors
Wed Jul 15 15:07:56 2020 Building RP forest with 15 trees
Wed Jul 15 15:07:58 2020 parallel NN descent for 15 iterations
	 0  /  15
	 1  /  15
	 2  /  15
	 3  /  15
	 4  /  15
	 5  /  15
Wed Jul 15 15:08:02 2020 Finished Nearest Neighbor Search
Wed Jul 15 15:08:02 2020 Construct embedding
	completed  0  /  200 epochs
	completed  20  /  200 epochs
	completed  40  /  200 epochs
	completed  60  /  200 epochs
	completed  80  /  200 epochs
	completed  100  /  200 epochs
	completed  120  /  200 epochs
	completed  140  /  200 epochs
	completed  160  /  200 epochs
	completed  180  /  200 epochs
Wed Jul 15 15:08:37 2020 Finished embedding


In [28]:
n_repeats = 10
times = []
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    embedder.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    projection_speeds.loc[len(projection_speeds)] = ['umap-learn', 64, dataset, end_time - start_time]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  12.429585800971836
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  12.76642105402425
	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
seconds:  12.842278184834868
	completed  0  / 

In [29]:
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,cifar10,0.601254
1,network,2,cifar10,0.501351
2,network,2,cifar10,0.500508
3,network,2,cifar10,0.489617
4,network,2,cifar10,0.521148
5,network,2,cifar10,0.482917
6,network,2,cifar10,0.484401
7,network,2,cifar10,0.492477
8,network,2,cifar10,0.497128
9,network,2,cifar10,0.498397


#### PCA

##### 2 dims

In [30]:
pca = PCA(n_components=2)
z = pca.fit_transform(X_train_flat)

In [31]:
n_repeats = 10
times = []
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    pca.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    times.append(end_time - start_time)
    projection_speeds.loc[len(projection_speeds)] = ['pca', 2, dataset, end_time - start_time]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.06865169410593808
seconds:  0.06343075609765947
seconds:  0.06430793018080294
seconds:  0.06356498994864523
seconds:  0.06579376198351383
seconds:  0.06408629403449595
seconds:  0.06308056600391865
seconds:  0.06559553695842624
seconds:  0.06296488200314343
seconds:  0.06479169405065477



In [32]:
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,cifar10,0.601254
1,network,2,cifar10,0.501351
2,network,2,cifar10,0.500508
3,network,2,cifar10,0.489617
4,network,2,cifar10,0.521148
5,network,2,cifar10,0.482917
6,network,2,cifar10,0.484401
7,network,2,cifar10,0.492477
8,network,2,cifar10,0.497128
9,network,2,cifar10,0.498397


##### 64 dims

In [33]:
pca = PCA(n_components=64)
z = pca.fit_transform(X_train_flat)

In [34]:
n_repeats = 10
times = []
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    pca.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    times.append(end_time - start_time)
    projection_speeds.loc[len(projection_speeds)] = ['pca', 64, dataset, end_time - start_time]


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

seconds:  0.07228238810785115
seconds:  0.07668709405697882
seconds:  0.07485091197304428
seconds:  0.07794421003200114
seconds:  0.07974905311129987
seconds:  0.07419163291342556
seconds:  0.0791001240722835
seconds:  0.07585171004757285
seconds:  0.07538590696640313
seconds:  0.07671611593104899



In [35]:
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,cifar10,0.601254
1,network,2,cifar10,0.501351
2,network,2,cifar10,0.500508
3,network,2,cifar10,0.489617
4,network,2,cifar10,0.521148
5,network,2,cifar10,0.482917
6,network,2,cifar10,0.484401
7,network,2,cifar10,0.492477
8,network,2,cifar10,0.497128
9,network,2,cifar10,0.498397


#### TSNE

##### 2 dims

In [36]:
tsne = TSNE(
    n_components = 2,
    n_jobs=32,
    verbose=True
)

In [37]:
embedding_train = tsne.fit(X_train_flat)

--------------------------------------------------------------------------------
TSNE(n_jobs=32, neighbors=None, verbose=True)
--------------------------------------------------------------------------------
===> Finding 90 nearest neighbors using Annoy approximate search using euclidean distance...




   --> Time elapsed: 83.38 seconds
===> Calculating affinity matrix...
   --> Time elapsed: 0.36 seconds
===> Calculating PCA-based initialization...
   --> Time elapsed: 1.35 seconds
===> Running optimization with exaggeration=12.00, lr=3333.33 for 250 iterations...
Iteration   50, KL divergence 6.8012, 50 iterations in 1.8006 sec
Iteration  100, KL divergence 6.3838, 50 iterations in 3.5250 sec
Iteration  150, KL divergence 6.5139, 50 iterations in 3.8239 sec
Iteration  200, KL divergence 6.3266, 50 iterations in 5.8870 sec
Iteration  250, KL divergence 6.3516, 50 iterations in 11.6224 sec
   --> Time elapsed: 26.66 seconds
===> Running optimization with exaggeration=1.00, lr=3333.33 for 500 iterations...
Iteration   50, KL divergence 4.5630, 50 iterations in 7.6965 sec
Iteration  100, KL divergence 4.2771, 50 iterations in 1.8515 sec
Iteration  150, KL divergence 4.1549, 50 iterations in 1.4570 sec
Iteration  200, KL divergence 4.0800, 50 iterations in 1.6480 sec
Iteration  250, KL 

In [38]:
n_repeats = 10
times = []
for i in tqdm(range(n_repeats)):
    start_time = time.monotonic()
    embedding_train.transform(X_test_flat);
    end_time = time.monotonic()
    print('seconds: ', end_time - start_time)
    times.append(end_time - start_time)
    projection_speeds.loc[len(projection_speeds)] = ['TSNE', 2, dataset, end_time - start_time]

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

===> Finding 15 nearest neighbors in existing embedding using Annoy approximate search...
   --> Time elapsed: 10.57 seconds
===> Calculating affinity matrix...
   --> Time elapsed: 0.03 seconds
===> Running optimization with exaggeration=4.00, lr=0.10 for 0 iterations...
   --> Time elapsed: 0.00 seconds
===> Running optimization with exaggeration=1.50, lr=0.10 for 250 iterations...
Iteration   50, KL divergence 193452.3546, 50 iterations in 0.2563 sec
Iteration  100, KL divergence 193419.9573, 50 iterations in 0.2866 sec
Iteration  150, KL divergence 193431.6214, 50 iterations in 0.2897 sec
Iteration  200, KL divergence 193445.2527, 50 iterations in 0.2593 sec
Iteration  250, KL divergence 193434.2710, 50 iterations in 0.2650 sec
   --> Time elapsed: 1.36 seconds
seconds:  12.148497946094722
===> Finding 15 nearest neighbors in existing embedding using Annoy approximate search...
   --> Time elapsed: 10.70 seconds
===> Calculating affinity matrix...
   --> Time elapsed: 0.03 seconds


In [39]:
projection_speeds

Unnamed: 0,method_,dimensions,dataset,speed
0,network,2,cifar10,0.601254
1,network,2,cifar10,0.501351
2,network,2,cifar10,0.500508
3,network,2,cifar10,0.489617
4,network,2,cifar10,0.521148
...,...,...,...,...
65,TSNE,2,cifar10,11.991418
66,TSNE,2,cifar10,12.699352
67,TSNE,2,cifar10,11.923089
68,TSNE,2,cifar10,12.163713


### Save

In [40]:
save_loc = DATA_DIR / 'projection_speeds' / (dataset + '.pickle')
ensure_dir(save_loc)
projection_speeds.to_pickle(save_loc)