# ImageNet-10 preparation

In [None]:
import data_split
import data_converter
import vgg19bn

In [None]:
# Instantiate the project's DataSplit helper and save its split, passing 'data' as the target.
# NOTE(review): 'data' is presumably the output directory that later cells read from
# (the Mapper section loads 'data/in10_split_converted.npz') — confirm in data_split.
ds = data_split.DataSplit()
ds.save_split('data')

In [None]:
%%time
# Wrap a VGG19bn instance (from the local vgg19bn module) in a DataConverter.
dc = data_converter.DataConverter(vgg19bn.VGG19bn())
# NOTE(review): presumably runs the saved split through the network and writes the
# converted arrays (likely 'data/in10_split_converted.npz', which the Mapper section
# loads) — confirm in data_converter.
dc.convert_split()

# Mapper

In [3]:
import numpy as np
import mapper
import my_umap
import my_pca
import pickle
from sklearn.cluster import DBSCAN
from gtda.mapper import FirstSimpleGap

In [4]:
def pipeline(n_components, epsilon, n_intervals=10, k=5):
    """Run one PCA + DBSCAN Mapper experiment and print the array shapes before/after.

    Steps, in order:
      1. Build a ``DBSCAN(eps=epsilon, min_samples=1)`` clusterer and a
         ``my_pca.MyPCA(n_components=n_components)`` projector.
      2. Load the arrays from ``data/in10_split_converted.npz`` and print their shapes.
      3. Fit a ``mapper.Mapper`` on ``x_train`` with a ``'uniform'`` cover.
      4. Load the pickled mapper data from ``experiments/<experiment_name>``
         (presumably written by ``Mapper.fit`` — confirm in ``mapper``).
      5. Call ``Mapper.get_representations`` and then reload
         ``experiments/<experiment_name>.npz`` to print the resulting shapes.

    Parameters
    ----------
    n_components : int
        Forwarded to ``MyPCA`` and to ``Mapper.fit``; also part of the experiment name.
    epsilon : int or float
        DBSCAN ``eps`` radius; also part of the experiment name.
    n_intervals : int, optional
        Forwarded to ``Mapper.fit`` (default 10, the value previously hard-coded).
    k : int, optional
        Forwarded positionally to ``Mapper.get_representations`` (default 5, the
        value previously hard-coded).

    Returns
    -------
    None
        Results are persisted by the ``mapper`` module; this function only prints.
    """
    experiment_name = 'comp{}_pca_dbscan{}'.format(n_components, epsilon)
    clusterer = DBSCAN(eps=epsilon, min_samples=1)
    projector = my_pca.MyPCA(n_components=n_components)

    # NpzFile keeps the archive open until closed; materialize the arrays inside
    # the context manager so the handle is released before the long-running fit.
    with np.load('data/in10_split_converted.npz', allow_pickle=True) as loaded:
        x_train = loaded['x_train']
        x_test_none = loaded['x_test_none']
        x_test_gaussian = loaded['x_test_gaussian']
        y_train = loaded['y_train']
        y_test = loaded['y_test']

    print(experiment_name)
    print(x_train.shape, x_test_none.shape, x_test_gaussian.shape, y_train.shape, y_test.shape)

    m = mapper.Mapper()
    m.fit(x_train, projector=projector, clusterer=clusterer, n_components=n_components,
          n_intervals=n_intervals, experiment_name=experiment_name, kind='uniform')

    # NOTE(review): pickle.load executes arbitrary code from the file; this is only
    # safe because the file is expected to be produced locally by this pipeline.
    with open('experiments/{}'.format(experiment_name), 'rb') as mapper_file:
        mapper_data = pickle.load(mapper_file)
    latent_space, graphs, covers = mapper_data[0], mapper_data[1], mapper_data[2]

    m.get_representations(x_train, x_test_none, x_test_gaussian, y_train, y_test,
                          k, latent_space, graphs, covers, experiment_name)

    # Reload the arrays stored under the experiment name and report their shapes.
    with np.load('experiments/{}.npz'.format(experiment_name), allow_pickle=True) as loaded:
        x_train = loaded['x_train']
        x_test_none = loaded['x_test_none']
        x_test_gaussian = loaded['x_test_gaussian']
        y_train = loaded['y_train']
        y_test = loaded['y_test']
    print(x_train.shape, x_test_none.shape, x_test_gaussian.shape, y_train.shape, y_test.shape)

In [None]:
# Sweep the DBSCAN radius from 60 down to 30 in steps of 5, with 120 PCA components.
# In the recorded run below, smaller radii were far more expensive: the representation
# width grew from 1214 (eps=60) to 29636 (eps=35), and the eps=30 fit alone took
# about 324 minutes versus roughly 11-13 minutes for the larger radii.
for eps in range(60, 25, -5):
    pipeline(120, eps)

comp120_pca_dbscan60
(10000, 5096) (3000, 5096) (3000, 5096) (10000,) (3000,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s

[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.2s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.2s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.4s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.0s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.5s
[Pipel

[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  4.2min


[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  22.6s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.3s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  21.5s
[Pipel

[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed: 11.4min finished
[binarization]: 100%|██████████| 10000/10000 [01:23<00:00, 119.16it/s]
[wknn]: 100%|██████████| 3000/3000 [03:47<00:00, 13.22it/s]


(10000, 1214) (3000, 1214) (0,) (10000,) (3000,)
comp120_pca_dbscan55
(10000, 5096) (3000, 5096) (3000, 5096) (10000,) (3000,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s

[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.2s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.2s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.5s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.5s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.6s[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.2s

[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.7s
[Pipel

[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  4.5min


[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.3s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.5s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.5s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.7s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  24.3s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipel

[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed: 11.9min finished
[binarization]: 100%|██████████| 10000/10000 [01:19<00:00, 125.97it/s]
[wknn]: 100%|██████████| 3000/3000 [03:44<00:00, 13.34it/s]


(10000, 1236) (3000, 1236) (0,) (10000,) (3000,)
comp120_pca_dbscan50
(10000, 5096) (3000, 5096) (3000, 5096) (10000,) (3000,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.2s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.2s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.4s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipel

[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  4.2min


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.3s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  22.2s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.0s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipel

[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed: 11.7min finished
[binarization]: 100%|██████████| 10000/10000 [01:25<00:00, 117.05it/s]
[wknn]: 100%|██████████| 3000/3000 [04:15<00:00, 11.72it/s]


(10000, 1337) (3000, 1337) (0,) (10000,) (3000,)
comp120_pca_dbscan45
(10000, 5096) (3000, 5096) (3000, 5096) (10000,) (3000,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.3s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.4s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.5s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.5s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.7s
[Pipel

[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  4.8min


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  25.3s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.1s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipel

[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed: 12.4min finished
[binarization]: 100%|██████████| 10000/10000 [02:06<00:00, 79.16it/s]
[wknn]: 100%|██████████| 3000/3000 [03:43<00:00, 13.44it/s]


(10000, 2216) (3000, 2216) (0,) (10000,) (3000,)
comp120_pca_dbscan40
(10000, 5096) (3000, 5096) (3000, 5096) (10000,) (3000,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.2s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.2s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.0s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.4s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.2s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.7s
[Pipel

[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  4.4min


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.6s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.6s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.0s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.9s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  22.2s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.2s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipel

[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed: 11.6min finished
[binarization]: 100%|██████████| 10000/10000 [06:19<00:00, 26.34it/s]
[wknn]: 100%|██████████| 3000/3000 [03:57<00:00, 12.66it/s]


(10000, 7465) (3000, 7465) (0,) (10000,) (3000,)
comp120_pca_dbscan35
(10000, 5096) (3000, 5096) (3000, 5096) (10000,) (3000,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s

[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s

[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.2s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.2s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.4s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipel

[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:  5.0min


[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.3s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.5s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   4.5s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.3s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.7s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   4.9s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipel

[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed: 12.9min finished
[binarization]: 100%|██████████| 10000/10000 [22:51<00:00,  7.29it/s]
[wknn]: 100%|██████████| 3000/3000 [03:34<00:00, 13.96it/s]


(10000, 29636) (3000, 29636) (0,) (10000,) (3000,)
comp120_pca_dbscan30
(10000, 5096) (3000, 5096) (3000, 5096) (10000,) (3000,)


[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.2s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.4s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.4s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.6s
[Pipel

[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed: 111.9min


[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.3s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.5s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  18.0s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=10.9min
[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ......... (step 1 of 2) Processing projector, total=   0.2s
[Pipeline] .............. (step 2 of 2) Processing proj, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.2s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.1s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   0.4s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=  16.5s
[Pipel

[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed: 323.7min finished
[binarization]:  85%|████████▍ | 8498/10000 [2:09:01<24:00,  1.04it/s]  